dailymotion.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. import re
  2. import json
  3. import itertools
  4. import socket
  5. from .common import InfoExtractor
  6. from .subtitles import SubtitlesIE
  7. from ..utils import (
  8. compat_http_client,
  9. compat_urllib_error,
  10. compat_urllib_request,
  11. compat_str,
  12. get_element_by_attribute,
  13. get_element_by_id,
  14. ExtractorError,
  15. )
  16. class DailyMotionSubtitlesIE(SubtitlesIE):
  17. def _get_available_subtitles(self, video_id):
  18. request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id)
  19. try:
  20. sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
  21. except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  22. self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
  23. return {}
  24. info = json.loads(sub_list)
  25. if (info['total'] > 0):
  26. sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
  27. return sub_lang_list
  28. self._downloader.report_warning(u'video doesn\'t have subtitles')
  29. return {}
  30. def _get_subtitle_url(self, sub_lang, sub_name, video_id, format):
  31. sub_lang_list = self._get_available_subtitles(video_id)
  32. return sub_lang_list[sub_lang]
  33. def _request_automatic_caption(self, video_id, webpage):
  34. self._downloader.report_warning(u'Automatic Captions not supported by dailymotion')
  35. return {}
  36. class DailymotionIE(DailyMotionSubtitlesIE): #,InfoExtractor):
  37. """Information Extractor for Dailymotion"""
  38. _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
  39. IE_NAME = u'dailymotion'
  40. _TEST = {
  41. u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
  42. u'file': u'x33vw9.mp4',
  43. u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
  44. u'info_dict': {
  45. u"uploader": u"Alex and Van .",
  46. u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
  47. }
  48. }
  49. def _real_extract(self, url):
  50. # Extract id and simplified title from URL
  51. mobj = re.match(self._VALID_URL, url)
  52. video_id = mobj.group(1).split('_')[0].split('?')[0]
  53. video_extension = 'mp4'
  54. # Retrieve video webpage to extract further information
  55. request = compat_urllib_request.Request(url)
  56. request.add_header('Cookie', 'family_filter=off')
  57. webpage = self._download_webpage(request, video_id)
  58. # Extract URL, uploader and title from webpage
  59. self.report_extraction(video_id)
  60. video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
  61. # Looking for official user
  62. r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
  63. webpage, 'video uploader')
  64. video_upload_date = None
  65. mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
  66. if mobj is not None:
  67. video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
  68. embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
  69. embed_page = self._download_webpage(embed_url, video_id,
  70. u'Downloading embed page')
  71. info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info')
  72. info = json.loads(info)
  73. # TODO: support choosing qualities
  74. for key in ['stream_h264_hd1080_url', 'stream_h264_hd_url',
  75. 'stream_h264_hq_url', 'stream_h264_url',
  76. 'stream_h264_ld_url']:
  77. if info.get(key): # key in info and info[key]:
  78. max_quality = key
  79. self.to_screen(u'%s: Using %s' % (video_id, key))
  80. break
  81. else:
  82. raise ExtractorError(u'Unable to extract video URL')
  83. video_url = info[max_quality]
  84. # subtitles
  85. video_subtitles = None
  86. video_webpage = None
  87. if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
  88. video_subtitles = self._extract_subtitles(video_id)
  89. elif self._downloader.params.get('writeautomaticsub', False):
  90. video_subtitles = self._request_automatic_caption(video_id, video_webpage)
  91. if self._downloader.params.get('listsubtitles', False):
  92. self._list_available_subtitles(video_id)
  93. return
  94. if 'length_seconds' not in info:
  95. self._downloader.report_warning(u'unable to extract video duration')
  96. video_duration = ''
  97. else:
  98. video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
  99. return [{
  100. 'id': video_id,
  101. 'url': video_url,
  102. 'uploader': video_uploader,
  103. 'upload_date': video_upload_date,
  104. 'title': self._og_search_title(webpage),
  105. 'ext': video_extension,
  106. 'subtitles': video_subtitles,
  107. 'thumbnail': info['thumbnail_url']
  108. }]