naver.py 3.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. # encoding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import (
  6. compat_urllib_parse_urlencode,
  7. compat_urlparse,
  8. )
  9. from ..utils import (
  10. ExtractorError,
  11. )
  class NaverIE(InfoExtractor):
      """Extractor for video pages under ``tvcast.naver.com/v/<id>``.

      The watch page is expected to contain a ``RMCVideoPlayer`` constructor
      call carrying two quoted arguments (referred to here as ``vid`` and
      ``key``).  Those two values are then sent to two XML endpoints on
      ``serviceapi.rmcnmv.naver.com``: one returning the video metadata and
      one returning the list of encoding options (formats).
      """

      _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
      _TESTS = [{
          'url': 'http://tvcast.naver.com/v/81652',
          'info_dict': {
              'id': '81652',
              'ext': 'mp4',
              'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
              'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
              'upload_date': '20130903',
          },
      }, {
          'url': 'http://tvcast.naver.com/v/395837',
          'md5': '638ed4c12012c458fefcddfd01f173cd',
          'info_dict': {
              'id': '395837',
              'ext': 'mp4',
              'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
              'description': 'md5:5bf200dcbf4b66eb1b350d1eb9c753f7',
              'upload_date': '20150519',
          },
          'skip': 'Georestricted',
      }]

      @staticmethod
      def _int_text(node, path):
          """Return the text found at *path* below *node* as an int.

          Returns None when the element is absent, has no text, or its text
          is not a valid integer, so that optional numeric metadata never
          aborts the extraction.
          """
          try:
              return int(node.findtext(path))
          except (TypeError, ValueError):
              return None

      def _real_extract(self, url):
          """Build the info dict for the video page at *url*.

          Raises:
              ExtractorError: when the player parameters cannot be found in
                  the page (flagged ``expected=True`` if the page itself
                  shows an error box whose message can be extracted), or
                  when the video info response carries no title.
          """
          video_id = self._match_id(url)
          webpage = self._download_webpage(url, video_id)

          # The dots are escaped so that only the literal constructor name
          # matches, rather than any character in those positions.
          m_id = re.search(
              r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"',
              webpage)
          if m_id is None:
              # Prefer the message shown on the page over a generic failure.
              error = self._html_search_regex(
                  r'(?s)<div class="(?:nation_error|nation_box|error_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
                  webpage, 'error', default=None)
              if error:
                  raise ExtractorError(error, expected=True)
              raise ExtractorError('couldn\'t extract vid and key')
          vid, key = m_id.groups()

          query = compat_urllib_parse_urlencode({'vid': vid, 'inKey': key, })
          query_urls = compat_urllib_parse_urlencode({
              'masterVid': vid,
              'protocol': 'p2p',
              'inKey': key,
          })
          info = self._download_xml(
              'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
              video_id, 'Downloading video info')
          urls = self._download_xml(
              'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
              video_id, 'Downloading video formats info')

          formats = []
          for format_el in urls.findall('EncodingOptions/EncodingOption'):
              domain = format_el.findtext('Domain')
              uri = format_el.findtext('uri')
              if not domain or not uri:
                  # No usable URL can be built for this entry; skip it
                  # instead of failing the whole extraction.
                  continue
              f = {
                  'width': self._int_text(format_el, 'width'),
                  'height': self._int_text(format_el, 'height'),
              }
              if domain.startswith('rtmp'):
                  # urlparse does not support custom schemes
                  # https://bugs.python.org/issue18828
                  f.update({
                      'url': domain + uri,
                      'ext': 'flv',
                      'rtmp_protocol': '1',  # rtmpt
                  })
              else:
                  f.update({
                      'url': compat_urlparse.urljoin(domain, uri),
                      'ext': 'mp4',
                  })
              formats.append(f)
          self._sort_formats(formats)

          # The title is required; report its absence as an extractor error
          # rather than letting an AttributeError escape.
          title = info.findtext('Subject')
          if not title:
              raise ExtractorError('Unable to extract title')

          # WriteDate has its dots stripped to form upload_date (the tests
          # above expect values such as '20130903').
          write_date = info.findtext('WriteDate')
          upload_date = write_date.replace('.', '') if write_date else None

          return {
              'id': video_id,
              'title': title,
              'formats': formats,
              'description': self._og_search_description(webpage),
              'thumbnail': self._og_search_thumbnail(webpage),
              'upload_date': upload_date,
              'view_count': self._int_text(info, 'PlayCount'),
          }