daum.py 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270
  1. # encoding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import itertools
  5. from .common import InfoExtractor
  6. from ..compat import (
  7. compat_urllib_parse,
  8. compat_urllib_parse_unquote,
  9. )
  10. from ..utils import (
  11. int_or_none,
  12. str_to_int,
  13. xpath_text,
  14. unescapeHTML,
  15. )
  16. class DaumIE(InfoExtractor):
  17. _VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)'
  18. IE_NAME = 'daum.net'
  19. _TESTS = [{
  20. 'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
  21. 'info_dict': {
  22. 'id': 'vab4dyeDBysyBssyukBUjBz',
  23. 'ext': 'mp4',
  24. 'title': '마크 헌트 vs 안토니오 실바',
  25. 'description': 'Mark Hunt vs Antonio Silva',
  26. 'upload_date': '20131217',
  27. 'thumbnail': 're:^https?://.*\.(?:jpg|png)',
  28. 'duration': 2117,
  29. 'view_count': int,
  30. 'comment_count': int,
  31. },
  32. }, {
  33. 'url': 'http://m.tvpot.daum.net/v/65139429',
  34. 'info_dict': {
  35. 'id': '65139429',
  36. 'ext': 'mp4',
  37. 'title': 'md5:a100d65d09cec246d8aa9bde7de45aed',
  38. 'description': 'md5:79794514261164ff27e36a21ad229fc5',
  39. 'upload_date': '20150604',
  40. 'thumbnail': 're:^https?://.*\.(?:jpg|png)',
  41. 'duration': 154,
  42. 'view_count': int,
  43. 'comment_count': int,
  44. },
  45. }, {
  46. 'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
  47. 'only_matching': True,
  48. }, {
  49. 'url': 'http://videofarm.daum.net/controller/player/VodPlayer.swf?vid=vwIpVpCQsT8%24&ref=',
  50. 'info_dict': {
  51. 'id': 'vwIpVpCQsT8$',
  52. 'ext': 'flv',
  53. 'title': '01-Korean War ( Trouble on the horizon )',
  54. 'description': '\nKorean War 01\nTrouble on the horizon\n전쟁의 먹구름',
  55. 'upload_date': '20080223',
  56. 'thumbnail': 're:^https?://.*\.(?:jpg|png)',
  57. 'duration': 249,
  58. 'view_count': int,
  59. 'comment_count': int,
  60. },
  61. }]
  62. def _real_extract(self, url):
  63. video_id = compat_urllib_parse_unquote(self._match_id(url))
  64. query = compat_urllib_parse.urlencode({'vid': video_id})
  65. movie_data = self._download_json(
  66. 'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query,
  67. video_id, 'Downloading video formats info')
  68. # For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid
  69. if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id):
  70. return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id)
  71. info = self._download_xml(
  72. 'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
  73. 'Downloading video info')
  74. formats = []
  75. for format_el in movie_data['output_list']['output_list']:
  76. profile = format_el['profile']
  77. format_query = compat_urllib_parse.urlencode({
  78. 'vid': video_id,
  79. 'profile': profile,
  80. })
  81. url_doc = self._download_xml(
  82. 'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
  83. video_id, note='Downloading video data for %s format' % profile)
  84. format_url = url_doc.find('result/url').text
  85. formats.append({
  86. 'url': format_url,
  87. 'format_id': profile,
  88. 'width': int_or_none(format_el.get('width')),
  89. 'height': int_or_none(format_el.get('height')),
  90. 'filesize': int_or_none(format_el.get('filesize')),
  91. })
  92. self._sort_formats(formats)
  93. return {
  94. 'id': video_id,
  95. 'title': info.find('TITLE').text,
  96. 'formats': formats,
  97. 'thumbnail': xpath_text(info, 'THUMB_URL'),
  98. 'description': xpath_text(info, 'CONTENTS'),
  99. 'duration': int_or_none(xpath_text(info, 'DURATION')),
  100. 'upload_date': info.find('REGDTTM').text[:8],
  101. 'view_count': str_to_int(xpath_text(info, 'PLAY_CNT')),
  102. 'comment_count': str_to_int(xpath_text(info, 'COMMENT_CNT')),
  103. }
  104. class DaumClipIE(InfoExtractor):
  105. _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
  106. IE_NAME = 'daum.net:clip'
  107. _TESTS = [{
  108. 'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
  109. 'info_dict': {
  110. 'id': '52554690',
  111. 'ext': 'mp4',
  112. 'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
  113. 'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
  114. 'upload_date': '20130831',
  115. 'thumbnail': 're:^https?://.*\.(?:jpg|png)',
  116. 'duration': 3868,
  117. 'view_count': int,
  118. },
  119. }, {
  120. 'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425',
  121. 'only_matching': True,
  122. }]
  123. def _real_extract(self, url):
  124. video_id = self._match_id(url)
  125. clip_info = self._download_json(
  126. 'http://tvpot.daum.net/mypot/json/GetClipInfo.do?clipid=%s' % video_id,
  127. video_id, 'Downloading clip info')['clip_bean']
  128. return {
  129. '_type': 'url_transparent',
  130. 'id': video_id,
  131. 'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'],
  132. 'title': unescapeHTML(clip_info['title']),
  133. 'thumbnail': clip_info.get('thumb_url'),
  134. 'description': clip_info.get('contents'),
  135. 'duration': int_or_none(clip_info.get('duration')),
  136. 'upload_date': clip_info.get('up_date')[:8],
  137. 'view_count': int_or_none(clip_info.get('play_count')),
  138. 'ie_key': 'Daum',
  139. }
  140. class DaumListIE(InfoExtractor):
  141. def _get_entries(self, list_id, list_id_type):
  142. name = None
  143. entries = []
  144. for pagenum in itertools.count(1):
  145. list_info = self._download_json(
  146. 'http://tvpot.daum.net/mypot/json/GetClipInfo.do?size=48&init=true&order=date&page=%d&%s=%s' % (
  147. pagenum, list_id_type, list_id), list_id,'Downloading list info - %s' % pagenum)
  148. entries.extend([
  149. self.url_result(
  150. 'http://tvpot.daum.net/v/%s' % clip['vid'])
  151. for clip in list_info['clip_list']
  152. ])
  153. if not name:
  154. name = list_info.get('playlist_bean', {}).get('name') or \
  155. list_info.get('potInfo', {}).get('name')
  156. if not list_info.get('has_more'):
  157. break
  158. return name, entries
  159. class DaumPlaylistIE(DaumListIE):
  160. _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View\.do|Top\.tv)\?.*?playlistid=(?P<id>[0-9]+)'
  161. IE_NAME = 'daum.net:playlist'
  162. _TESTS = [{
  163. 'note': 'Playlist url with clipid',
  164. 'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844',
  165. 'info_dict': {
  166. 'id': '6213966',
  167. 'title': 'Woorissica Official',
  168. },
  169. 'playlist_mincount': 181
  170. }, {
  171. 'note': 'Playlist url with clipid - noplaylist',
  172. 'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844',
  173. 'info_dict': {
  174. 'id': '73806844',
  175. 'ext': 'mp4',
  176. 'title': '151017 Airport',
  177. 'upload_date': '20160117',
  178. },
  179. 'params': {
  180. 'noplaylist': True,
  181. 'skip_download': True,
  182. }
  183. }]
  184. def _real_extract(self, url):
  185. if DaumClipIE.suitable(url) and self._downloader.params.get('noplaylist'):
  186. return self.url_result(url, 'DaumClip')
  187. list_id = self._match_id(url)
  188. self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % list_id)
  189. name, entries = self._get_entries(list_id, 'playlistid')
  190. return self.playlist_result(entries, list_id, name)
  191. class DaumUserIE(DaumListIE):
  192. _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View|Top)\.do\?.*?ownerid=(?P<id>[0-9a-zA-Z]+)'
  193. IE_NAME = 'daum.net:user'
  194. _TESTS = [{
  195. 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0',
  196. 'info_dict': {
  197. 'id': 'o2scDLIVbHc0',
  198. 'title': '마이 리틀 텔레비전',
  199. },
  200. 'playlist_mincount': 213
  201. }, {
  202. 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&clipid=73801156',
  203. 'info_dict': {
  204. 'id': '73801156',
  205. 'ext': 'mp4',
  206. 'title': '[미공개] 김구라, 오만석이 부릅니다 \'오케피\' - 마이 리틀 텔레비전 20160116',
  207. 'upload_date': '20160117',
  208. 'description': 'md5:5e91d2d6747f53575badd24bd62b9f36'
  209. },
  210. 'params': {
  211. 'noplaylist': True,
  212. 'skip_download': True,
  213. }
  214. }, {
  215. 'note': 'Playlist url has ownerid and playlistid, playlistid takes precedence',
  216. 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&playlistid=6196631',
  217. 'info_dict': {
  218. 'id': '6196631',
  219. 'title': '마이 리틀 텔레비전 - 20160109',
  220. },
  221. 'playlist_count': 11
  222. }, {
  223. 'url': 'http://tvpot.daum.net/mypot/Top.do?ownerid=o2scDLIVbHc0',
  224. 'only_matching': True,
  225. }]
  226. def _real_extract(self, url):
  227. if DaumClipIE.suitable(url) and self._downloader.params.get('noplaylist'):
  228. return self.url_result(url, 'DaumClip')
  229. if DaumPlaylistIE.suitable(url):
  230. return self.url_result(url, 'DaumPlaylist')
  231. list_id = self._match_id(url)
  232. self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % list_id)
  233. name, entries = self._get_entries(list_id, 'ownerid')
  234. return self.playlist_result(entries, list_id, name)