camdemy.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_urllib_parse
  6. from ..utils import parse_iso8601
  7. class CamdemyIE(InfoExtractor):
  8. _VALID_URL = r'http://www.camdemy.com/media/(?P<id>\d+)'
  9. _TESTS = [{
  10. # single file
  11. 'url': 'http://www.camdemy.com/media/5181/',
  12. 'md5': '5a5562b6a98b37873119102e052e311b',
  13. 'info_dict': {
  14. 'id': '5181',
  15. 'ext': 'mp4',
  16. 'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
  17. 'thumbnail': 're:^https?://.*\.jpg$',
  18. 'description': '',
  19. 'creator': 'ss11spring',
  20. 'upload_date': '20130114',
  21. 'timestamp': 1358154556,
  22. }
  23. }, {
  24. # With non-empty description
  25. 'url': 'http://www.camdemy.com/media/13885',
  26. 'md5': '4576a3bb2581f86c61044822adbd1249',
  27. 'info_dict': {
  28. 'id': '13885',
  29. 'ext': 'mp4',
  30. 'title': 'EverCam + Camdemy QuickStart',
  31. 'thumbnail': 're:^https?://.*\.jpg$',
  32. 'description': 'md5:050b62f71ed62928f8a35f1a41e186c9',
  33. 'creator': 'evercam',
  34. 'upload_date': '20140620',
  35. 'timestamp': 1403271569,
  36. }
  37. }, {
  38. # External source
  39. 'url': 'http://www.camdemy.com/media/14842',
  40. 'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7',
  41. 'info_dict': {
  42. 'id': '2vsYQzNIsJo',
  43. 'ext': 'mp4',
  44. 'upload_date': '20130211',
  45. 'uploader': 'Hun Kim',
  46. 'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
  47. 'uploader_id': 'hunkimtutorials',
  48. 'title': 'Excel 2013 Tutorial - How to add Password Protection',
  49. }
  50. }]
  51. def _real_extract(self, url):
  52. video_id = self._match_id(url)
  53. page = self._download_webpage(url, video_id)
  54. srcFrom = self._html_search_regex(
  55. r"<div class='srcFrom'>Source: <a title='([^']+)'", page,
  56. 'external source', default=None)
  57. if srcFrom:
  58. return self.url_result(srcFrom)
  59. oembed_obj = self._download_json(
  60. 'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
  61. thumb_url = oembed_obj['thumbnail_url']
  62. video_folder = compat_urllib_parse.urljoin(thumb_url, 'video/')
  63. fileListXML = self._download_xml(
  64. compat_urllib_parse.urljoin(video_folder, 'fileList.xml'),
  65. video_id, 'Filelist XML')
  66. fileName = fileListXML.find('./video/item/fileName').text
  67. creation_time = self._html_search_regex(
  68. r"<div class='title'>Posted :</div>[\r\n ]*<div class='value'>([^<>]+)<",
  69. page, 'creation time', flags=re.MULTILINE) + '+08:00'
  70. creation_timestamp = parse_iso8601(creation_time, delimiter=' ')
  71. view_count_str = self._html_search_regex(
  72. r"<div class='title'>Views :</div>[\r\n ]*<div class='value'>([^<>]+)<",
  73. page, 'view count', flags=re.MULTILINE)
  74. views = int(view_count_str.replace(',', ''))
  75. return {
  76. 'id': video_id,
  77. 'url': compat_urllib_parse.urljoin(video_folder, fileName),
  78. 'title': oembed_obj['title'],
  79. 'thumbnail': thumb_url,
  80. 'description': self._html_search_meta('description', page),
  81. 'creator': oembed_obj['author_name'],
  82. 'duration': oembed_obj['duration'],
  83. 'timestamp': creation_timestamp,
  84. 'view_count': views,
  85. }
  86. class CamdemyFolderIE(InfoExtractor):
  87. _VALID_URL = r'http://www.camdemy.com/folder/(?P<id>\d+)'
  88. _TESTS = [{
  89. # links with trailing slash
  90. 'url': 'http://www.camdemy.com/folder/450',
  91. 'info_dict': {
  92. 'id': '450',
  93. 'title': '信號與系統 2012 & 2011 (Signals and Systems)',
  94. },
  95. 'playlist_mincount': 145
  96. }, {
  97. # links without trailing slash
  98. # and multi-page
  99. 'url': 'http://www.camdemy.com/folder/853',
  100. 'info_dict': {
  101. 'id': '853',
  102. 'title': '科學計算 - 使用 Matlab'
  103. },
  104. 'playlist_mincount': 20
  105. }, {
  106. # with displayMode parameter. For testing the codes to add parameters
  107. 'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
  108. 'info_dict': {
  109. 'id': '853',
  110. 'title': '科學計算 - 使用 Matlab'
  111. },
  112. 'playlist_mincount': 20
  113. }]
  114. def _real_extract(self, url):
  115. folder_id = self._match_id(url)
  116. # Add displayMode=list so that all links are displayed in a single page
  117. parsed_url = list(compat_urllib_parse.urlparse(url))
  118. query = dict(compat_urllib_parse.parse_qsl(parsed_url[4]))
  119. query.update({'displayMode': 'list'})
  120. parsed_url[4] = compat_urllib_parse.urlencode(query)
  121. final_url = compat_urllib_parse.urlunparse(parsed_url)
  122. page = self._download_webpage(final_url, folder_id)
  123. matches = re.findall(r"href='(/media/\d+/?)'", page)
  124. entries = [self.url_result('http://www.camdemy.com' + media_path)
  125. for media_path in matches]
  126. folder_title = self._html_search_meta('keywords', page)
  127. return self.playlist_result(entries, folder_id, folder_title)