camdemy.py 3.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_urlparse
  6. from ..utils import parse_iso8601
  7. class CamdemyIE(InfoExtractor):
  8. _VALID_URL = r'http://www.camdemy.com/media/(?P<id>\d+).*'
  9. _TESTS = [{
  10. # single file
  11. 'url': 'http://www.camdemy.com/media/5181/',
  12. 'md5': '5a5562b6a98b37873119102e052e311b',
  13. 'info_dict': {
  14. 'id': '5181',
  15. 'ext': 'mp4',
  16. 'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
  17. 'thumbnail': 're:^https?://.*\.jpg$',
  18. 'description': '',
  19. 'creator': 'ss11spring',
  20. 'upload_date': '20130114',
  21. 'timestamp': 1358154556,
  22. }
  23. }, {
  24. # With non-empty description
  25. 'url': 'http://www.camdemy.com/media/13885',
  26. 'md5': '4576a3bb2581f86c61044822adbd1249',
  27. 'info_dict': {
  28. 'id': '13885',
  29. 'ext': 'mp4',
  30. 'title': 'EverCam + Camdemy QuickStart',
  31. 'thumbnail': 're:^https?://.*\.jpg$',
  32. 'description': 'md5:050b62f71ed62928f8a35f1a41e186c9',
  33. 'creator': 'evercam',
  34. 'upload_date': '20140620',
  35. 'timestamp': 1403271569,
  36. }
  37. }, {
  38. # External source
  39. 'url': 'http://www.camdemy.com/media/14842',
  40. 'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7',
  41. 'info_dict': {
  42. 'id': '2vsYQzNIsJo',
  43. 'ext': 'mp4',
  44. 'upload_date': '20130211',
  45. 'uploader': 'Hun Kim',
  46. 'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
  47. 'uploader_id': 'hunkimtutorials',
  48. 'title': 'Excel 2013 Tutorial - How to add Password Protection',
  49. }
  50. }]
  51. def _real_extract(self, url):
  52. video_id = self._match_id(url)
  53. page = self._download_webpage(url, video_id)
  54. srcFrom = self._html_search_regex(
  55. r"<div class='srcFrom'>Source: <a title='([^']+)'", page,
  56. 'external source', default=None)
  57. if srcFrom:
  58. return self.url_result(srcFrom)
  59. oembed_obj = self._download_json(
  60. 'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
  61. thumb_url = oembed_obj['thumbnail_url']
  62. video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
  63. fileListXML = self._download_xml(
  64. compat_urlparse.urljoin(video_folder, 'fileList.xml'),
  65. video_id, 'Filelist XML')
  66. fileName = fileListXML.find('./video/item/fileName').text
  67. creation_time = self._html_search_regex(
  68. r"<div class='title'>Posted :</div>.*<div class='value'>([0-9:\- ]+)<",
  69. page, 'creation time', flags=re.MULTILINE | re.DOTALL) + '+08:00'
  70. creation_timestamp = parse_iso8601(creation_time, delimiter=' ')
  71. view_count_str = self._html_search_regex(
  72. r"<div class='title'>Views :</div>.*<div class='value'>([0-9,]+)<",
  73. page, 'view count', flags=re.MULTILINE | re.DOTALL)
  74. views = int(view_count_str.replace(',', ''))
  75. return {
  76. 'id': video_id,
  77. 'url': compat_urlparse.urljoin(video_folder, fileName),
  78. 'title': oembed_obj['title'],
  79. 'thumbnail': thumb_url,
  80. 'description': self._html_search_meta('description', page),
  81. 'creator': oembed_obj['author_name'],
  82. 'duration': oembed_obj['duration'],
  83. 'timestamp': creation_timestamp,
  84. 'view_count': views,
  85. }