raywenderlich.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from .vimeo import VimeoIE
  5. from ..utils import (
  6. extract_attributes,
  7. ExtractorError,
  8. orderedSet,
  9. smuggle_url,
  10. unsmuggle_url,
  11. urljoin,
  12. )
  13. class RayWenderlichIE(InfoExtractor):
  14. _VALID_URL = r'https?://videos\.raywenderlich\.com/courses/(?P<course_id>[^/]+)/lessons/(?P<id>\d+)'
  15. _TESTS = [{
  16. 'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
  17. 'info_dict': {
  18. 'id': '248377018',
  19. 'ext': 'mp4',
  20. 'title': 'Testing In iOS Episode 1: Introduction',
  21. 'duration': 133,
  22. 'uploader': 'Ray Wenderlich',
  23. 'uploader_id': 'user3304672',
  24. },
  25. 'params': {
  26. 'noplaylist': True,
  27. 'skip_download': True,
  28. },
  29. 'add_ie': [VimeoIE.ie_key()],
  30. 'expected_warnings': ['HTTP Error 403: Forbidden'],
  31. }, {
  32. 'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
  33. 'info_dict': {
  34. 'title': 'Testing in iOS',
  35. 'id': '105-testing-in-ios',
  36. },
  37. 'params': {
  38. 'noplaylist': False,
  39. },
  40. 'playlist_count': 29,
  41. }]
  42. def _real_extract(self, url):
  43. url, smuggled_data = unsmuggle_url(url, {})
  44. mobj = re.match(self._VALID_URL, url)
  45. course_id, lesson_id = mobj.group('course_id', 'id')
  46. video_id = '%s/%s' % (course_id, lesson_id)
  47. webpage = self._download_webpage(url, video_id)
  48. no_playlist = self._downloader.params.get('noplaylist')
  49. if no_playlist or smuggled_data.get('force_video', False):
  50. if no_playlist:
  51. self.to_screen(
  52. 'Downloading just video %s because of --no-playlist'
  53. % video_id)
  54. if '>Subscribe to unlock' in webpage:
  55. raise ExtractorError(
  56. 'This content is only available for subscribers',
  57. expected=True)
  58. vimeo_id = self._search_regex(
  59. r'data-vimeo-id=["\'](\d+)', webpage, 'video id')
  60. return self.url_result(
  61. VimeoIE._smuggle_referrer(
  62. 'https://player.vimeo.com/video/%s' % vimeo_id, url),
  63. ie=VimeoIE.ie_key(), video_id=vimeo_id)
  64. self.to_screen(
  65. 'Downloading playlist %s - add --no-playlist to just download video'
  66. % course_id)
  67. lesson_ids = set((lesson_id, ))
  68. for lesson in re.findall(
  69. r'(<a[^>]+\bclass=["\']lesson-link[^>]+>)', webpage):
  70. attrs = extract_attributes(lesson)
  71. if not attrs:
  72. continue
  73. lesson_url = attrs.get('href')
  74. if not lesson_url:
  75. continue
  76. lesson_id = self._search_regex(
  77. r'/lessons/(\d+)', lesson_url, 'lesson id', default=None)
  78. if not lesson_id:
  79. continue
  80. lesson_ids.add(lesson_id)
  81. entries = []
  82. for lesson_id in sorted(lesson_ids):
  83. entries.append(self.url_result(
  84. smuggle_url(urljoin(url, lesson_id), {'force_video': True}),
  85. ie=RayWenderlichIE.ie_key()))
  86. title = self._search_regex(
  87. r'class=["\']course-title[^>]+>([^<]+)', webpage, 'course title',
  88. default=None)
  89. return self.playlist_result(entries, course_id, title)