frontendmaster.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import collections
  4. import re
  5. from .common import InfoExtractor
  6. from ..compat import (
  7. compat_urlparse)
  8. from ..utils import (
  9. ExtractorError,
  10. urlencode_postdata,
  11. qualities, unescapeHTML)
  12. class FrontEndMasterBaseIE(InfoExtractor):
  13. _API_BASE = 'https://api.frontendmasters.com/v1/kabuki/courses'
  14. _VIDEO_BASE = 'http://www.frontendmasters.com/courses'
  15. _CAPTIONS_BASE = 'https://api.frontendmasters.com/v1/kabuki/transcripts'
  16. _COOKIES_BASE = 'https://api.frontendmasters.com'
  17. _LOGIN_URL = 'https://frontendmasters.com/login/'
  18. _QUALITIES_PREFERENCE = ('low', 'medium', 'high')
  19. _QUALITIES = {
  20. 'low': {'width': 480, 'height': 360},
  21. 'medium': {'width': 1280, 'height': 720},
  22. 'high': {'width': 1920, 'height': 1080}
  23. }
  24. AllowedQuality = collections.namedtuple('AllowedQuality',
  25. ['ext', 'qualities'])
  26. _ALLOWED_QUALITIES = [
  27. AllowedQuality('webm', ['low', 'medium', 'high']),
  28. AllowedQuality('mp4', ['low', 'medium', 'high'])
  29. ]
  30. def _real_initialize(self):
  31. self._login()
  32. def _login(self):
  33. (username, password) = self._get_login_info()
  34. if username is None:
  35. return
  36. login_page = self._download_webpage(
  37. self._LOGIN_URL, None, 'Downloading login page')
  38. login_form = self._hidden_inputs(login_page)
  39. login_form.update({
  40. 'username': username,
  41. 'password': password
  42. })
  43. post_url = self._search_regex(
  44. r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
  45. 'post_url', default=self._LOGIN_URL, group='url')
  46. if not post_url.startswith('http'):
  47. post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
  48. response = self._download_webpage(
  49. post_url, None, 'Logging in',
  50. data=urlencode_postdata(login_form),
  51. headers={'Content-Type': 'application/x-www-form-urlencoded'}
  52. )
  53. error = self._search_regex(
  54. r'<div[^>]+class=["\']Message MessageAlert["\'][^>]*>'
  55. r'([^<]+)'
  56. r'</div>',
  57. response, 'error message', default=None)
  58. if error:
  59. raise ExtractorError('Unable to login: %s' % unescapeHTML(error),
  60. expected=True)
  61. def _download_course(self, course_id, url):
  62. response = self._download_json(
  63. '%s/%s' % (self._API_BASE, course_id), course_id,
  64. 'Downloading course JSON',
  65. headers={
  66. 'Content-Type': 'application/json;charset=utf-8',
  67. 'Referer': url,
  68. })
  69. return response
  70. @staticmethod
  71. def _pair_section_video_element(lesson_elements):
  72. sections = {}
  73. current_section = None
  74. current_section_number = 0
  75. for elem in lesson_elements:
  76. if not isinstance(elem, int):
  77. elem_name = elem
  78. if not isinstance(elem_name, str):
  79. # convert unicode to str
  80. elem_name = elem.encode('utf-8')
  81. (current_section, current_section_number) = \
  82. (elem_name, current_section_number + 1)
  83. else:
  84. if current_section:
  85. sections[elem] = (current_section, current_section_number)
  86. return sections
  87. class FrontEndMasterIE(FrontEndMasterBaseIE):
  88. IE_NAME = 'frontend-masters'
  89. _VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/' \
  90. r'(?P<courseid>[a-z\-]+)/' \
  91. r'(?P<id>[a-z\-]+)'
  92. _NETRC_MACHINE = 'frontendmasters'
  93. _TEST = {
  94. 'url': 'https://frontendmasters.com/courses/web-development/tools',
  95. 'md5': '7f161159710d6b7016a4f4af6fcb05e2',
  96. 'info_dict': {
  97. 'id': 'tools',
  98. 'title': 'Tools',
  99. 'display_id': 'tools',
  100. 'description': 'md5:82c1ea6472e88ed5acd1829fe992e4f7',
  101. 'ext': 'mp4'
  102. },
  103. 'skip': 'Requires FrontendMasters account credentials',
  104. }
  105. def _get_subtitles(self, video_hash, video_id):
  106. captions = self._download_webpage(
  107. '%s/%s.vtt' % (self._CAPTIONS_BASE, video_hash), video_id,
  108. fatal=False)
  109. if captions:
  110. return {
  111. 'en': [{
  112. 'ext': 'vtt',
  113. 'data': captions
  114. }]
  115. }
  116. def _real_extract(self, url):
  117. mobj = re.match(self._VALID_URL, url)
  118. video_id = mobj.group('id')
  119. course_id = mobj.group('courseid')
  120. course_json_content = self._download_course(course_id=course_id,
  121. url=url)
  122. # Necessary to get mandatory informations like title and video_url
  123. lesson_index = course_json_content.get('lessonSlugs').index(video_id)
  124. lesson_hash = course_json_content.get('lessonHashes')[lesson_index]
  125. lesson_data = course_json_content.get('lessonData')[lesson_hash]
  126. # This is necessary to get the link for the video
  127. lesson_source_base = lesson_data['sourceBase']
  128. lesson_title = lesson_data['title']
  129. # Some optional fields
  130. lesson_description = lesson_data.get('description')
  131. lesson_index = lesson_data.get('index')
  132. lesson_slug = lesson_data.get('slug')
  133. lesson_thumbnail_url = lesson_data.get('thumbnail')
  134. lesson_section_elements = course_json_content.get('lessonElements')
  135. try:
  136. course_sections_pairing = self._pair_section_video_element(
  137. lesson_section_elements)
  138. lesson_section = \
  139. course_sections_pairing.get(lesson_index)[0]
  140. lesson_section_number = \
  141. course_sections_pairing.get(lesson_index)[1]
  142. except Exception:
  143. lesson_section = None
  144. lesson_section_number = None
  145. video_request_url = '%s/source'
  146. video_request_headers = {
  147. 'origin': 'https://frontendmasters.com',
  148. 'referer': lesson_source_base,
  149. }
  150. quality_key = qualities(self._QUALITIES_PREFERENCE)
  151. formats = []
  152. for ext, qualities_ in self._ALLOWED_QUALITIES:
  153. for quality in qualities_:
  154. f = self._QUALITIES[quality].copy()
  155. video_request_params = {
  156. 'r': f['height'],
  157. 'f': ext
  158. }
  159. video_response = self._download_json(
  160. video_request_url % lesson_source_base, video_id,
  161. query=video_request_params, headers=video_request_headers)
  162. video_url = video_response.get('url')
  163. clip_f = f.copy()
  164. clip_f.update({
  165. 'url': video_url,
  166. 'ext': ext,
  167. 'format_id': '%s-%s' % (ext, quality),
  168. 'quality': quality_key(quality),
  169. 'height': f['height']
  170. })
  171. formats.append(clip_f)
  172. self._sort_formats(formats)
  173. subtitles = self.extract_subtitles(lesson_hash, video_id)
  174. return {
  175. 'id': video_id,
  176. 'display_id': lesson_slug,
  177. 'title': lesson_title,
  178. 'description': lesson_description,
  179. 'chapter': lesson_section,
  180. 'chapter_number': lesson_section_number,
  181. 'thumbnail': lesson_thumbnail_url,
  182. 'formats': formats,
  183. 'subtitles': subtitles
  184. }
  185. class FrontEndMasterCourseIE(FrontEndMasterBaseIE):
  186. IE_NAME = 'frontend-masters:course'
  187. _VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<courseid>[a-z\-]+)/?$'
  188. _NETRC_MACHINE = 'frontendmasters'
  189. _TEST = {
  190. 'url': 'https://frontendmasters.com/courses/javascript-basics/',
  191. 'info_dict': {
  192. 'id': 'javascript-basics',
  193. 'title': 'Introduction to JavaScript Programming',
  194. 'description': 'md5:269412fbb76d86954761599ad8e4cbc9'
  195. },
  196. 'playlist_count': 19,
  197. 'skip': 'Requires FrontendMasters account credentials'
  198. }
  199. @classmethod
  200. def suitable(cls, url):
  201. return False if FrontEndMasterIE.suitable(url) else super(FrontEndMasterBaseIE, cls).suitable(url)
  202. def _real_extract(self, url):
  203. mobj = re.match(self._VALID_URL, url)
  204. course_id = mobj.group('courseid')
  205. course_json_content = self._download_course(course_id=course_id,
  206. url=url)
  207. title = course_json_content.get('title')
  208. description = course_json_content.get('description')
  209. course_display_id = course_json_content.get('slug')
  210. videos_data = course_json_content.get('lessonData').values()
  211. videos_data = sorted(videos_data, key=lambda video: video.get('index'))
  212. entries = []
  213. for video in videos_data:
  214. video_slug = video.get('slug')
  215. clip_url = '%s/%s/%s' % (
  216. self._VIDEO_BASE, course_display_id, video_slug)
  217. entries.append({
  218. '_type': 'url_transparent',
  219. 'url': clip_url,
  220. 'ie_key': FrontEndMasterIE.ie_key()
  221. })
  222. return self.playlist_result(entries, course_id, title, description)