# newgrounds.py (4.3 KB)

  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. extract_attributes,
  6. int_or_none,
  7. )
  8. class NewgroundsIE(InfoExtractor):
  9. _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>[0-9]+)'
  10. _TESTS = [{
  11. 'url': 'https://www.newgrounds.com/audio/listen/549479',
  12. 'md5': 'fe6033d297591288fa1c1f780386f07a',
  13. 'info_dict': {
  14. 'id': '549479',
  15. 'ext': 'mp3',
  16. 'title': 'B7 - BusMode',
  17. 'uploader': 'Burn7',
  18. }
  19. }, {
  20. 'url': 'https://www.newgrounds.com/portal/view/673111',
  21. 'md5': '3394735822aab2478c31b1004fe5e5bc',
  22. 'info_dict': {
  23. 'id': '673111',
  24. 'ext': 'mp4',
  25. 'title': 'Dancin',
  26. 'uploader': 'Squirrelman82',
  27. },
  28. }, {
  29. # source format unavailable, additional mp4 formats
  30. 'url': 'http://www.newgrounds.com/portal/view/689400',
  31. 'info_dict': {
  32. 'id': '689400',
  33. 'ext': 'mp4',
  34. 'title': 'ZTV News Episode 8',
  35. 'uploader': 'BennettTheSage',
  36. },
  37. 'params': {
  38. 'skip_download': True,
  39. },
  40. }]
  41. def _real_extract(self, url):
  42. media_id = self._match_id(url)
  43. webpage = self._download_webpage(url, media_id)
  44. title = self._html_search_regex(
  45. r'<title>([^>]+)</title>', webpage, 'title')
  46. video_url = self._parse_json(self._search_regex(
  47. r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id)
  48. formats = [{
  49. 'url': video_url,
  50. 'format_id': 'source',
  51. 'quality': 1,
  52. }]
  53. max_resolution = int_or_none(self._search_regex(
  54. r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
  55. default=None))
  56. if max_resolution:
  57. url_base = video_url.rpartition('.')[0]
  58. for resolution in (360, 720, 1080):
  59. if resolution > max_resolution:
  60. break
  61. formats.append({
  62. 'url': '%s.%dp.mp4' % (url_base, resolution),
  63. 'format_id': '%dp' % resolution,
  64. 'height': resolution,
  65. })
  66. self._check_formats(formats, media_id)
  67. self._sort_formats(formats)
  68. uploader = self._html_search_regex(
  69. r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',
  70. fatal=False)
  71. return {
  72. 'id': media_id,
  73. 'title': title,
  74. 'uploader': uploader,
  75. 'formats': formats,
  76. }
  77. class NewgroundsPlaylistIE(InfoExtractor):
  78. _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P<id>[^/?#&]+)'
  79. _TESTS = [{
  80. 'url': 'https://www.newgrounds.com/collection/cats',
  81. 'info_dict': {
  82. 'id': 'cats',
  83. 'title': 'Cats',
  84. },
  85. 'playlist_mincount': 46,
  86. }, {
  87. 'url': 'http://www.newgrounds.com/portal/search/author/ZONE-SAMA',
  88. 'info_dict': {
  89. 'id': 'ZONE-SAMA',
  90. 'title': 'Portal Search: ZONE-SAMA',
  91. },
  92. 'playlist_mincount': 47,
  93. }, {
  94. 'url': 'http://www.newgrounds.com/audio/search/title/cats',
  95. 'only_matching': True,
  96. }]
  97. def _real_extract(self, url):
  98. playlist_id = self._match_id(url)
  99. webpage = self._download_webpage(url, playlist_id)
  100. title = self._search_regex(
  101. r'<title>([^>]+)</title>', webpage, 'title', default=None)
  102. # cut left menu
  103. webpage = self._search_regex(
  104. r'(?s)<div[^>]+\bclass=["\']column wide(.+)',
  105. webpage, 'wide column', default=webpage)
  106. entries = []
  107. for a, path, media_id in re.findall(
  108. r'(<a[^>]+\bhref=["\']/?((?:portal/view|audio/listen)/(\d+))[^>]+>)',
  109. webpage):
  110. a_class = extract_attributes(a).get('class')
  111. if a_class not in ('item-portalsubmission', 'item-audiosubmission'):
  112. continue
  113. entries.append(
  114. self.url_result(
  115. 'https://www.newgrounds.com/%s' % path,
  116. ie=NewgroundsIE.ie_key(), video_id=media_id))
  117. return self.playlist_result(entries, playlist_id, title)