openload.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import os
  4. import re
  5. import subprocess
  6. import tempfile
  7. from .common import InfoExtractor
  8. from ..utils import (
  9. check_executable,
  10. determine_ext,
  11. encodeArgument,
  12. ExtractorError,
  13. )
  14. class OpenloadIE(InfoExtractor):
  15. _VALID_URL = r'https?://(?:openload\.(?:co|io)|oload\.tv)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
  16. _TESTS = [{
  17. 'url': 'https://openload.co/f/kUEfGclsU9o',
  18. 'md5': 'bf1c059b004ebc7a256f89408e65c36e',
  19. 'info_dict': {
  20. 'id': 'kUEfGclsU9o',
  21. 'ext': 'mp4',
  22. 'title': 'skyrim_no-audio_1080.mp4',
  23. 'thumbnail': r're:^https?://.*\.jpg$',
  24. },
  25. }, {
  26. 'url': 'https://openload.co/embed/rjC09fkPLYs',
  27. 'info_dict': {
  28. 'id': 'rjC09fkPLYs',
  29. 'ext': 'mp4',
  30. 'title': 'movie.mp4',
  31. 'thumbnail': r're:^https?://.*\.jpg$',
  32. 'subtitles': {
  33. 'en': [{
  34. 'ext': 'vtt',
  35. }],
  36. },
  37. },
  38. 'params': {
  39. 'skip_download': True, # test subtitles only
  40. },
  41. }, {
  42. 'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
  43. 'only_matching': True,
  44. }, {
  45. 'url': 'https://openload.io/f/ZAn6oz-VZGE/',
  46. 'only_matching': True,
  47. }, {
  48. 'url': 'https://openload.co/f/_-ztPaZtMhM/',
  49. 'only_matching': True,
  50. }, {
  51. # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
  52. # for title and ext
  53. 'url': 'https://openload.co/embed/Sxz5sADo82g/',
  54. 'only_matching': True,
  55. }, {
  56. 'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
  57. 'only_matching': True,
  58. }]
  59. _PHANTOMJS_SCRIPT = r'''
  60. phantom.onError = function(msg, trace) {
  61. var msgStack = ['PHANTOM ERROR: ' + msg];
  62. if(trace && trace.length) {
  63. msgStack.push('TRACE:');
  64. trace.forEach(function(t) {
  65. msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line
  66. + (t.function ? ' (in function ' + t.function +')' : ''));
  67. });
  68. }
  69. console.error(msgStack.join('\n'));
  70. phantom.exit(1);
  71. };
  72. var page = require('webpage').create();
  73. page.settings.resourceTimeout = 10000;
  74. page.onInitialized = function() {
  75. page.evaluate(function() {
  76. delete window._phantom;
  77. delete window.callPhantom;
  78. });
  79. };
  80. page.open('https://openload.co/embed/%s/', function(status) {
  81. var info = page.evaluate(function() {
  82. return {
  83. decoded_id: document.getElementById('streamurl').innerHTML,
  84. title: document.querySelector('meta[name="og:title"],'
  85. + 'meta[name=description]').content
  86. };
  87. });
  88. console.log(info.decoded_id + ' ' + info.title);
  89. phantom.exit();
  90. });'''
  91. @staticmethod
  92. def _extract_urls(webpage):
  93. return re.findall(
  94. r'<iframe[^>]+src=["\']((?:https?://)?(?:openload\.(?:co|io)|oload\.tv)/embed/[a-zA-Z0-9-_]+)',
  95. webpage)
  96. def _real_extract(self, url):
  97. exe = check_executable('phantomjs', ['-v'])
  98. if not exe:
  99. raise ExtractorError('PhantomJS executable not found in PATH, '
  100. 'download it from http://phantomjs.org',
  101. expected=True)
  102. video_id = self._match_id(url)
  103. url = 'https://openload.co/embed/%s/' % video_id
  104. webpage = self._download_webpage(url, video_id)
  105. if 'File not found' in webpage or 'deleted by the owner' in webpage:
  106. raise ExtractorError('File not found', expected=True, video_id=video_id)
  107. script_file = tempfile.NamedTemporaryFile(mode='w', delete=False)
  108. # write JS script to file and close it
  109. with script_file:
  110. script_file.write(self._PHANTOMJS_SCRIPT % video_id)
  111. self.to_screen('%s: Decoding video ID with PhantomJS' % video_id)
  112. p = subprocess.Popen([exe, '--ssl-protocol=any', script_file.name],
  113. stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  114. output, err = p.communicate()
  115. if p.returncode != 0:
  116. raise ExtractorError('Decoding failed\n:'
  117. + encodeArgument(err))
  118. else:
  119. decoded_id, title = encodeArgument(output).strip().split(' ', 1)
  120. os.remove(script_file.name)
  121. video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
  122. entries = self._parse_html5_media_entries(url, webpage, video_id)
  123. entry = entries[0] if entries else {}
  124. subtitles = entry.get('subtitles')
  125. info_dict = {
  126. 'id': video_id,
  127. 'title': title,
  128. 'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
  129. 'url': video_url,
  130. # Seems all videos have extensions in their titles
  131. 'ext': determine_ext(title, 'mp4'),
  132. 'subtitles': subtitles,
  133. }
  134. return info_dict