Переглянути джерело

[biqle] fix extraction(closes #11471)(closes #15313)

Remita Amine 6 років тому
батько
коміт
e7e3ec828b
1 змінений файл: 69 додано та 12 видалено
  1. 69 12
      youtube_dl/extractor/biqle.py

+ 69 - 12
youtube_dl/extractor/biqle.py

@@ -2,39 +2,96 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from .vk import VKIE
+from ..utils import (
+    HEADRequest,
+    int_or_none,
+)
 
 
 class BIQLEIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
     _TESTS = [{
-        'url': 'http://www.biqle.ru/watch/847655_160197695',
-        'md5': 'ad5f746a874ccded7b8f211aeea96637',
+        # Youtube embed
+        'url': 'https://biqle.ru/watch/-115995369_456239081',
+        'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06',
         'info_dict': {
-            'id': '160197695',
+            'id': '8v4f-avW-VI',
             'ext': 'mp4',
-            'title': 'Foo Fighters - The Pretender (Live at Wembley Stadium)',
-            'uploader': 'Andrey Rogozin',
-            'upload_date': '20110605',
-        }
+            'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer",
+            'description': 'Passe-Partout',
+            'uploader_id': 'mrsimpsonstef3',
+            'uploader': 'Phanolito',
+            'upload_date': '20120822',
+        },
     }, {
-        'url': 'https://biqle.org/watch/-44781847_168547604',
+        'url': 'http://biqle.org/watch/-44781847_168547604',
         'md5': '7f24e72af1db0edf7c1aaba513174f97',
         'info_dict': {
             'id': '168547604',
             'ext': 'mp4',
             'title': 'Ребенок в шоке от автоматической мойки',
+            'timestamp': 1396633454,
             'uploader': 'Dmitry Kotov',
+            'upload_date': '20140404',
+            'uploader_id': '47850140',
         },
-        'skip': ' This video was marked as adult.  Embedding adult videos on external sites is prohibited.',
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
         embed_url = self._proto_relative_url(self._search_regex(
-            r'<iframe.+?src="((?:http:)?//daxab\.com/[^"]+)".*?></iframe>', webpage, 'embed url'))
+            r'<iframe.+?src="((?:https?:)?//daxab\.com/[^"]+)".*?></iframe>',
+            webpage, 'embed url'))
+        if VKIE.suitable(embed_url):
+            return self.url_result(embed_url, VKIE.ie_key(), video_id)
+
+        self._request_webpage(
+            HEADRequest(embed_url), video_id, headers={'Referer': url})
+        video_id, sig, _, access_token = self._get_cookies(embed_url)['video_ext'].value.split('%3A')
+        item = self._download_json(
+            'https://api.vk.com/method/video.get', video_id,
+            headers={'User-Agent': 'okhttp/3.4.1'}, query={
+                'access_token': access_token,
+                'sig': sig,
+                'v': 5.44,
+                'videos': video_id,
+            })['response']['items'][0]
+        title = item['title']
+
+        formats = []
+        for f_id, f_url in item.get('files', {}).items():
+            if f_id == 'external':
+                return self.url_result(f_url)
+            ext, height = f_id.split('_')
+            formats.append({
+                'format_id': height + 'p',
+                'url': f_url,
+                'height': int_or_none(height),
+                'ext': ext,
+            })
+        self._sort_formats(formats)
+
+        thumbnails = []
+        for k, v in item.items():
+            if k.startswith('photo_') and v:
+                width = k.replace('photo_', '')
+                thumbnails.append({
+                    'id': width,
+                    'url': v,
+                    'width': int_or_none(width),
+                })
 
         return {
-            '_type': 'url_transparent',
-            'url': embed_url,
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'comment_count': int_or_none(item.get('comments')),
+            'description': item.get('description'),
+            'duration': int_or_none(item.get('duration')),
+            'thumbnails': thumbnails,
+            'timestamp': int_or_none(item.get('date')),
+            'uploader': item.get('owner_id'),
+            'view_count': int_or_none(item.get('views')),
         }