123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148 |
- from __future__ import unicode_literals
- import re
- from .common import InfoExtractor
- from ..utils import (
- determine_ext,
- url_or_none,
- int_or_none,
- float_or_none,
- ExtractorError
- )
class NineGagIE(InfoExtractor):
    """Extractor for single 9GAG posts (``9gag.com/gag/<id>``).

    Three post types are handled, as reported by the ``type`` field of the
    post data embedded in the page:

    * ``Video``: hosted by an external provider listed in
      ``_EXTERNAL_VIDEO_PROVIDERS``; extraction is delegated to it.
    * ``EmbedVideo``: delegated to whatever extractor matches ``embedUrl``.
    * ``Animated``: media served by 9GAG itself; formats and thumbnails are
      built from the post's ``images`` mapping.

    Any other post type is rejected with an ``ExtractorError``.
    """

    IE_NAME = '9gag'
    _VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'https://9gag.com/gag/an5Qz5b',
        'info_dict': {
            'id': 'an5Qz5b',
            'ext': 'webm',
            'title': 'Dogs playing tetherball',
            'upload_date': '20191108',
            'timestamp': 1573243994,
            'age_limit': 0,
            'categories': [
                'Wholesome'
            ],
            'tags': [
                'Dog'
            ]
        }
    }, {
        'url': 'https://9gag.com/gag/ae5Ag7B',
        'info_dict': {
            'id': 'ae5Ag7B',
            'ext': 'webm',
            'title': 'Capybara Agility Training',
            'upload_date': '20191108',
            'timestamp': 1573237208,
            'age_limit': 0,
            'categories': [
                'Awesome'
            ],
            'tags': [
                'Weimaraner',
                'American Pit Bull Terrier'
            ]
        }
    }]

    # URL templates for externally hosted videos, keyed by the capitalized
    # provider name from the post data; the same key is forwarded as 'ie_key'.
    _EXTERNAL_VIDEO_PROVIDERS = {
        'Youtube': 'https://youtube.com/watch?v=%s'
    }

    def _real_extract(self, url):
        """Return the info dict for the 9GAG post at *url*.

        Raises ExtractorError if the post is not a video, if it points to an
        external provider that is not in ``_EXTERNAL_VIDEO_PROVIDERS``, or
        (via ``_sort_formats``) if an animated post yields no formats.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The post data is embedded as a JSON document inside a quoted
        # JavaScript string literal passed to JSON.parse().
        raw_json = self._search_regex(
            r'window\._config\s*=\s*JSON\.parse\(["\']({.+?})["\']\);',
            webpage,
            'data')
        # Undo the string-literal escaping of quotes and slashes so that the
        # payload becomes parseable JSON.
        raw_json = raw_json.replace('\\"', '"').replace('\\\\/', '/')
        post = self._parse_json(raw_json, video_id)['data']['post']

        post_type = post.get('type')

        if post_type == 'Video':
            video = post['video']
            ie_key = video['source'].capitalize()
            url_template = self._EXTERNAL_VIDEO_PROVIDERS.get(ie_key)
            if url_template is None:
                raise ExtractorError(
                    'Unsupported external video provider: %s' % ie_key,
                    expected=True)
            external_id = video['id']
            return {
                '_type': 'url_transparent',
                'url': url_template % external_id,
                'ie_key': ie_key,
                'id': external_id,
                'duration': video.get('duration'),
                'start_time': video.get('startTs'),
            }

        if post_type == 'EmbedVideo':
            video = post['video']
            return {
                '_type': 'url_transparent',
                'url': video['embedUrl'],
                'start_time': video.get('startTs'),
            }

        if post_type != 'Animated':
            raise ExtractorError(
                'The given url does not contain a video',
                expected=True)

        duration = None
        formats = []
        thumbnails = []
        for key, image in (post.get('images') or {}).items():
            # The first entry carrying a usable duration wins.
            if duration is None:
                duration = int_or_none(image.get('duration'))

            image_url = url_or_none(image.get('url'))
            if image_url is None:
                continue

            media = {
                'url': image_url,
                'width': int_or_none(image.get('width')),
                'height': int_or_none(image.get('height')),
            }
            ext = determine_ext(image_url)
            if ext in ('jpg', 'png'):
                thumbnails.append(media)
            elif ext in ('webm', 'mp4'):
                # Build the format id from the part of the URL after the last
                # underscore ("<variant>.<ext>" -> "<variant>_<ext>"); fall
                # back to the key of the images mapping if the URL does not
                # have that shape.
                mobj = re.match(r'.*_([^.]+)\.(.*)', image_url)
                if mobj:
                    format_id = '%s_%s' % mobj.groups()
                else:
                    format_id = key
                media.update({
                    'format_id': format_id,
                    'ext': ext,
                })
                formats.append(media)
        self._sort_formats(formats)

        section = None
        section_name = (post.get('postSection') or {}).get('name')
        if section_name:
            # Drop every backslash followed by five non-backslash characters;
            # presumably these are leftover "\uXXXX" escapes that the manual
            # un-escaping above does not decode -- TODO confirm.
            section = re.sub(r'\\[^\\]{5}', '', section_name)

        # The numeric 'nsfw' value is scaled by 18 to obtain the age limit,
        # so 0 maps to an age limit of 0.
        age_limit = int_or_none(post.get('nsfw'))
        if age_limit is not None:
            age_limit = age_limit * 18

        tags = None
        if 'tags' in post:
            tags = [
                tag['key'] for tag in post.get('tags') or []
                if tag.get('key')]

        return {
            'id': video_id,
            'title': post['title'],
            'timestamp': int_or_none(post.get('creationTs')),
            'duration': duration,
            'formats': formats,
            'thumbnails': thumbnails,
            'like_count': int_or_none(post.get('upVoteCount')),
            'dislike_count': int_or_none(post.get('downVoteCount')),
            'comment_count': int_or_none(post.get('commentsCount')),
            'age_limit': age_limit,
            'categories': [section] if section else None,
            'tags': tags,
            'is_live': False,
        }
|