Browse Source

[radiojavan] Simplify and extract upload date

Sergey M․ cách đây 10 năm
mục cha
commit
7cf97daf77
1 tập tin đã thay đổi với 35 bổ sung và 40 xóa
  1. 35 40
      youtube_dl/extractor/radiojavan.py

+ 35 - 40
youtube_dl/extractor/radiojavan.py

@@ -1,12 +1,14 @@
-# coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import(
-    parse_duration,
-    str_to_int
+    unified_strdate,
+    str_to_int,
 )
 
+
 class RadioJavanIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
     _TEST = {
@@ -16,56 +18,49 @@ class RadioJavanIE(InfoExtractor):
             'id': 'chaartaar-ashoobam',
             'ext': 'mp4',
             'title': 'Chaartaar - Ashoobam',
-            'description': 'Chaartaar - Ashoobam',
             'thumbnail': 're:^https?://.*\.jpe?g$',
+            'upload_date': '20150215',
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
         }
     }
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        urls = list()
-        prefix = 'https://media.rdjavan.com/media/music_video/'
+        video_id = self._match_id(url)
 
-        video_url_480 = self._search_regex(
-            r'RJ\.video480p = \'([^\']+)\'', webpage, '480 video url', fatal= False)
-        video_url_720 = self._search_regex(
-            r'RJ\.video720p = \'([^\']+)\'', webpage, '720 video url', fatal= False)
-        video_url_1080 = self._search_regex(
-            r'RJ\.video1080p = \'([^\']+)\'', webpage, '1080 video url', fatal= False)
+        webpage = self._download_webpage(url, video_id)
 
-        if video_url_480:
-            urls.append({'url': prefix + video_url_480, 'format': '480p'})
-        if video_url_720:
-            urls.append({'url': prefix + video_url_720, 'format': '720p'})
-        if video_url_1080:
-            urls.append({'url': prefix + video_url_1080, 'format': '1080p'})
+        formats = [{
+            'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path,
+            'format_id': '%sp' % height,
+            'height': height,
+        } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
 
         title = self._og_search_title(webpage)
         thumbnail = self._og_search_thumbnail(webpage)
-        formats = [{
-            'url': url['url'],
-            'format': url['format']
-        } for url in urls]
 
-        likes = self._search_regex(
-            r'<span class="rating">([\d,]+)\s*likes</span>', webpage, 'Likes Count', fatal=False )
-        likes = likes.replace(',', '')
-        dislikes = self._search_regex(
-            r'<span class="rating">([\d,]+)\s*dislikes</span>', webpage, 'Dislikes Count', fatal=False )
-        dislikes = dislikes.replace(',', '')
+        upload_date = unified_strdate(self._search_regex(
+            r'class="date_added">Date added: ([^<]+)<',
+            webpage, 'upload date', fatal=False))
 
-        plays = self._search_regex(
-            r'views_publish[">\s]*<span[^>]+class="views">Plays: ([\d,]+)</span>', webpage, 'Play Count', fatal=False )
-        plays = plays.replace(',', '')
+        view_count = str_to_int(self._search_regex(
+            r'class="views">Plays: ([\d,]+)',
+            webpage, 'view count', fatal=False))
+        like_count = str_to_int(self._search_regex(
+            r'class="rating">([\d,]+) likes',
+            webpage, 'like count', fatal=False))
+        dislike_count = str_to_int(self._search_regex(
+            r'class="rating">([\d,]+) dislikes',
+            webpage, 'dislike count', fatal=False))
 
         return {
-            'formats': formats,
-            'id': display_id,
+            'id': video_id,
             'title': title,
-            'description': title, # no description provided in RadioJavan
             'thumbnail': thumbnail,
-            'like_count': str_to_int(likes),
-            'dislike_count': str_to_int(dislikes),
-            'viewCount': str_to_int(plays)
-        }
+            'upload_date': upload_date,
+            'view_count': view_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'formats': formats,
+        }