Bladeren bron

[youtube] Extract alt_title and creator for music videos (Closes #7862)

Sergey M․ 9 jaar geleden
bovenliggende
commit
0cb58b0259
1 gewijzigd bestand met 22 toevoegingen en 0 verwijderingen
  1. 22 0
      youtube_dl/extractor/youtube.py

+ 22 - 0
youtube_dl/extractor/youtube.py

@@ -33,6 +33,7 @@ from ..utils import (
     int_or_none,
     int_or_none,
     orderedSet,
     orderedSet,
     parse_duration,
     parse_duration,
+    remove_quotes,
     remove_start,
     remove_start,
     sanitized_Request,
     sanitized_Request,
     smuggle_url,
     smuggle_url,
@@ -395,12 +396,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'ext': 'mp4',
                 'upload_date': '20120506',
                 'upload_date': '20120506',
                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
+                'alt_title': 'I Love It (feat. Charli XCX)',
                 'description': 'md5:782e8651347686cba06e58f71ab51773',
                 'description': 'md5:782e8651347686cba06e58f71ab51773',
                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
                          'iconic ep', 'iconic', 'love', 'it'],
                          'iconic ep', 'iconic', 'love', 'it'],
                 'uploader': 'Icona Pop',
                 'uploader': 'Icona Pop',
                 'uploader_id': 'IconaPop',
                 'uploader_id': 'IconaPop',
+                'creator': 'Icona Pop',
             }
             }
         },
         },
         {
         {
@@ -411,9 +414,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'ext': 'mp4',
                 'upload_date': '20130703',
                 'upload_date': '20130703',
                 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
                 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
+                'alt_title': 'Tunnel Vision',
                 'description': 'md5:64249768eec3bc4276236606ea996373',
                 'description': 'md5:64249768eec3bc4276236606ea996373',
                 'uploader': 'justintimberlakeVEVO',
                 'uploader': 'justintimberlakeVEVO',
                 'uploader_id': 'justintimberlakeVEVO',
                 'uploader_id': 'justintimberlakeVEVO',
+                'creator': 'Justin Timberlake',
                 'age_limit': 18,
                 'age_limit': 18,
             }
             }
         },
         },
@@ -492,10 +497,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': 'nfWlot6h_JM',
                 'id': 'nfWlot6h_JM',
                 'ext': 'm4a',
                 'ext': 'm4a',
                 'title': 'Taylor Swift - Shake It Off',
                 'title': 'Taylor Swift - Shake It Off',
+                'alt_title': 'Shake It Off',
                 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
                 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
                 'uploader': 'TaylorSwiftVEVO',
                 'uploader': 'TaylorSwiftVEVO',
                 'uploader_id': 'TaylorSwiftVEVO',
                 'uploader_id': 'TaylorSwiftVEVO',
                 'upload_date': '20140818',
                 'upload_date': '20140818',
+                'creator': 'Taylor Swift',
             },
             },
             'params': {
             'params': {
                 'youtube_include_dash_manifest': True,
                 'youtube_include_dash_manifest': True,
@@ -551,9 +558,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'ext': 'mp4',
                 'upload_date': '20100430',
                 'upload_date': '20100430',
                 'uploader_id': 'deadmau5',
                 'uploader_id': 'deadmau5',
+                'creator': 'deadmau5',
                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
                 'uploader': 'deadmau5',
                 'uploader': 'deadmau5',
                 'title': 'Deadmau5 - Some Chords (HD)',
                 'title': 'Deadmau5 - Some Chords (HD)',
+                'alt_title': 'Some Chords',
             },
             },
             'expected_warnings': [
             'expected_warnings': [
                 'DASH manifest missing',
                 'DASH manifest missing',
@@ -701,10 +710,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': 'lsguqyKfVQg',
                 'id': 'lsguqyKfVQg',
                 'ext': 'mp4',
                 'ext': 'mp4',
                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
+                'alt_title': 'Dark Walk',
                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
                 'upload_date': '20151119',
                 'upload_date': '20151119',
                 'uploader_id': 'IronSoulElf',
                 'uploader_id': 'IronSoulElf',
                 'uploader': 'IronSoulElf',
                 'uploader': 'IronSoulElf',
+                'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
             },
             },
             'params': {
             'params': {
                 'skip_download': True,
                 'skip_download': True,
@@ -1308,6 +1319,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
                 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
         upload_date = unified_strdate(upload_date)
         upload_date = unified_strdate(upload_date)
 
 
+        m_music = re.search(
+            r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
+            video_webpage)
+        if m_music:
+            video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
+            video_creator = clean_html(m_music.group('creator'))
+        else:
+            video_alt_title = video_creator = None
+
         m_cat_container = self._search_regex(
         m_cat_container = self._search_regex(
             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
             video_webpage, 'categories', default=None)
             video_webpage, 'categories', default=None)
@@ -1537,7 +1557,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'uploader': video_uploader,
             'uploader': video_uploader,
             'uploader_id': video_uploader_id,
             'uploader_id': video_uploader_id,
             'upload_date': upload_date,
             'upload_date': upload_date,
+            'creator': video_creator,
             'title': video_title,
             'title': video_title,
+            'alt_title': video_alt_title,
             'thumbnail': video_thumbnail,
             'thumbnail': video_thumbnail,
             'description': video_description,
             'description': video_description,
             'categories': video_categories,
             'categories': video_categories,