Forráskód Böngészése

Add ArteTVCategoryIE to support category playlists

df 4 éve
szülő
commit
ddc080a562
2 módosított fájl, 48 hozzáadás és 0 törlés
  1. 47 0
      youtube_dl/extractor/arte.py
  2. 1 0
      youtube_dl/extractor/extractors.py

+ 47 - 0
youtube_dl/extractor/arte.py

@@ -12,6 +12,7 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     qualities,
+    strip_or_none,
     try_get,
     unified_strdate,
     url_or_none,
@@ -252,3 +253,49 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
         title = collection.get('title')
         description = collection.get('shortDescription') or collection.get('teaserText')
         return self.playlist_result(entries, playlist_id, title, description)
+
+
+class ArteTVCategoryIE(ArteTVBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
+    _TESTS = [{
+        'url': 'https://www.arte.tv/en/videos/politics-and-society/',
+        'info_dict': {
+            'id': 'politics-and-society',
+            'title': 'Politics and society',
+            'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
+        },
+        'playlist_mincount': 13,
+    },
+    ]
+
+    @classmethod
+    def suitable(cls, url):
+        return (
+            not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
+            and super(ArteTVCategoryIE, cls).suitable(url))
+
+    def _real_extract(self, url):
+        lang, playlist_id = re.match(self._VALID_URL, url).groups()
+        webpage = self._download_webpage(url, playlist_id)
+
+        items = []
+        for video in re.finditer(
+                r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
+                webpage):
+            video = video.group('url')
+            if video == url:
+                continue
+            if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
+                items.append(video)
+
+        if items:
+            title = (self._og_search_title(webpage, default=None)
+                     or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title>', default=None))
+            title = strip_or_none(title.rsplit('|', 1)[0]) or self._generic_title(url)
+
+            result = self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title)
+            if result:
+                description = self._og_search_description(webpage, default=None)
+                if description:
+                    result['description'] = description
+                return result

+ 1 - 0
youtube_dl/extractor/extractors.py

@@ -71,6 +71,7 @@ from .arte import (
     ArteTVIE,
     ArteTVEmbedIE,
     ArteTVPlaylistIE,
+    ArteTVCategoryIE,
 )
 from .arnes import ArnesIE
 from .asiancrush import (