소스 검색

[francetvinfo] Improve video id extraction (closes #28792)

Sergey M․ 4년 전
부모
커밋
57eaaff5cf
1개의 변경된 파일, 5개의 추가 및 1개의 삭제
  1. 5 1
      youtube_dl/extractor/francetv.py

+ 5 - 1
youtube_dl/extractor/francetv.py

@@ -383,6 +383,10 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
     }, {
     }, {
         'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
         'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        # "<figure id=" pattern (#28792)
+        'url': 'https://www.francetvinfo.fr/culture/patrimoine/incendie-de-notre-dame-de-paris/notre-dame-de-paris-de-l-incendie-de-la-cathedrale-a-sa-reconstruction_4372291.html',
+        'only_matching': True,
     }]
     }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
@@ -400,7 +404,7 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
             (r'player\.load[^;]+src:\s*["\']([^"\']+)',
             (r'player\.load[^;]+src:\s*["\']([^"\']+)',
              r'id-video=([^@]+@[^"]+)',
              r'id-video=([^@]+@[^"]+)',
              r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
              r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
-             r'data-id=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
+             r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
             webpage, 'video id')
             webpage, 'video id')
 
 
         return self._make_url_result(video_id)
         return self._make_url_result(video_id)