Przeglądaj źródła

[wdr] Relax media link regex (closes #14447)

Sergey M․ 8 lat temu
rodzic
commit
8992331621
1 zmieniony plik z 7 dodaniami i 2 usunięciami
  1. 7 2
      youtube_dl/extractor/wdr.py

+ 7 - 2
youtube_dl/extractor/wdr.py

@@ -22,8 +22,13 @@ class WDRBaseIE(InfoExtractor):
         # for wdrmaus, in a tag with the class "videoButton" (previously a link
         # to the page in a multiline "videoLink"-tag)
         json_metadata = self._html_search_regex(
-            r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"',
-            webpage, 'media link', default=None, flags=re.MULTILINE)
+            r'''(?sx)class=
+                    (?:
+                        (["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+|
+                        (["\'])videoLink\b.*?\2[\s]*>\n[^\n]*
+                    )data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3
+            ''',
+            webpage, 'media link', default=None, group='data')
 
         if not json_metadata:
             return