Browse Source

[generic] Follow redirects specified by `Refresh` HTTP header

Sergey M․ 10 years ago
parent
commit
84f8101606
1 changed file with 7 additions and 1 deletion
  1. 7 1
      youtube_dl/extractor/generic.py

+ 7 - 1
youtube_dl/extractor/generic.py

@@ -1270,8 +1270,14 @@ class GenericIE(InfoExtractor):
         if not found:
         if not found:
             found = re.search(
             found = re.search(
                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
-                r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)',
+                r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)',
                 webpage)
                 webpage)
+            if not found:
+                # Look also in Refresh HTTP header
+                refresh_header = head_response.headers.get('Refresh')
+                if refresh_header:
+                    found = re.search(
+                        r'[0-9]{,2};\s*(?:URL|url)=(.+)', refresh_header)
             if found:
             if found:
                 new_url = found.group(1)
                 new_url = found.group(1)
                 self.report_following_redirect(new_url)
                 self.report_following_redirect(new_url)