Browse Source

[karrierevideos] Fix extraction

The server sends a malformed header, "Content Type: text/xml", for the XML
request (the name should be Content-Type, not Content Type). Python 3.x,
which parses headers with email.feedparser, rejects such headers. As a
result, the Content-Encoding header is not parsed, so the returned content
is never decompressed, which causes an XML parsing error.
Yen Chi Hsuan 9 years ago
parent
commit
f141fefab7
1 changed file with 4 additions and 1 deletion
  1. 4 1
      youtube_dl/extractor/karrierevideos.py

+ 4 - 1
youtube_dl/extractor/karrierevideos.py

@@ -52,9 +52,12 @@ class KarriereVideosIE(InfoExtractor):
 
 
         video_id = self._search_regex(
         video_id = self._search_regex(
             r'/config/video/(.+?)\.xml', webpage, 'video id')
             r'/config/video/(.+?)\.xml', webpage, 'video id')
+        # Server returns malformed headers
+        # Force Accept-Encoding: * to prevent gzipped results
         playlist = self._download_xml(
         playlist = self._download_xml(
             'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id,
             'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id,
-            video_id, transform_source=fix_xml_ampersands)
+            video_id, transform_source=fix_xml_ampersands,
+            headers={'Accept-Encoding': '*'})
 
 
         NS_MAP = {
         NS_MAP = {
             'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
             'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'