12 years ago · 545434670b
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -59,6 +59,7 @@ from .myvideo import MyVideoIE
 
				 from .nba import NBAIE
			
 
				 from .nbc import NBCNewsIE
			
 
				 from .ooyala import OoyalaIE
			
 
				+from .orf import ORFIE
			
 
				 from .pbs import PBSIE
			
 
				 from .photobucket import PhotobucketIE
			
 
				 from .pornotube import PornotubeIE
			
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -0,0 +1,65 @@
 
				+import re
			
 
				+import xml.etree.ElementTree
			
 
				+import json
			
 
				+
			
 
				+from .common import InfoExtractor
			
 
				+from ..utils import (
			
 
				+    compat_urlparse,
			
 
				+    ExtractorError,
			
 
				+    find_xpath_attr,
			
 
				+)
			
 
				+
			
 
				+class ORFIE(InfoExtractor):
			
 
				+    _VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
			
 
				+
			
 
				+    _TEST = {
			
 
				+        u'url': u'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter',
			
 
				+        u'file': u'6566957.flv',
			
 
				+        u'info_dict': {
			
 
				+            u'title': u'Wetter',
			
 
				+            u'description': u'Christa Kummer, Marcus Wadsak und Kollegen  präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich.\r \r Mehr Wetter unter wetter.ORF.at',
			
 
				+        },
			
 
				+        u'params': {
			
 
				+            # It uses rtmp
			
 
				+            u'skip_download': True,
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    def _real_extract(self, url):
			
 
				+        mobj = re.match(self._VALID_URL, url)
			
 
				+        playlist_id = mobj.group('id')
			
 
				+        webpage = self._download_webpage(url, playlist_id)
			
 
				+
			
 
				+        flash_xml = self._search_regex('ORF.flashXML = \'(.+?)\'', webpage, u'flash xml')
			
 
				+        flash_xml = compat_urlparse.parse_qs('xml='+flash_xml)['xml'][0]
			
 
				+        flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8'))
			
 
				+        playlist_json = self._search_regex(r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"','"')
			
 
				+        playlist = json.loads(playlist_json)
			
 
				+
			
 
				+        videos = []
			
 
				+        ns = '{http://tempuri.org/XMLSchema.xsd}'
			
 
				+        xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns}
			
 
				+        webpage_description = self._og_search_description(webpage)
			
 
				+        for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1):
			
 
				+            # Get best quality url
			
 
				+            rtmp_url = None
			
 
				+            for q in ['Q6A', 'Q4A', 'Q1A']:
			
 
				+                video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q)
			
 
				+                if video_url is not None:
			
 
				+                    rtmp_url = video_url.text
			
 
				+                    break
			
 
				+            if rtmp_url is None:
			
 
				+                raise ExtractorError(u'Couldn\'t get video url: %s' % info['id'])
			
 
				+            description = self._html_search_regex(
			
 
				+                r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage,
			
 
				+                u'description', default=webpage_description, flags=re.DOTALL)
			
 
				+            videos.append({
			
 
				+                '_type': 'video',
			
 
				+                'id': info['id'],
			
 
				+                'title': info['title'],
			
 
				+                'url': rtmp_url,
			
 
				+                'ext': 'flv',
			
 
				+                'description': description,
			
 
				+                })
			
 
				+
			
 
				+        return videos
			
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -213,7 +213,7 @@ if sys.version_info >= (2,7):
 
				     def find_xpath_attr(node, xpath, key, val):
			
 
				         """ Find the xpath xpath[@key=val] """
			
 
				         assert re.match(r'^[a-zA-Z]+$', key)
			
 
				-        assert re.match(r'^[a-zA-Z@\s]*$', val)
			
 
				+        assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
			
 
				         expr = xpath + u"[@%s='%s']" % (key, val)
			
 
				         return node.find(expr)
			
 
				 else: