浏览代码

[generic] Fix rss under Python 2.x and move test to extractor

Philipp Hagemeister 11 年之前
父节点
当前提交
0990305d2a
共有 5 个文件被更改，包括 31 次插入和 12 次删除
  1. 4 1
      test/helper.py
  2. 3 1
      test/test_download.py
  3. 0 9
      test/test_playlists.py
  4. 10 0
      youtube_dl/extractor/generic.py
  5. 14 1
      youtube_dl/utils.py

+ 4 - 1
test/helper.py

@@ -102,7 +102,10 @@ def expect_info_dict(self, expected_dict, got_dict):
             match_rex = re.compile(match_str)
             match_rex = re.compile(match_str)
 
 
             self.assertTrue(
             self.assertTrue(
-                isinstance(got, compat_str) and match_rex.match(got),
+                isinstance(got, compat_str),
+                'Expected a %r object, but got %r' % (compat_str, type(got)))
+            self.assertTrue(
+                match_rex.match(got),
                 u'field %s (value: %r) should match %r' % (info_field, got, match_str))
                 u'field %s (value: %r) should match %r' % (info_field, got, match_str))
         elif isinstance(expected, type):
         elif isinstance(expected, type):
             got = got_dict.get(info_field)
             got = got_dict.get(info_field)

+ 3 - 1
test/test_download.py

@@ -7,6 +7,7 @@ import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 
 from test.helper import (
 from test.helper import (
+    assertGreaterEqual,
     get_params,
     get_params,
     gettestcases,
     gettestcases,
     expect_info_dict,
     expect_info_dict,
@@ -136,7 +137,8 @@ def generator(test_case):
                 self.assertEqual(res_dict['_type'], 'playlist')
                 self.assertEqual(res_dict['_type'], 'playlist')
                 expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
                 expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
             if 'playlist_mincount' in test_case:
             if 'playlist_mincount' in test_case:
-                self.assertGreaterEqual(
+                assertGreaterEqual(
+                    self,
                     len(res_dict['entries']),
                     len(res_dict['entries']),
                     test_case['playlist_mincount'],
                     test_case['playlist_mincount'],
                     'Expected at least %d in playlist %s, but got only %d' % (
                     'Expected at least %d in playlist %s, but got only %d' % (

+ 0 - 9
test/test_playlists.py

@@ -310,15 +310,6 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
         self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
         self.assertEqual(len(result['entries']), 3)
         self.assertEqual(len(result['entries']), 3)
 
 
-    def test_generic_rss_feed(self):
-        dl = FakeYDL()
-        ie = GenericIE(dl)
-        result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml')
-        self.assertEqual(result['title'], 'Zero Punctuation')
-        self.assertTrue(len(result['entries']) > 10)
-
     def test_ted_playlist(self):
     def test_ted_playlist(self):
         dl = FakeYDL()
         dl = FakeYDL()
         ie = TEDIE(dl)
         ie = TEDIE(dl)

+ 10 - 0
youtube_dl/extractor/generic.py

@@ -341,6 +341,16 @@ class GenericIE(InfoExtractor):
                 'uploader': 'www.handjobhub.com',
                 'uploader': 'www.handjobhub.com',
                 'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub',
                 'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub',
             }
             }
+        },
+        # RSS feed
+        {
+            'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
+            'info_dict': {
+                'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
+                'title': 'Zero Punctuation',
+                'description': 're:'
+            },
+            'playlist_mincount': 11,
         }
         }
     ]
     ]
 
 

+ 14 - 1
youtube_dl/utils.py

@@ -1458,6 +1458,12 @@ def urlencode_postdata(*args, **kargs):
     return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
     return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
 
 
 
 
+try:
+    etree_iter = xml.etree.ElementTree.Element.iter
+except AttributeError:  # Python <=2.6
+    etree_iter = lambda n: n.findall('.//*')
+
+
 def parse_xml(s):
 def parse_xml(s):
     class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
     class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
         def doctype(self, name, pubid, system):
         def doctype(self, name, pubid, system):
@@ -1465,7 +1471,14 @@ def parse_xml(s):
 
 
     parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
     parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
     kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
     kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
-    return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
+    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
+    # Fix up XML parser in Python 2.x
+    if sys.version_info < (3, 0):
+        for n in etree_iter(tree):
+            if n.text is not None:
+                if not isinstance(n.text, compat_str):
+                    n.text = n.text.decode('utf-8')
+    return tree
 
 
 
 
 if sys.version_info < (3, 0) and sys.platform == 'win32':
 if sys.version_info < (3, 0) and sys.platform == 'win32':