Browse Source

[utils] Skip invalid/non HTML entities (Closes #7518)

Sergey M․ 9 years ago
parent
commit
7aefc49c40
2 changed files with 7 additions and 3 deletions
  1. 2 2
      test/test_utils.py
  2. 5 1
      youtube_dl/utils.py

+ 2 - 2
test/test_utils.py

@@ -210,8 +210,8 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unescapeHTML('%20;'), '%20;')
         self.assertEqual(unescapeHTML('&#x2F;'), '/')
         self.assertEqual(unescapeHTML('&#47;'), '/')
-        self.assertEqual(
-            unescapeHTML('&eacute;'), 'é')
+        self.assertEqual(unescapeHTML('&eacute;'), 'é')
+        self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
 
     def test_daterange(self):
         _20century = DateRange("19000101", "20000101")

+ 5 - 1
youtube_dl/utils.py

@@ -396,7 +396,11 @@ def _htmlentity_transform(entity):
             numstr = '0%s' % numstr
         else:
             base = 10
-        return compat_chr(int(numstr, base))
+        # See https://github.com/rg3/youtube-dl/issues/7518
+        try:
+            return compat_chr(int(numstr, base))
+        except ValueError:
+            pass
 
     # Unknown entity in name, return its literal representation
     return ('&%s;' % entity)