Pārlūkot izejas kodu

[utils] Skip invalid/non HTML entities (Closes #7518)

Sergey M․ 9 gadi atpakaļ
vecāks
revīzija
7aefc49c40
2 mainīti faili ar 7 papildinājumiem un 3 dzēšanām
  1. 2 2
      test/test_utils.py
  2. 5 1
      youtube_dl/utils.py

+ 2 - 2
test/test_utils.py

@@ -210,8 +210,8 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unescapeHTML('%20;'), '%20;')
         self.assertEqual(unescapeHTML('%20;'), '%20;')
         self.assertEqual(unescapeHTML('/'), '/')
         self.assertEqual(unescapeHTML('/'), '/')
         self.assertEqual(unescapeHTML('/'), '/')
         self.assertEqual(unescapeHTML('/'), '/')
-        self.assertEqual(
-            unescapeHTML('é'), 'é')
+        self.assertEqual(unescapeHTML('é'), 'é')
+        self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
 
 
     def test_daterange(self):
     def test_daterange(self):
         _20century = DateRange("19000101", "20000101")
         _20century = DateRange("19000101", "20000101")

+ 5 - 1
youtube_dl/utils.py

@@ -396,7 +396,11 @@ def _htmlentity_transform(entity):
             numstr = '0%s' % numstr
             numstr = '0%s' % numstr
         else:
         else:
             base = 10
             base = 10
-        return compat_chr(int(numstr, base))
+        # See https://github.com/rg3/youtube-dl/issues/7518
+        try:
+            return compat_chr(int(numstr, base))
+        except ValueError:
+            pass
 
 
     # Unknown entity in name, return its literal representation
     # Unknown entity in name, return its literal representation
     return ('&%s;' % entity)
     return ('&%s;' % entity)