Browse Source

[utils] Fix unescapeHTML for misformed string like "&a&quot;" (#13935)

Yen Chi Hsuan 8 years ago
parent
commit
95f3f7c20a
3 changed files with 8 additions and 1 deletion
  1. 6 0
      ChangeLog
  2. 1 0
      test/test_utils.py
  3. 1 1
      youtube_dl/utils.py

+ 6 - 0
ChangeLog

@@ -1,3 +1,9 @@
+version <unreleased>
+
+Core
+* [utils] Fix unescapeHTML for misformed string like "&a&quot;" (#13935)
+
+
 version 2017.08.18
 
 Core

+ 1 - 0
test/test_utils.py

@@ -279,6 +279,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unescapeHTML('&#47;'), '/')
         self.assertEqual(unescapeHTML('&eacute;'), 'é')
         self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
+        self.assertEqual(unescapeHTML('&a&quot;'), '&a"')
         # HTML5 entities
         self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')
 

+ 1 - 1
youtube_dl/utils.py

@@ -596,7 +596,7 @@ def unescapeHTML(s):
     assert type(s) == compat_str
 
     return re.sub(
-        r'&([^;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
+        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
 
 
 def get_subprocess_encoding():