Browse Source

Do not override stdlib html parser 'locatestarttagend' regex (fixes #4081)

'<a href="foo" ><img src="bar" / ></a>' wouldn't be parsed right (the problem is '/ >', '/>' worked fine).
We still need to override it on Python 2.6 (otherwise, for example, the description of YouTube videos wouldn't be extracted).
Jaime Marquínez Ferrándiz 11 năm trước cách đây
mục cha
commit
4f195f55f0
1 tập tin đã thay đổi với 3 bổ sung và 1 xóa
  1. 3 1
      youtube_dl/utils.py

+ 3 - 1
youtube_dl/utils.py

@@ -152,7 +152,9 @@ def xpath_text(node, xpath, name=None, fatal=False):
     return n.text
 
 
-compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
+if sys.version_info < (2, 7):
+    compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
+
 class BaseHTMLParser(compat_html_parser.HTMLParser):
     def __init(self):
         compat_html_parser.HTMLParser.__init__(self)