Преглед на файлове

Do not override stdlib html parser 'locatestarttagend' regex (fixes #4081)

'<a href="foo" ><img src="bar" / ></a>' wouldn't be parsed right (the problem is '/ >', '/>' worked fine).
We need to change it in python 2.6 (for example the description of youtube videos wouldn't be extracted).
Jaime Marquínez Ferrándiz преди 11 години
родител
ревизия
4f195f55f0
променени са 1 файла, в които са добавени 3 реда и са изтрити 1 реда
  1. 3 1
      youtube_dl/utils.py

+ 3 - 1
youtube_dl/utils.py

@@ -152,7 +152,9 @@ def xpath_text(node, xpath, name=None, fatal=False):
     return n.text
 
 
-compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
+if sys.version_info < (2, 7):
+    compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
+
 class BaseHTMLParser(compat_html_parser.HTMLParser):
     def __init(self):
         compat_html_parser.HTMLParser.__init__(self)