Browse Source

[compat] Fix compat_shlex_split for non-ASCII input

Closes #9871
Yen Chi Hsuan 9 years ago
parent
commit
dfe5fa49ae
2 changed files with 6 additions and 3 deletions
  1. test/test_compat.py (+1 -0)
  2. youtube_dl/compat.py (+5 -3)

+ 1 - 0
test/test_compat.py

@@ -88,6 +88,7 @@ class TestCompat(unittest.TestCase):
     def test_compat_shlex_split(self):
         self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
         self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])
+        self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文'])
 
     def test_compat_etree_fromstring(self):
         xml = '''

+ 5 - 3
youtube_dl/compat.py

@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import binascii
@@ -2594,15 +2595,16 @@ except ImportError:  # Python < 3.3
             return "'" + s.replace("'", "'\"'\"'") + "'"
 
 
-if sys.version_info >= (2, 7, 3):
+try:
+    assert shlex.split('中文') == ['中文']
     compat_shlex_split = shlex.split
-else:
+except (AssertionError, UnicodeWarning, UnicodeEncodeError):
     # Working around shlex issue with unicode strings on some python 2
     # versions (see http://bugs.python.org/issue1548891)
     def compat_shlex_split(s, comments=False, posix=True):
         if isinstance(s, compat_str):
             s = s.encode('utf-8')
-        return shlex.split(s, comments, posix)
+        return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
 
 
 def compat_ord(c):