浏览代码

[utils] Transliterate "þ" as "th" (#20897)

Despite visual similarity "þ" is unrelated to "p".
It is normally transliterated as "th":

    $ echo þ-Þ | iconv -t ASCII//TRANSLIT
    th-TH
Jakub Wilk 6 年之前
父节点
当前提交
fd35d8cdfd
共有 2 个文件被更改,包括 3 次插入3 次删除
  1. 1 1
      test/test_utils.py
  2. 2 2
      youtube_dl/utils.py

+ 1 - 1
test/test_utils.py

@@ -183,7 +183,7 @@ class TestUtil(unittest.TestCase):
 
         self.assertEqual(sanitize_filename(
             'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
-            'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy')
+            'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYTHssaaaaaaaeceeeeiiiionooooooooeuuuuuythy')
 
     def test_sanitize_ids(self):
         self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')

+ 2 - 2
youtube_dl/utils.py

@@ -125,8 +125,8 @@ KNOWN_EXTENSIONS = (
 
 # needed for sanitizing filenames in restricted mode
 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
-                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
-                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
+                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
+                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
 
 DATE_FORMATS = (
     '%d %B %Y',