浏览代码

[googledrive] Fix extraction on Python 3.6

Since Python 3.6, invalid escape sequences are deprecated. It's likely
that there are invalid escape sequences somewhere on the webpage, so
instead of unescaping the whole webpage, just unescape the URL.

See https://bugs.python.org/issue27364. That change was designed for
string literals, but it also affects the 'unicode_escape' encoding.
The code path is:

str.decode('unicode_escape')
    codecs.unicode_escape_decode()
        PyUnicode_DecodeUnicodeEscape()
Yen Chi Hsuan 8 年之前
父节点
当前提交
e4e50f60b1
共有 2 个文件被更改,包括 11 次插入和 4 次删除
  1. 6 0
      ChangeLog
  2. 5 4
      youtube_dl/extractor/googledrive.py

+ 6 - 0
ChangeLog

@@ -1,3 +1,9 @@
+version <unreleased>
+
+Extractors
+* [googledrive] Fix extraction on Python 3.6
+
+
 version 2017.02.04.1
 
 Extractors

+ 5 - 4
youtube_dl/extractor/googledrive.py

@@ -6,6 +6,7 @@ from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     int_or_none,
+    lowercase_escape,
 )
 
 
@@ -13,12 +14,12 @@ class GoogleDriveIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
     _TESTS = [{
         'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
-        'md5': '881f7700aec4f538571fa1e0eed4a7b6',
+        'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
         'info_dict': {
             'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
             'ext': 'mp4',
             'title': 'Big Buck Bunny.mp4',
-            'duration': 46,
+            'duration': 45,
         }
     }, {
         # video id is longer than 28 characters
@@ -55,7 +56,7 @@ class GoogleDriveIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(
-            'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
+            'http://docs.google.com/file/d/%s' % video_id, video_id)
 
         reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
         if reason:
@@ -74,7 +75,7 @@ class GoogleDriveIE(InfoExtractor):
             resolution = fmt.split('/')[1]
             width, height = resolution.split('x')
             formats.append({
-                'url': fmt_url,
+                'url': lowercase_escape(fmt_url),
                 'format_id': fmt_id,
                 'resolution': resolution,
                 'width': int_or_none(width),