瀏覽代碼

Allow iterators for playlist result entries

Philipp Hagemeister 10 年之前
父節點
當前提交
b82f815f37
共有 3 個文件被更改,包括 26 次插入和 16 次删除
  1. 14 6
      youtube_dl/YoutubeDL.py
  2. 2 2
      youtube_dl/extractor/common.py
  3. 10 8
      youtube_dl/extractor/youtube.py

+ 14 - 6
youtube_dl/YoutubeDL.py

@@ -7,6 +7,7 @@ import collections
 import datetime
 import datetime
 import errno
 import errno
 import io
 import io
+import itertools
 import json
 import json
 import locale
 import locale
 import os
 import os
@@ -654,21 +655,28 @@ class YoutubeDL(object):
             if playlistend == -1:
             if playlistend == -1:
                 playlistend = None
                 playlistend = None
 
 
-            if isinstance(ie_result['entries'], list):
-                n_all_entries = len(ie_result['entries'])
-                entries = ie_result['entries'][playliststart:playlistend]
+            ie_entries = ie_result['entries']
+            if isinstance(ie_entries, list):
+                n_all_entries = len(ie_entries)
+                entries = ie_entries[playliststart:playlistend]
                 n_entries = len(entries)
                 n_entries = len(entries)
                 self.to_screen(
                 self.to_screen(
                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
-            else:
-                assert isinstance(ie_result['entries'], PagedList)
-                entries = ie_result['entries'].getslice(
+            elif isinstance(ie_entries, PagedList):
+                entries = ie_entries.getslice(
                     playliststart, playlistend)
                     playliststart, playlistend)
                 n_entries = len(entries)
                 n_entries = len(entries)
                 self.to_screen(
                 self.to_screen(
                     "[%s] playlist %s: Downloading %d videos" %
                     "[%s] playlist %s: Downloading %d videos" %
                     (ie_result['extractor'], playlist, n_entries))
                     (ie_result['extractor'], playlist, n_entries))
+            else:  # iterable
+                entries = list(itertools.islice(
+                    ie_entries, playliststart, playlistend))
+                n_entries = len(entries)
+                self.to_screen(
+                    "[%s] playlist %s: Downloading %d videos" %
+                    (ie_result['extractor'], playlist, n_entries))
 
 
             for i, entry in enumerate(entries, 1):
             for i, entry in enumerate(entries, 1):
                 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))

+ 2 - 2
youtube_dl/extractor/common.py

@@ -158,8 +158,8 @@ class InfoExtractor(object):
 
 
 
 
     _type "playlist" indicates multiple videos.
     _type "playlist" indicates multiple videos.
-    There must be a key "entries", which is a list or a PagedList object, each
-    element of which is a valid dictionary under this specfication.
+    There must be a key "entries", which is a list, an iterable, or a PagedList
+    object, each element of which is a valid dictionary by this specification.
 
 
     Additionally, playlists can have "title" and "id" attributes with the same
     Additionally, playlists can have "title" and "id" attributes with the same
     semantics as videos (see above).
     semantics as videos (see above).

+ 10 - 8
youtube_dl/extractor/youtube.py

@@ -1262,8 +1262,12 @@ class YoutubeChannelIE(InfoExtractor):
             # The videos are contained in a single page
             # The videos are contained in a single page
             # the ajax pages can't be used, they are empty
             # the ajax pages can't be used, they are empty
             video_ids = self.extract_videos_from_page(channel_page)
             video_ids = self.extract_videos_from_page(channel_page)
-        else:
-            # Download all channel pages using the json-based channel_ajax query
+            entries = [
+                self.url_result(video_id, 'Youtube', video_id=video_id)
+                for video_id in video_ids]
+            return self.playlist_result(entries, channel_id)
+
+        def _entries():
             for pagenum in itertools.count(1):
             for pagenum in itertools.count(1):
                 url = self._MORE_PAGES_URL % (pagenum, channel_id)
                 url = self._MORE_PAGES_URL % (pagenum, channel_id)
                 page = self._download_json(
                 page = self._download_json(
@@ -1271,16 +1275,14 @@ class YoutubeChannelIE(InfoExtractor):
                     transform_source=uppercase_escape)
                     transform_source=uppercase_escape)
 
 
                 ids_in_page = self.extract_videos_from_page(page['content_html'])
                 ids_in_page = self.extract_videos_from_page(page['content_html'])
-                video_ids.extend(ids_in_page)
+                for video_id in ids_in_page:
+                    yield self.url_result(
+                        video_id, 'Youtube', video_id=video_id)
 
 
                 if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                 if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                     break
                     break
 
 
-        self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
-
-        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
-                       for video_id in video_ids]
-        return self.playlist_result(url_entries, channel_id)
+        return self.playlist_result(_entries(), channel_id)
 
 
 
 
 class YoutubeUserIE(InfoExtractor):
 class YoutubeUserIE(InfoExtractor):