瀏覽代碼

Add basic --download-archive option

Often, users want to download only videos they haven't seen before, even if the video files have been deleted or moved in the meantime.
When --download-archive FILE is given, the extractor and ID of every download are recorded in the specified file. If a video is already present in that file, it is skipped.
Philipp Hagemeister 12 年之前
父節點
當前提交
c1c9a79c49
共有 3 個文件被更改,包括 132 次插入0 次删除
  1. 32 0
      youtube_dl/YoutubeDL.py
  2. 4 0
      youtube_dl/__init__.py
  3. 96 0
      youtube_dl/utils.py

+ 32 - 0
youtube_dl/YoutubeDL.py

@@ -3,6 +3,7 @@
 
 
 from __future__ import absolute_import
 from __future__ import absolute_import
 
 
+import errno
 import io
 import io
 import os
 import os
 import re
 import re
@@ -84,6 +85,9 @@ class YoutubeDL(object):
     cachedir:          Location of the cache files in the filesystem.
     cachedir:          Location of the cache files in the filesystem.
                        None to disable filesystem cache.
                        None to disable filesystem cache.
     noplaylist:        Download single video instead of a playlist if in doubt.
     noplaylist:        Download single video instead of a playlist if in doubt.
+    download_archive:  File name of a file where all downloads are recorded.
+                       Videos already present in the file are not downloaded
+                       again.
     
     
     The following parameters are not used by YoutubeDL itself, they are used by
     The following parameters are not used by YoutubeDL itself, they are used by
     the FileDownloader:
     the FileDownloader:
@@ -309,6 +313,9 @@ class YoutubeDL(object):
             dateRange = self.params.get('daterange', DateRange())
             dateRange = self.params.get('daterange', DateRange())
             if date not in dateRange:
             if date not in dateRange:
                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+        if self.in_download_archive(info_dict):
+            return (u'%(title)s) has already been recorded in archive'
+                    % info_dict)
         return None
         return None
         
         
     def extract_info(self, url, download=True, ie_key=None, extra_info={}):
     def extract_info(self, url, download=True, ie_key=None, extra_info={}):
@@ -578,6 +585,8 @@ class YoutubeDL(object):
                     self.report_error(u'postprocessing: %s' % str(err))
                     self.report_error(u'postprocessing: %s' % str(err))
                     return
                     return
 
 
+        self.record_download_archive(info_dict)
+
     def download(self, url_list):
     def download(self, url_list):
         """Download a given list of URLs."""
         """Download a given list of URLs."""
         if len(url_list) > 1 and self.fixed_template():
         if len(url_list) > 1 and self.fixed_template():
@@ -617,3 +626,26 @@ class YoutubeDL(object):
                 os.remove(encodeFilename(filename))
                 os.remove(encodeFilename(filename))
             except (IOError, OSError):
             except (IOError, OSError):
                 self.report_warning(u'Unable to remove downloaded video file')
                 self.report_warning(u'Unable to remove downloaded video file')
+
+    def in_download_archive(self, info_dict):
+        """Report whether this video is already listed in the download archive.
+
+        Returns False when no archive file is configured or when the archive
+        file does not exist yet; any other IOError is re-raised.
+        """
+        archive_path = self.params.get('download_archive')
+        if archive_path is None:
+            return False
+        # One archive entry per line: "<extractor> <id>"
+        entry = info_dict['extractor'] + u' ' + info_dict['id']
+        try:
+            with locked_file(archive_path, 'r', encoding='utf-8') as archive:
+                return any(line.strip() == entry for line in archive)
+        except IOError as err:
+            # A missing archive simply means nothing has been recorded yet.
+            if err.errno == errno.ENOENT:
+                return False
+            raise
+
+    def record_download_archive(self, info_dict):
+        """Append this video's "<extractor> <id>" line to the download archive.
+
+        Does nothing when no archive file is configured.
+        """
+        archive_path = self.params.get('download_archive')
+        if archive_path is None:
+            return
+        entry_line = info_dict['extractor'] + u' ' + info_dict['id'] + u'\n'
+        with locked_file(archive_path, 'a', encoding='utf-8') as archive:
+            archive.write(entry_line)

+ 4 - 0
youtube_dl/__init__.py

@@ -188,6 +188,9 @@ def parseOpts(overrideArguments=None):
     selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
     selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
     selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
     selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
     selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
     selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
+    selection.add_option('--download-archive', metavar='FILE',
+                         dest='download_archive',
+                         help='Download only videos not present in the archive file. Record all downloaded videos in it.')
 
 
 
 
     authentication.add_option('-u', '--username',
     authentication.add_option('-u', '--username',
@@ -631,6 +634,7 @@ def _real_main(argv=None):
         'daterange': date,
         'daterange': date,
         'cachedir': opts.cachedir,
         'cachedir': opts.cachedir,
         'youtube_print_sig_code': opts.youtube_print_sig_code,
         'youtube_print_sig_code': opts.youtube_print_sig_code,
+        'download_archive': opts.download_archive,
         })
         })
 
 
     if opts.verbose:
     if opts.verbose:

+ 96 - 0
youtube_dl/utils.py

@@ -830,3 +830,99 @@ def get_cachedir(params={}):
     cache_root = os.environ.get('XDG_CACHE_HOME',
     cache_root = os.environ.get('XDG_CACHE_HOME',
                                 os.path.expanduser('~/.cache'))
                                 os.path.expanduser('~/.cache'))
     return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
     return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
+
+
+# Cross-platform file locking
+# Each branch below defines _lock_file(f, exclusive) and _unlock_file(f):
+# win32 calls kernel32's LockFileEx/UnlockFileEx through ctypes, every other
+# platform uses fcntl.lockf.
+if sys.platform == 'win32':
+    import ctypes.wintypes
+    import msvcrt
+
+    # Structure type for the Overlapped pointer argument of both calls.
+    class OVERLAPPED(ctypes.Structure):
+        _fields_ = [
+            ('Internal', ctypes.wintypes.LPVOID),
+            ('InternalHigh', ctypes.wintypes.LPVOID),
+            ('Offset', ctypes.wintypes.DWORD),
+            ('OffsetHigh', ctypes.wintypes.DWORD),
+            ('hEvent', ctypes.wintypes.HANDLE),
+        ]
+
+    kernel32 = ctypes.windll.kernel32
+    LockFileEx = kernel32.LockFileEx
+    LockFileEx.argtypes = [
+        ctypes.wintypes.HANDLE,     # hFile
+        ctypes.wintypes.DWORD,      # dwFlags
+        ctypes.wintypes.DWORD,      # dwReserved
+        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
+        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
+        ctypes.POINTER(OVERLAPPED)  # Overlapped
+    ]
+    LockFileEx.restype = ctypes.wintypes.BOOL
+    UnlockFileEx = kernel32.UnlockFileEx
+    UnlockFileEx.argtypes = [
+        ctypes.wintypes.HANDLE,     # hFile
+        ctypes.wintypes.DWORD,      # dwReserved
+        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
+        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
+        ctypes.POINTER(OVERLAPPED)  # Overlapped
+    ]
+    UnlockFileEx.restype = ctypes.wintypes.BOOL
+    # Low / high DWORDs of the byte count handed to both calls; the names
+    # suggest the range is meant to cover the whole file (TODO confirm).
+    whole_low = 0xffffffff
+    whole_high = 0x7fffffff
+
+    def _lock_file(f, exclusive):
+        """Lock the open file f through LockFileEx.
+
+        Passes flag 0x2 (LOCKFILE_EXCLUSIVE_LOCK in the Win32 API) when
+        exclusive is true and 0x0 otherwise. Raises OSError if the call
+        reports failure.
+        """
+        overlapped = OVERLAPPED()
+        overlapped.Offset = 0
+        overlapped.OffsetHigh = 0
+        overlapped.hEvent = 0
+        # Kept on the file object because _unlock_file passes the same
+        # pointer to UnlockFileEx.
+        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
+        handle = msvcrt.get_osfhandle(f.fileno())
+        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
+                          whole_low, whole_high, f._lock_file_overlapped_p):
+            raise OSError('Locking file failed: %r' % ctypes.FormatError())
+
+    def _unlock_file(f):
+        """Release the lock taken by _lock_file on f through UnlockFileEx.
+
+        Expects f to carry the pointer stored by _lock_file. Raises OSError
+        if the call reports failure.
+        """
+        assert f._lock_file_overlapped_p
+        handle = msvcrt.get_osfhandle(f.fileno())
+        if not UnlockFileEx(handle, 0,
+                            whole_low, whole_high, f._lock_file_overlapped_p):
+            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
+
+else:
+    import fcntl
+
+    def _lock_file(f, exclusive):
+        """Lock f with fcntl.lockf: LOCK_EX if exclusive, else LOCK_SH."""
+        fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
+
+    def _unlock_file(f):
+        """Release the lock on f with fcntl.lockf(LOCK_UN)."""
+        fcntl.lockf(f, fcntl.LOCK_UN)
+
+
+class locked_file(object):
+    """Context manager around a file that stays locked while the block runs.
+
+    The file is opened in the constructor. Entering the context takes a
+    shared lock for mode 'r' and an exclusive lock for 'a' or 'w'; leaving it
+    releases the lock and closes the file. Inside the block the object
+    supports iteration, write() and read(), all forwarded to the file.
+    """
+
+    def __init__(self, filename, mode, encoding=None):
+        """Open filename with io.open; mode must be 'r', 'a' or 'w'."""
+        assert mode in ['r', 'a', 'w']
+        self.f = io.open(filename, mode, encoding=encoding)
+        self.mode = mode
+
+    def __enter__(self):
+        """Acquire the lock and return self; close the file if locking fails."""
+        exclusive = self.mode != 'r'
+        try:
+            _lock_file(self.f, exclusive)
+        except (IOError, OSError):
+            # The win32 _lock_file raises OSError, which is not an IOError on
+            # Python 2; handle both so the open file never leaks.
+            self.f.close()
+            raise
+        return self
+
+    def __exit__(self, etype, value, traceback):
+        """Release the lock, then close the file even if unlocking failed."""
+        try:
+            _unlock_file(self.f)
+        finally:
+            self.f.close()
+
+    def __iter__(self):
+        """Iterate over the lines of the underlying file."""
+        return iter(self.f)
+
+    def write(self, *args):
+        """Forward to the underlying file's write()."""
+        return self.f.write(*args)
+
+    def read(self, *args):
+        """Forward to the underlying file's read()."""
+        return self.f.read(*args)