Browse Source

Support patterns on extraction, fixes #361

This change implements the functionality requested in issue #361:
extracting files with a given extension. It does so by permitting
patterns to be used instead of plain prefix paths. The pattern styles
supported are the same as for exclusions.
Michael Hanselmann 9 years ago
parent
commit
ceae4a9fa8
2 changed files with 50 additions and 5 deletions
  1. 17 5
      borg/archiver.py
  2. 33 0
      borg/testsuite/archiver.py

+ 17 - 5
borg/archiver.py

@@ -286,13 +286,25 @@ class Archiver:
         manifest, key = Manifest.load(repository)
         manifest, key = Manifest.load(repository)
         archive = Archive(repository, key, manifest, args.location.archive,
         archive = Archive(repository, key, manifest, args.location.archive,
                           numeric_owner=args.numeric_owner)
                           numeric_owner=args.numeric_owner)
-        patterns = adjust_patterns(args.paths, args.excludes)
+
+        matcher = PatternMatcher()
+        if args.excludes:
+            matcher.add(args.excludes, False)
+
+        include_patterns = []
+
+        if args.paths:
+            include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in args.paths)
+            matcher.add(include_patterns, True)
+
+        matcher.fallback = not include_patterns
+
         dry_run = args.dry_run
         dry_run = args.dry_run
         stdout = args.stdout
         stdout = args.stdout
         sparse = args.sparse
         sparse = args.sparse
         strip_components = args.strip_components
         strip_components = args.strip_components
         dirs = []
         dirs = []
-        for item in archive.iter_items(lambda item: not exclude_path(item[b'path'], patterns), preload=True):
+        for item in archive.iter_items(lambda item: matcher.match(item[b'path']), preload=True):
             orig_path = item[b'path']
             orig_path = item[b'path']
             if strip_components:
             if strip_components:
                 item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
                 item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
@@ -317,8 +329,8 @@ class Archiver:
         if not args.dry_run:
         if not args.dry_run:
             while dirs:
             while dirs:
                 archive.extract_item(dirs.pop(-1))
                 archive.extract_item(dirs.pop(-1))
-        for pattern in (patterns or []):
-            if isinstance(pattern, PathPrefixPattern) and pattern.match_count == 0:
+        for pattern in include_patterns:
+            if pattern.match_count == 0:
                 self.print_warning("Include pattern '%s' never matched.", pattern)
                 self.print_warning("Include pattern '%s' never matched.", pattern)
         return self.exit_code
         return self.exit_code
 
 
@@ -965,7 +977,7 @@ class Archiver:
                                type=location_validator(archive=True),
                                type=location_validator(archive=True),
                                help='archive to extract')
                                help='archive to extract')
         subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
         subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
-                               help='paths to extract')
+                               help='paths to extract; patterns are supported')
 
 
         rename_epilog = textwrap.dedent("""
         rename_epilog = textwrap.dedent("""
         This command renames an archive in the repository.
         This command renames an archive in the repository.

+ 33 - 0
borg/testsuite/archiver.py

@@ -562,6 +562,39 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             self.cmd('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test')
             self.cmd('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test')
         self.assert_equal(sorted(os.listdir('output/input')), ['file3'])
         self.assert_equal(sorted(os.listdir('output/input')), ['file3'])
 
 
+    def test_extract_with_pattern(self):
+        self.cmd("init", self.repository_location)
+        self.create_regular_file("file1", size=1024 * 80)
+        self.create_regular_file("file2", size=1024 * 80)
+        self.create_regular_file("file3", size=1024 * 80)
+        self.create_regular_file("file4", size=1024 * 80)
+        self.create_regular_file("file333", size=1024 * 80)
+
+        self.cmd("create", self.repository_location + "::test", "input")
+
+        # Extract everything with regular expression
+        with changedir("output"):
+            self.cmd("extract", self.repository_location + "::test", "re:.*")
+        self.assert_equal(sorted(os.listdir("output/input")), ["file1", "file2", "file3", "file333", "file4"])
+        shutil.rmtree("output/input")
+
+        # Extract with pattern while also excluding files
+        with changedir("output"):
+            self.cmd("extract", "--exclude=re:file[34]$", self.repository_location + "::test", r"re:file\d$")
+        self.assert_equal(sorted(os.listdir("output/input")), ["file1", "file2"])
+        shutil.rmtree("output/input")
+
+        # Combine --exclude with pattern for extraction
+        with changedir("output"):
+            self.cmd("extract", "--exclude=input/file1", self.repository_location + "::test", "re:file[12]$")
+        self.assert_equal(sorted(os.listdir("output/input")), ["file2"])
+        shutil.rmtree("output/input")
+
+        # Multiple pattern
+        with changedir("output"):
+            self.cmd("extract", self.repository_location + "::test", "fm:input/file1", "fm:*file33*", "input/file2")
+        self.assert_equal(sorted(os.listdir("output/input")), ["file1", "file2", "file333"])
+
     def test_exclude_caches(self):
     def test_exclude_caches(self):
         self.cmd('init', self.repository_location)
         self.cmd('init', self.repository_location)
         self.create_regular_file('file1', size=1024 * 80)
         self.create_regular_file('file1', size=1024 * 80)