瀏覽代碼

Location: simplify regexes, parsing

There were still some relicts from pre-borgstore / borg 1.x in there:
- patterns about "::", used to be separator between repository and archive.
- patterns for //server/share (not supported by borgstore)

Also: unified ssh+sftp and file+socket processing.
Thomas Waldmann 7 月之前
父節點
當前提交
858fbae9a9
共有 3 個文件被更改,包括 38 次插入106 次删除
  1. 29 99
      src/borg/helpers/parseformat.py
  2. 0 1
      src/borg/testsuite/archiver/checks_test.py
  3. 9 6
      src/borg/testsuite/helpers_test.py

+ 29 - 99
src/borg/helpers/parseformat.py

@@ -401,6 +401,7 @@ def parse_stringified_list(s):
 class Location:
     """Object representing a repository location"""
 
+    # user@ (optional)
     # user must not contain "@", ":" or "/".
     # Quoting adduser error message:
     # "To avoid problems, the username should consist only of letters, digits,
@@ -408,34 +409,7 @@ class Location:
     # (as defined by IEEE Std 1003.1-2001)."
     # We use "@" as separator between username and hostname, so we must
     # disallow it within the pure username part.
-    optional_user_re = r"""
-        (?:(?P<user>[^@:/]+)@)?
-    """
-
-    # path must not contain :: (it ends at :: or string end), but may contain single colons.
-    # to avoid ambiguities with other regexes, it must also not start with ":" nor with "//" nor with "ssh://".
-    local_path_re = r"""
-        (?!(:|//|ssh://|socket://))                         # not starting with ":" or // or ssh:// or socket://
-        (?P<path>([^:]|(:(?!:)))+)                          # any chars, but no "::"
-        """
-
-    # file_path must not contain :: (it ends at :: or string end), but may contain single colons.
-    # it must start with a / and that slash is part of the path.
-    file_path_re = r"""
-        (?P<path>(([^/]*)/([^:]|(:(?!:)))+))                # start opt. servername, then /, then any chars, but no "::"
-        """
-
-    # abs_path must not contain :: (it ends at :: or string end), but may contain single colons.
-    # it must start with a / and that slash is part of the path.
-    abs_path_re = r"""
-        (?P<path>(/([^:]|(:(?!:)))+))                       # start with /, then any chars, but no "::"
-        """
-
-    # path must not contain :: (it ends at :: or string end), but may contain single colons.
-    # it may or may not start with a /.
-    path_re = r"""
-        (?P<path>(([^:]|(:(?!:)))+))                        # any chars, but no "::"
-        """
+    optional_user_re = r"(?:(?P<user>[^@:/]+)@)?"
 
     # host NAME, or host IP ADDRESS (v4 or v6, v6 must be in square brackets)
     host_re = r"""
@@ -446,69 +420,38 @@ class Location:
         )
     """
 
-    # regexes for misc. kinds of supported location specifiers:
-    ssh_re = re.compile(
-        r"""
-        (?P<proto>ssh)://                                       # ssh://
-        """
-        + optional_user_re
-        + host_re
-        + r"""                 # user@  (optional), host name or address
-        (?::(?P<port>\d+))?/                                    # :port (optional) + "/" as separator
-        """
-        + path_re,
-        re.VERBOSE,
-    )  # path
+    # :port (optional)
+    optional_port_re = r"(?::(?P<port>\d+))?"
 
-    sftp_re = re.compile(
-        r"""
-        (?P<proto>sftp)://                                      # sftp://
-        """
-        + optional_user_re
-        + host_re
-        + r"""                 # user@  (optional), host name or address
-        (?::(?P<port>\d+))?/                                    # :port (optional) + "/" as separator
-        """
-        + path_re,
-        re.VERBOSE,
-    )  # path
+    # path may contain any chars. to avoid ambiguities with other regexes,
+    # it must not start with "//" nor with "scheme://" nor with "rclone:".
+    local_path_re = r"""
+        (?!(//|(ssh|socket|sftp|file)://|rclone:))
+        (?P<path>.+)
+    """
 
-    rclone_re = re.compile(
-        r"""
-        (?P<proto>rclone):                                      # rclone:
-        (?P<path>(.*))
-        """,
-        re.VERBOSE,
-    )  # path
+    # abs_path must start with a slash.
+    abs_path_re = r"(?P<path>/.+)"
 
-    socket_re = re.compile(
-        r"""
-        (?P<proto>socket)://                                    # socket://
-        """
-        + abs_path_re,
-        re.VERBOSE,
-    )  # path
+    # path may or may not start with a slash.
+    abs_or_rel_path_re = r"(?P<path>.+)"
 
-    file_re = re.compile(
-        r"""
-        (?P<proto>file)://                                      # file://
-        """
-        + file_path_re,
+    # regexes for misc. kinds of supported location specifiers:
+    ssh_or_sftp_re = re.compile(
+        r"(?P<proto>(ssh|sftp))://"
+        + optional_user_re
+        + host_re
+        + optional_port_re
+        + r"/"  # this is the separator, not part of the path!
+        + abs_or_rel_path_re,
         re.VERBOSE,
-    )  # servername/path or path
+    )
 
-    local_re = re.compile(local_path_re, re.VERBOSE)  # local path
+    rclone_re = re.compile(r"(?P<proto>rclone):(?P<path>(.*))", re.VERBOSE)
 
-    win_file_re = re.compile(
-        r"""
-        (?:file://)?                                        # optional file protocol
-        (?P<path>
-            (?:[a-zA-Z]:)?                                  # Drive letter followed by a colon (optional)
-            (?:[^:]+)                                       # Anything which does not contain a :, at least one char
-        )
-        """,
-        re.VERBOSE,
-    )
+    file_or_socket_re = re.compile(r"(?P<proto>(file|socket))://" + abs_path_re, re.VERBOSE)
+
+    local_re = re.compile(local_path_re, re.VERBOSE)
 
     def __init__(self, text="", overrides={}, other=False):
         self.repo_env_var = "BORG_OTHER_REPO" if other else "BORG_REPO"
@@ -538,15 +481,7 @@ class Location:
             raise ValueError('Invalid location format: "%s"' % self.processed)
 
     def _parse(self, text):
-        m = self.ssh_re.match(text)
-        if m:
-            self.proto = m.group("proto")
-            self.user = m.group("user")
-            self._host = m.group("host")
-            self.port = m.group("port") and int(m.group("port")) or None
-            self.path = os.path.normpath(m.group("path"))
-            return True
-        m = self.sftp_re.match(text)
+        m = self.ssh_or_sftp_re.match(text)
         if m:
             self.proto = m.group("proto")
             self.user = m.group("user")
@@ -559,12 +494,7 @@ class Location:
             self.proto = m.group("proto")
             self.path = m.group("path")
             return True
-        m = self.file_re.match(text)
-        if m:
-            self.proto = m.group("proto")
-            self.path = os.path.normpath(m.group("path"))
-            return True
-        m = self.socket_re.match(text)
+        m = self.file_or_socket_re.match(text)
         if m:
             self.proto = m.group("proto")
             self.path = os.path.normpath(m.group("path"))

+ 0 - 1
src/borg/testsuite/archiver/checks_test.py

@@ -250,7 +250,6 @@ def test_unknown_feature_on_mount(archivers, request):
     mountpoint = os.path.join(archiver.tmpdir, "mountpoint")
     os.mkdir(mountpoint)
     # XXX this might hang if it doesn't raise an error
-    archiver.repository_location += "::test"
     cmd_raises_unknown_feature(archiver, ["mount", mountpoint])
 
 

+ 9 - 6
src/borg/testsuite/helpers_test.py

@@ -278,14 +278,17 @@ class TestLocationWithoutEnv:
     def test_canonical_path(self, monkeypatch):
         monkeypatch.delenv("BORG_REPO", raising=False)
         locations = [
-            "some/path",
-            "file://some/path",
-            "host:some/path",
-            "host:~user/some/path",
-            "socket:///some/path",
+            "relative/path",
+            "/absolute/path",
+            "file:///absolute/path",
+            "socket:///absolute/path",
             "ssh://host/relative/path",
             "ssh://host//absolute/path",
-            "ssh://user@host:1234/some/path",
+            "ssh://user@host:1234/relative/path",
+            "sftp://host/relative/path",
+            "sftp://host//absolute/path",
+            "sftp://user@host:1234/relative/path",
+            "rclone:remote:path",
         ]
         for location in locations:
             assert (