diff --git a/src/bsv/path_map.py b/src/bsv/path_map.py
new file mode 100644
index 0000000..fd72487
--- /dev/null
+++ b/src/bsv/path_map.py
@@ -0,0 +1,84 @@
+# bsv - Backup, Synchronization, Versioning
+# Copyright (C) 2023 Simon Boyé
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+from __future__ import annotations
+from dataclasses import dataclass
+from itertools import pairwise
+
+from pathlib import Path, PurePosixPath
+from typing import Any
+
+
+@dataclass(order=True, frozen=True, slots=True)
+class PathPair:
+    """Association between a path inside bsv and a path on the file system.
+
+    Both paths must be absolute. Instances are immutable and ordered by
+    ``(bsv, fs)``.
+    """
+
+    bsv: PurePosixPath
+    fs: Path
+
+    def __post_init__(self):
+        """Reject relative paths.
+
+        Raises:
+            ValueError: if either path is not absolute.
+        """
+        if not self.bsv.is_absolute() or not self.fs.is_absolute():
+            raise ValueError("paths in path_map must be absolute")
+
+    @classmethod
+    def from_obj(cls, obj: dict[str, str]) -> PathPair:
+        """Build a pair from a mapping holding the ``"bsv"`` and ``"fs"`` keys."""
+        return cls(
+            bsv = PurePosixPath(obj["bsv"]),
+            fs = Path(obj["fs"]),
+        )
+
+
+class PathMap:
+    """Sorted list of `PathPair` whose bsv paths are unique and not nested."""
+
+    paths: list[PathPair]
+
+    def __init__(self, paths: list[PathPair] | None = None):
+        """Sort and validate *paths* (an empty map when omitted).
+
+        Raises:
+            ValueError: if a bsv path is repeated or lies inside another one.
+        """
+        # `None` replaces a mutable `[]` default argument.
+        self.paths = sorted(paths) if paths is not None else []
+        # The list is sorted, so only neighbouring entries are compared.
+        for path0, path1 in pairwise(self.paths):
+            # `is_relative_to()` returns a bool. `relative_to()` raises
+            # ValueError for unrelated paths, which made every map holding
+            # two independent entries fail. Equal bsv paths are relative to
+            # each other, so duplicates are caught by the same test.
+            if path1.bsv.is_relative_to(path0.bsv):
+                raise ValueError("bsv paths must be unique and independent")
+
+    @classmethod
+    def from_obj(cls, obj: list[dict[str, str]]) -> PathMap:
+        """Build a map from a list of ``{"bsv": ..., "fs": ...}`` mappings."""
+        return cls([
+            PathPair.from_obj(item)
+            for item in obj
+        ])
+
+    def clone(self) -> PathMap:
+        """Return a new `PathMap` holding its own list of the same pairs."""
+        return PathMap(self.paths)
diff --git a/src/bsv/repository.py b/src/bsv/repository.py
index 065b942..3332ba2 100644
--- a/src/bsv/repository.py
+++ b/src/bsv/repository.py
@@ -19,19 +19,23 @@ from dataclasses import dataclass
from datetime import datetime as DateTime
import hashlib
from io import BytesIO
-from pathlib import Path, PurePosixPath
+from pathlib import Path
import platform
import tomllib
-from typing import Any, BinaryIO, Callable, Type
+from typing import TYPE_CHECKING, BinaryIO, Callable, Type
from fastcdc import fastcdc
import tomlkit
from bsv import __version__
from bsv.exception import ConfigError
+from bsv.path_map import PathMap
from bsv.simple_cas import SimpleCas
from bsv.simple_cas.cas import Digest, SimpleCas
-from bsv.util import Hash, read_exact, read_exact_or_eof, time_from_timestamp, timestamp_from_time
+from bsv.util import Hash, read_exact, read_exact_or_eof, time_from_timestamp_us, timestamp_us_from_time
+
+if TYPE_CHECKING:
+ from bsv.tree_walker import TreeWalker
DEFAULT_MIN_CHUNK_SIZE = 1 << 12
@@ -48,7 +52,7 @@ class Repository:
_avg_chunk_size: int = DEFAULT_AVG_CHUNK_SIZE
_max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE
- _path_map: list[PathPair]
+ _path_map: PathMap
# _remotes: list[object]
_context_depth: int = 0
@@ -72,10 +76,7 @@ class Repository:
self._avg_chunk_size = bsv.get("avg_chunk_size")
self._max_chunk_size = bsv.get("max_chunk_size")
- self._path_map = [
- PathPair.from_obj(pair)
- for pair in bsv.get("path_map", [])
- ]
+ self._path_map = PathMap.from_obj(bsv.get("path_map", []))
@property
def path(self) -> Path:
@@ -90,14 +91,14 @@ class Repository:
return self._name
@property
- def path_map(self) -> list[PathPair]:
- return list(self._path_map)
+ def path_map(self) -> PathMap:
+ return self._path_map.clone()
def get_blob(self, digest: Digest) -> Blob:
with self:
return self._read(digest, object_type=b"blob", cls=Blob) # type: ignore
- def add_blob(self, stream: BinaryIO, dry_run: bool=False) -> Digest:
+ def add_blob(self, stream: BinaryIO, *, dry_run: bool=False) -> Digest:
with self:
return self._write(b"blob", stream, dry_run=dry_run)
@@ -105,11 +106,11 @@ class Repository:
with self:
return Tree.from_bytes(self, self._cas.read(digest, object_type=b"tree").data)
- def add_tree(self, tree: Tree, dry_run: bool=False) -> Digest:
+ def add_tree(self, tree: Tree, *, dry_run: bool=False) -> Digest:
with self:
return self._cas.write(b"tree", tree.to_bytes(), dry_run=dry_run)
- def add_tree_from_path(self, path: Path, dry_run: bool=False) -> Digest:
+ def add_tree_from_path(self, path: Path, *, dry_run: bool=False) -> Digest:
from bsv.tree_walker import TreeWalker
walker = TreeWalker(self, dry_run=dry_run)
return walker.add_tree(path)
@@ -118,16 +119,44 @@ class Repository:
with self:
return Snapshot.from_bytes(self, self._cas.read(digest, object_type=b"snap").data)
- def add_snapshot(self, snapshot: Snapshot, dry_run: bool=False) -> Digest:
+ def add_snapshot(self, snapshot: Snapshot, *, dry_run: bool=False) -> Digest:
with self:
return self._cas.write(b"snap", snapshot.to_bytes(), dry_run=dry_run)
+ # def take_snapshot(
+ # self,
+ # parent_digests: list[Digest] = [],
+ # *,
+ # walker_type: Type[TreeWalker] | None = None,
+ # dry_run: bool = False,
+ # ):
+ # from bsv.tree_walker import TreeWalker
+
+ # walker = (walker_type or TreeWalker)(self, dry_run=dry_run)
+
+ # # parents = [
+ # # self.get_snapshot(digest)
+ # # for digest in parent_digests
+ # # ]
+ # parent = self.get_snapshot(parent_digests[0]) if parent_digests else None
+
+ # snapshot = Snapshot(
+ # repo = self,
+ # tree_digest = walker.add_virtual_tree(self._path_map, parent=),
+ # parents = parent_digests,
+ # repo_name = self._name,
+ # timestamp = timestamp_us_from_time(DateTime.now()),
+ # )
+ # return self.add_snapshot(snapshot, dry_run=dry_run)
+
+
+
def _read(self, digest: Digest, object_type: bytes, cls: Type[ChunkedObject]) -> ChunkedObject:
obj = self._cas.read(digest, object_type=object_type)
stream = BytesIO(obj.data)
return cls.from_stream(self, stream, digest_size=self._cas._digest_size)
- def _write(self, object_type: bytes, stream: BinaryIO, dry_run: bool=False) -> Digest:
+ def _write(self, object_type: bytes, stream: BinaryIO, *, dry_run: bool=False) -> Digest:
out = BytesIO()
size = 0
for chunk in fastcdc(
@@ -332,7 +361,7 @@ class TreeItem:
object_type: bytes
size: int
permissions: int
- modification_timestamp: int
+ modification_timestamp_us: int
name: str
def __init__(
@@ -341,7 +370,7 @@ class TreeItem:
object_type: bytes,
size: int,
permissions: int,
- modification_timestamp: int,
+ modification_timestamp_us: int,
name: str,
):
if "/\\" in name:
@@ -350,15 +379,15 @@ class TreeItem:
self.object_type = object_type
self.size = size
self.permissions = permissions
- self.modification_timestamp = modification_timestamp
+ self.modification_timestamp_us = modification_timestamp_us
self.name = name
@property
def modification_time(self) -> DateTime:
- return time_from_timestamp(self.modification_timestamp)
+ return time_from_timestamp_us(self.modification_timestamp_us)
@modification_time.setter
def modification_time(self, time: DateTime):
- self.modification_timestamp = timestamp_from_time(time)
+ self.modification_timestamp_us = timestamp_us_from_time(time)
@classmethod
def from_stream(cls, stream: BinaryIO, digest_size: int) -> TreeItem | None:
@@ -370,7 +399,7 @@ class TreeItem:
object_type = read_exact(stream, 4),
size = int.from_bytes(read_exact(stream, 8)),
permissions = int.from_bytes(read_exact(stream, 2)),
- modification_timestamp = int.from_bytes(read_exact(stream, 8), signed=True),
+ modification_timestamp_us = int.from_bytes(read_exact(stream, 8), signed=True),
name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
)
@@ -379,7 +408,7 @@ class TreeItem:
stream.write(self.object_type)
stream.write(self.size.to_bytes(8))
stream.write(self.permissions.to_bytes(2))
- stream.write(self.modification_timestamp.to_bytes(8, signed=True))
+ stream.write(self.modification_timestamp_us.to_bytes(8, signed=True))
name_bytes = self.name.encode("utf-8")
stream.write(len(name_bytes).to_bytes(2))
stream.write(name_bytes)
@@ -391,23 +420,31 @@ class TreeItem:
class Snapshot:
repo: Repository
tree_digest: Digest
+ parents: list[Digest]
repo_name: str
- timestamp: int
+ timestamp_us: int
+
+ def __post_init__(self):
+ assert len(self.parents) < 256
@property
def time(self) -> DateTime:
- return time_from_timestamp(self.timestamp)
+ return time_from_timestamp_us(self.timestamp_us)
@time.setter
def time(self, time: DateTime):
- self.timestamp = timestamp_from_time(time)
+ self.timestamp_us = timestamp_us_from_time(time)
@classmethod
def from_stream(cls, repo: Repository, stream: BinaryIO) -> Snapshot:
return Snapshot(
repo = repo,
tree_digest = Digest(read_exact(stream, repo._cas._digest_size)),
+ parents = [
+ Digest(read_exact(stream, repo._cas._digest_size))
+ for _ in range(int.from_bytes(read_exact(stream, 1)))
+ ],
repo_name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
- timestamp = int.from_bytes(read_exact(stream, 8), signed=True),
+ timestamp_us = int.from_bytes(read_exact(stream, 8), signed=True),
)
@classmethod
@@ -416,40 +453,17 @@ class Snapshot:
return cls.from_stream(repo, stream)
def write(self, stream: BinaryIO):
+ assert len(self.parents) < 256
stream.write(self.tree_digest.digest)
+ stream.write(len(self.parents).to_bytes(1))
+ for parent in self.parents:
+ stream.write(parent.digest)
repo_name_bytes = self.repo_name.encode("utf-8")
stream.write(len(repo_name_bytes).to_bytes(2))
stream.write(repo_name_bytes)
- stream.write(self.timestamp.to_bytes(8, signed=True))
+ stream.write(self.timestamp_us.to_bytes(8, signed=True))
def to_bytes(self) -> bytes:
stream = BytesIO()
self.write(stream)
return stream.getvalue()
-
-
-
-
-class PathPair:
- bsv: PurePosixPath
- fs: Path
-
- def __init__(self, bsv: PurePosixPath, fs: Path):
- self.bsv = bsv
- self.fs = fs
-
- @classmethod
- def from_obj(cls, obj: dict[str, Any]) -> PathPair:
- bsv = PurePosixPath(obj["bsv"])
- fs = Path(obj["fs"])
-
- if not bsv.is_absolute() or not fs.is_absolute():
- raise ValueError("paths in path_map must be absolute")
-
- return cls(
- bsv = obj["bsv"],
- fs = obj["fs"],
- )
-
- def __lt__(self, rhs: PathPair) -> bool:
- return self.bsv < rhs.bsv
diff --git a/src/bsv/tree_walker.py b/src/bsv/tree_walker.py
index 228303d..df25cb4 100644
--- a/src/bsv/tree_walker.py
+++ b/src/bsv/tree_walker.py
@@ -15,12 +15,14 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
+from datetime import datetime as DateTime, timedelta as TimeDelta
from enum import Enum
from os import stat_result
from pathlib import Path
import stat
from bsv.object import Digest
+from bsv.path_map import PathMap
from bsv.repository import Repository, Tree, TreeItem
from bsv.util import is_bsv_repository, object_type_from_mode
@@ -28,9 +30,20 @@ from bsv.util import is_bsv_repository, object_type_from_mode
class Action(Enum):
ADD = "add"
UPDATE = "update"
+ REMOVE = "remove"
IGNORE = "ignore"
ERROR = "error"
+    @classmethod
+    def from_digests(cls, digest: Digest, source_digest: Digest | None) -> tuple[Action, IgnoreCause | None]:
+        """Derive the action to report from a new digest and the previous one.
+
+        Returns `(ADD, None)` when `source_digest` is falsy,
+        `(IGNORE, IgnoreCause.UNCHANGED)` when both digests compare equal and
+        `(UPDATE, None)` otherwise. `digest` itself must be truthy.
+        """
+        assert digest
+        if not source_digest:
+            return Action.ADD, None
+        if source_digest == digest:
+            return Action.IGNORE, IgnoreCause.UNCHANGED
+        return Action.UPDATE, None
+
class IgnoreCause(Enum):
IGNORE_RULE = "ignore_rule"
UNCHANGED = "unchanged"
@@ -39,88 +52,177 @@ class IgnoreCause(Enum):
class TreeWalker:
_repo: Repository
+ _time_rounding_us: int = 2000000
+ _force_hash: bool = False
_dry_run: bool = False
- def __init__(self, repo: Repository, dry_run: bool=False):
+ def __init__(
+ self,
+ repo: Repository,
+ *,
+ time_rounding_us: int = 2000000,
+ force_hash: bool = False,
+ dry_run: bool = False,
+ ):
self._repo = repo
+ self._time_rounding_us = time_rounding_us
+ self._force_hash = force_hash
self._dry_run = dry_run
- def add_tree(self, path: Path) -> Digest:
+ # def add_virtual_tree(self, paths: PathMap) -> Digest:
+ # root = {}
+ # for pair in paths.paths:
+ # vdir = root
+ # for part in pair.bsv.parts[:-1]:
+ # vdir = vdir.setdefault(part, {})
+ # vdir[pair.bsv.parts[-1]] = pair.fs
+
+ # return self._add_virtual_tree(root)
+
+ # def _add_virtual_tree(self, vtree: dict[str, dict | Path]) -> Digest:
+ # tree = Tree(self._repo, [])
+ # for name, value in vtree.items():
+ # if isinstance(value, dict):
+ # digest = self._add_virtual_tree(value)
+ # elif isinstance(value, Path):
+ # digest = self.add_tree(value)
+ # else:
+ # raise TypeError(f"unexpected type {type(vtree).__name__} for vtree")
+ # tree.items.append(TreeItem(
+ # digest = digest,
+ # object_type = b"tree",
+ # size = 0,
+ # permissions = 0o766,
+ # modification_timestamp = timestamp_us_from_time(DateTime.now()),
+ # name = name,
+ # ))
+ # return self._repo.add_tree(tree, dry_run=self._dry_run)
+
+ def add_tree(self, path: Path, *, source_digest: Digest | None=None) -> Digest:
pstat = path.stat(follow_symlinks=False)
if self.ignore(path, pstat):
self.report(Action.IGNORE, path, pstat, IgnoreCause.IGNORE_RULE)
return Digest()
- return self._add_tree(path, pstat)
+ return self._add_tree(path, pstat, source_digest=source_digest)
+
+    def _add_tree(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest:
+        """Store the directory `path` as a tree object and return its digest.
+
+        The sorted children of `path` are walked in lockstep with the items of
+        the tree stored at `source_digest` (when given), pairing them by name.
+        Source items without a matching child are reported as removed. A
+        non-directory child whose size and rounded modification time match its
+        source item is reused without being stored again, unless `_force_hash`
+        is set. Every other child is stored through `_add_symlink()`,
+        `_add_tree()` or `_add_blob()`; unsupported types are reported and
+        skipped.
+
+        An exception raised while handling a child is reported as an error and
+        the child is left out of the resulting tree.
+        """
+        source = self._repo.get_tree(source_digest) if source_digest else None
- def _add_tree(self, path: Path, pstat: stat_result) -> Digest:
         tree = Tree(self._repo, [])
- for item in sorted(path.iterdir()):
- digest = Digest()
- try:
- istat = item.lstat()
- if self.ignore(item, istat):
- self.report(Action.IGNORE, item, istat, IgnoreCause.IGNORE_RULE)
- continue
- object_type = object_type_from_mode(istat.st_mode)
- if object_type == b"slnk":
- digest = self._add_symlink(item, istat)
- elif object_type == b"tree":
- digest = self._add_tree(item, istat)
- elif object_type == b"blob":
- digest = self._add_blob(item, istat)
- else:
- self.report(Action.IGNORE, item, istat, IgnoreCause.UNSUPPORTED_TYPE)
+        subpaths = sorted(path.iterdir())
+
+        subpath_index = 0
+        source_item_index = 0
+
+        # NOTE(review): the merge below relies on `source.items` being ordered
+        # by name the same way as `sorted(path.iterdir())` - confirm.
+        while subpath_index < len(subpaths) or (source and source_item_index < len(source.items)):
+            subpath = subpaths[subpath_index] if subpath_index < len(subpaths) else None
+            source_item = source.items[source_item_index] if source and source_item_index < len(source.items) else None
+
+            # When the names differ, only the smaller one is handled in this
+            # iteration; the other side is looked at again next time.
+            if subpath and source_item:
+                if subpath.name < source_item.name:
+                    source_item = None
+                elif subpath.name > source_item.name:
+                    subpath = None
+
+            if subpath is not None:
+                subpath_index += 1
+            if source_item is not None:
+                source_item_index += 1
+
+            if subpath is not None:
+                digest = Digest()
+                try:
+                    istat = subpath.lstat()
+
+                    if self.ignore(subpath, istat, source=source_item):
+                        self.report(Action.IGNORE, subpath, istat, IgnoreCause.IGNORE_RULE)
+                        continue
+
+                    # Compare the child's own stat (`istat`) with its source
+                    # item; `pstat` describes the parent directory `path`.
+                    if (source_item is not None and
+                        not self._force_hash and
+                        not stat.S_ISDIR(istat.st_mode) and
+                        istat.st_size == source_item.size and
+                        istat.st_mtime_ns // (1000 * self._time_rounding_us) == source_item.modification_timestamp_us // self._time_rounding_us
+                    ):
+                        self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNCHANGED)
+                        tree.items.append(source_item)
+                        continue
+
+                    sub_source_digest = source_item and source_item.digest
+                    object_type = object_type_from_mode(istat.st_mode)
+                    if object_type == b"slnk":
+                        digest = self._add_symlink(subpath, istat, source_digest=sub_source_digest)
+                    elif object_type == b"tree":
+                        digest = self._add_tree(subpath, istat, source_digest=sub_source_digest)
+                    elif object_type == b"blob":
+                        digest = self._add_blob(subpath, istat, source_digest=sub_source_digest)
+                    else:
+                        self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNSUPPORTED_TYPE)
+                        continue
+                except Exception as err:
+                    self.report(Action.ERROR, subpath, None, err)
                     continue
- except Exception as err:
- self.report(Action.ERROR, item, None, err)
- continue
- if digest:
- self.report(Action.ADD, path, pstat)
- tree.items.append(TreeItem(
- digest = digest,
- object_type = object_type,
- size = istat.st_size,
- permissions = stat.S_IMODE(istat.st_mode),
- modification_timestamp = istat.st_mtime_ns,
- name = item.name,
- ))
+                if digest:
+                    tree.items.append(TreeItem(
+                        digest = digest,
+                        object_type = object_type,
+                        size = istat.st_size,
+                        permissions = stat.S_IMODE(istat.st_mode),
+                        modification_timestamp_us = istat.st_mtime_ns // 1000,
+                        name = subpath.name,
+                    ))
+            elif source_item:
+                # Present in the source tree but no longer on disk.
+                self.report(Action.REMOVE, path / source_item.name, None, source_item)
- return self._repo.add_tree(tree, dry_run=self._dry_run)
+        digest = self._repo.add_tree(tree, dry_run=self._dry_run)
+        action, info = Action.from_digests(digest, source_digest)
+        self.report(action, path, pstat, info)
+        return digest
- def _add_symlink(self, path: Path, pstat: stat_result) -> Digest:
+
+ def _add_symlink(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest:
+ """Store the target of symlink `path` as a "slnk" object, report the outcome and return the digest."""
# TODO: Store symlink relative to current dir ?
# * What about symlink that points outside of the backup dirs
# * Should symlinks that points inside the backup dirs but in another
# mount-point adjusted ?
# * Should absolute symlink be restored as absolute ?
- self.report(Action.ADD, path, pstat)
- return self._repo._cas.write(
+ # The target is stored as the UTF-8 encoding of its POSIX form.
+ digest = self._repo._cas.write(
b"slnk",
path.readlink().as_posix().encode("utf-8"),
dry_run = self._dry_run,
)
- def _add_blob(self, path: Path, pstat: stat_result) -> Digest:
- self.report(Action.ADD, path, pstat)
+ # ADD / UPDATE / IGNORE (unchanged) is derived from the two digests.
+ action, info = Action.from_digests(digest, source_digest)
+ self.report(action, path, pstat, info)
+ return digest
+
+ def _add_blob(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest:
+ """Store the content of file `path` as a blob, report the outcome and return the digest."""
with path.open("rb") as stream:
- return self._repo.add_blob(stream, dry_run=self._dry_run)
+ digest = self._repo.add_blob(stream, dry_run=self._dry_run)
+ # ADD / UPDATE / IGNORE (unchanged) is derived from the two digests.
+ action, info = Action.from_digests(digest, source_digest)
+ self.report(action, path, pstat, info)
+ return digest
- def ignore(self, path: Path, pstat: stat_result) -> bool:
+
+ def ignore(self, path: Path, pstat: stat_result, *, source: TreeItem | None=None) -> bool:
return is_bsv_repository(path)
- def report(self, action: Action, path: Path, pstat: stat_result | None, info: IgnoreCause | Exception | None=None):
+    def report(self, action: Action, path: Path, pstat: stat_result | None, info: IgnoreCause | Exception | TreeItem | None=None):
+        """Print a one-line message describing `action` applied to `path`.
+
+        `info` refines the message: the cause of an IGNORE, the exception of
+        an ERROR, or the `TreeItem` that disappeared for a REMOVE.
+        """
         match action, info:
             case (Action.ADD, None):
                 print(f"Add: {path}")
+            case (Action.UPDATE, None):
+                print(f"Update: {path}")
+            case (Action.REMOVE, item) if isinstance(item, TreeItem):
+                # `_add_tree()` already passes the full path of the missing
+                # entry; joining `item.name` again would print the name twice.
+                print(f"Remove: {path}")
             case (Action.IGNORE, IgnoreCause.IGNORE_RULE):
                 print(f"Ignore (rule): {path}")
             case (Action.IGNORE, IgnoreCause.UNCHANGED):
                 print(f"Ignore (unchanged): {path}")
             case (Action.IGNORE, IgnoreCause.UNSUPPORTED_TYPE) if pstat is not None:
- assert pstat is not None
                 print(f"Ignore (unsupported type {path_type_name(pstat)}): {path}")
             case (Action.ERROR, _) if isinstance(info, Exception):
                 print(f"Error {info}: {path}")
diff --git a/src/bsv/util.py b/src/bsv/util.py
index 2c10877..60372d8 100644
--- a/src/bsv/util.py
+++ b/src/bsv/util.py
@@ -25,10 +25,10 @@ from typing import BinaryIO
EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
-def time_from_timestamp(timestamp: int) -> DateTime:
+def time_from_timestamp_us(timestamp: int) -> DateTime:
return EPOCH + TimeDelta(microseconds=timestamp)
-def timestamp_from_time(time: DateTime) -> int:
+def timestamp_us_from_time(time: DateTime) -> int:
return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)
diff --git a/tests/test_repository.py b/tests/test_repository.py
index a46e143..4ba4e09 100644
--- a/tests/test_repository.py
+++ b/tests/test_repository.py
@@ -18,12 +18,13 @@ from datetime import UTC, datetime
from os import stat_result
from pathlib import Path
from random import randbytes
+from shutil import rmtree
from typing import Iterator
import pytest
from tempfile import TemporaryDirectory
-from bsv.repository import Repository, Snapshot, Tree, TreeItem, create_repository, timestamp_from_time
+from bsv.repository import Repository, Snapshot, Tree, TreeItem, create_repository, timestamp_us_from_time
from bsv.simple_cas.cas import Digest
from bsv.tree_walker import Action, IgnoreCause, TreeWalker
@@ -66,7 +67,7 @@ def test_read_write_tree(repo: Repository):
object_type = b"blob",
size = 123,
permissions = 0o744,
- modification_timestamp = timestamp_from_time(now),
+ modification_timestamp_us = timestamp_us_from_time(now),
name = "xyz",
),
TreeItem(
@@ -74,7 +75,7 @@ def test_read_write_tree(repo: Repository):
object_type = b"slnk",
size = 42,
permissions = 0o777,
- modification_timestamp = timestamp_from_time(now),
+ modification_timestamp_us = timestamp_us_from_time(now),
name = "foobar",
),
]
@@ -90,8 +91,12 @@ def test_read_write_snapshot(repo: Repository):
snapshot = Snapshot(
repo = repo,
tree_digest = Digest(bytes([42]) * repo._cas._digest_size),
+ parents = [
+ Digest(bytes([123]) * repo._cas._digest_size),
+ Digest(bytes([124]) * repo._cas._digest_size),
+ ],
repo_name = "test_repo",
- timestamp = timestamp_from_time(datetime.now()),
+ timestamp_us = timestamp_us_from_time(datetime.now()),
)
assert Snapshot.from_bytes(repo, snapshot.to_bytes()) == snapshot
@@ -104,7 +109,7 @@ class TestTreeWalker(TreeWalker):
reports: list
def __init__(self, repo: Repository, dry_run: bool=False):
- super().__init__(repo, dry_run)
+ super().__init__(repo, dry_run=dry_run)
self.reports = []
def report(
@@ -115,12 +120,12 @@ class TestTreeWalker(TreeWalker):
info: IgnoreCause | Exception | None = None
):
super().report(action, path, pstat, info)
- self.reports.append((action, path, pstat, info))
+ self.reports.append((action, path, info if action != Action.REMOVE else None))
def test_add_tree(tmp_dir: Path, repo: Repository):
- dir = tmp_dir / "test"
- structure = {
+ dir = tmp_dir / "test0"
+ structure0 = {
"folder": {
"sub_folder": {
"empty_folder": {},
@@ -134,11 +139,28 @@ def test_add_tree(tmp_dir: Path, repo: Repository):
"bsv_config.toml": b"[bsv]\n",
},
}
+ structure1 = {
+ "folder": {
+ "sub_folder": {
+ "empty_folder": {},
+ "foo.txt": b"Hello World!\n",
+ },
+ "bar.dat": bytes(range(256)) * 2,
+ },
+ "new_file": b"whatever",
+ "Another test with long name and spaces and a bang !": b"Should works.\n",
+ "bsv_repo": {
+ "bsv_config.toml": b"[bsv]\n",
+ },
+ }
- create_file_structure(dir, structure)
+ expected0 = dict(structure0)
+ del expected0["bsv_repo"]
- walker = TestTreeWalker(repo)
- dir_digest = walker.add_tree(dir)
+ expected1 = dict(structure1)
+ del expected1["bsv_repo"]
+
+ create_file_structure(dir, structure0)
def check(digest: Digest, value: dict | bytes):
if isinstance(value, dict):
@@ -152,19 +174,79 @@ def test_add_tree(tmp_dir: Path, repo: Repository):
data = blob.reader().read()
assert data == value
- expected = dict(structure)
- del expected["bsv_repo"]
- check(dir_digest, expected)
+ walker = TestTreeWalker(repo)
+ dir_digest0 = walker.add_tree(dir)
+ assert walker.reports == [
+ (Action.ADD, dir / "Another test with long name and spaces and a bang !", None),
+ (Action.IGNORE, dir / "bsv_repo", IgnoreCause.IGNORE_RULE),
+ (Action.ADD, dir / "folder/bar.dat", None),
+ (Action.ADD, dir / "folder/sub_folder/empty_folder", None),
+ (Action.ADD, dir / "folder/sub_folder/foo.txt", None),
+ (Action.ADD, dir / "folder/sub_folder", None),
+ (Action.ADD, dir / "folder/test.py", None),
+ (Action.ADD, dir / "folder", None),
+ (Action.ADD, dir, None),
+ ]
+ check(dir_digest0, expected0)
+
+ create_file_structure(dir, structure1)
+
+ walker.reports.clear()
+ dir_digest1 = walker.add_tree(dir, source_digest=dir_digest0)
+ assert walker.reports == [
+ (Action.IGNORE, dir / "Another test with long name and spaces and a bang !", IgnoreCause.UNCHANGED),
+ (Action.IGNORE, dir / "bsv_repo", IgnoreCause.IGNORE_RULE),
+ (Action.UPDATE, dir / "folder/bar.dat", None),
+ (Action.IGNORE, dir / "folder/sub_folder/empty_folder", IgnoreCause.UNCHANGED),
+ (Action.IGNORE, dir / "folder/sub_folder/foo.txt", IgnoreCause.UNCHANGED),
+ (Action.IGNORE, dir / "folder/sub_folder", IgnoreCause.UNCHANGED),
+ (Action.REMOVE, dir / "folder/test.py", None),
+ (Action.UPDATE, dir / "folder", None),
+ (Action.ADD, dir / "new_file", None),
+ (Action.UPDATE, dir, None),
+ ]
+ check(dir_digest1, expected1)
def create_file_structure(dst: Path, value: dict | bytes):
+ # Make `dst` match `value`: bytes give the content of a file, a dict maps
+ # entry names to the content of a directory. Unlike the previous version,
+ # `dst` may already exist, so the helper can update a tree in place.
- assert not dst.exists()
- if isinstance(value, dict):
- dst.mkdir()
- for name, item in value.items():
- create_file_structure(dst / name, item)
- elif isinstance(value, bytes):
- dst.write_bytes(value)
+ if isinstance(value, bytes):
+ if dst.is_dir():
+ rmtree(str(dst))
+ # The file is only written when it is missing or its content differs.
+ if not dst.is_file() or dst.read_bytes() != value:
+ dst.write_bytes(value)
+ elif isinstance(value, dict):
+ if dst.is_file():
+ dst.unlink()
+ if not dst.is_dir():
+ dst.mkdir()
+
+ # Wanted entries and existing paths are walked together, both sorted.
+ items = sorted(value.items())
+ fs_paths = sorted(dst.iterdir())
+
+ item_index = 0
+ fs_path_index = 0
+
+ while item_index < len(value) or fs_path_index < len(fs_paths):
+ name, subitem = items[item_index] if item_index < len(items) else (None, None)
+ fs_path = fs_paths[fs_path_index] if fs_path_index < len(fs_paths) else None
+
+ # When the names differ, only the smaller one is handled this turn.
+ if name and fs_path:
+ if name < fs_path.name:
+ fs_path = None
+ elif name > fs_path.name:
+ name = None
+
+ if name:
+ item_index += 1
+ if fs_path:
+ fs_path_index += 1
+
+ # A wanted entry is created or updated recursively; a path found only
+ # on disk is deleted.
+ if name:
+ create_file_structure(dst / name, subitem) # type: ignore
+ elif fs_path and fs_path.is_dir():
+ rmtree(fs_path)
+ elif fs_path:
+ fs_path.unlink()
else:
raise TypeError(f"invalid type {type(value).__name__} for parameter value")