Browse Source

Improve TreeWalker algorithm.

master
Draklaw 2 years ago
parent
commit
073fd5e567
  1. 58
      src/bsv/path_map.py
  2. 122
      src/bsv/repository.py
  3. 188
      src/bsv/tree_walker.py
  4. 4
      src/bsv/util.py
  5. 124
      tests/test_repository.py

58
src/bsv/path_map.py

@ -0,0 +1,58 @@
# bsv - Backup, Synchronization, Versioning
# Copyright (C) 2023 Simon Boyé
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
from dataclasses import dataclass
from itertools import pairwise
from pathlib import Path, PurePosixPath
from typing import Any
@dataclass(order=True, frozen=True, slots=True)
class PathPair:
bsv: PurePosixPath
fs: Path
def __post_init__(self):
if not self.bsv.is_absolute() or not self.fs.is_absolute():
raise ValueError("paths in path_map must be absolute")
@classmethod
def from_obj(cls, obj: dict[str, str]) -> PathPair:
return cls(
bsv = PurePosixPath(obj["bsv"]),
fs = Path(obj["fs"]),
)
class PathMap:
paths: list[PathPair]
def __init__(self, paths: list[PathPair]=[]):
self.paths = sorted(paths)
for path0, path1 in pairwise(self.paths):
if path0 == path1 or path1.bsv.relative_to(path0.bsv):
raise ValueError("bsv paths must be unique and independent")
@classmethod
def from_obj(cls, obj: list[dict[str, str]]) -> PathMap:
return cls([
PathPair.from_obj(item)
for item in obj
])
def clone(self) -> PathMap:
return PathMap(self.paths)

122
src/bsv/repository.py

@ -19,19 +19,23 @@ from dataclasses import dataclass
from datetime import datetime as DateTime
import hashlib
from io import BytesIO
from pathlib import Path, PurePosixPath
from pathlib import Path
import platform
import tomllib
from typing import Any, BinaryIO, Callable, Type
from typing import TYPE_CHECKING, BinaryIO, Callable, Type
from fastcdc import fastcdc
import tomlkit
from bsv import __version__
from bsv.exception import ConfigError
from bsv.path_map import PathMap
from bsv.simple_cas import SimpleCas
from bsv.simple_cas.cas import Digest, SimpleCas
from bsv.util import Hash, read_exact, read_exact_or_eof, time_from_timestamp, timestamp_from_time
from bsv.util import Hash, read_exact, read_exact_or_eof, time_from_timestamp_us, timestamp_us_from_time
if TYPE_CHECKING:
from bsv.tree_walker import TreeWalker
DEFAULT_MIN_CHUNK_SIZE = 1 << 12
@ -48,7 +52,7 @@ class Repository:
_avg_chunk_size: int = DEFAULT_AVG_CHUNK_SIZE
_max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE
_path_map: list[PathPair]
_path_map: PathMap
# _remotes: list[object]
_context_depth: int = 0
@ -72,10 +76,7 @@ class Repository:
self._avg_chunk_size = bsv.get("avg_chunk_size")
self._max_chunk_size = bsv.get("max_chunk_size")
self._path_map = [
PathPair.from_obj(pair)
for pair in bsv.get("path_map", [])
]
self._path_map = PathMap.from_obj(bsv.get("path_map", []))
@property
def path(self) -> Path:
@ -90,14 +91,14 @@ class Repository:
return self._name
@property
def path_map(self) -> list[PathPair]:
return list(self._path_map)
def path_map(self) -> PathMap:
return self._path_map.clone()
def get_blob(self, digest: Digest) -> Blob:
with self:
return self._read(digest, object_type=b"blob", cls=Blob) # type: ignore
def add_blob(self, stream: BinaryIO, dry_run: bool=False) -> Digest:
def add_blob(self, stream: BinaryIO, *, dry_run: bool=False) -> Digest:
with self:
return self._write(b"blob", stream, dry_run=dry_run)
@ -105,11 +106,11 @@ class Repository:
with self:
return Tree.from_bytes(self, self._cas.read(digest, object_type=b"tree").data)
def add_tree(self, tree: Tree, dry_run: bool=False) -> Digest:
def add_tree(self, tree: Tree, *, dry_run: bool=False) -> Digest:
with self:
return self._cas.write(b"tree", tree.to_bytes(), dry_run=dry_run)
def add_tree_from_path(self, path: Path, dry_run: bool=False) -> Digest:
def add_tree_from_path(self, path: Path, *, dry_run: bool=False) -> Digest:
from bsv.tree_walker import TreeWalker
walker = TreeWalker(self, dry_run=dry_run)
return walker.add_tree(path)
@ -118,16 +119,44 @@ class Repository:
with self:
return Snapshot.from_bytes(self, self._cas.read(digest, object_type=b"snap").data)
def add_snapshot(self, snapshot: Snapshot, dry_run: bool=False) -> Digest:
def add_snapshot(self, snapshot: Snapshot, *, dry_run: bool=False) -> Digest:
with self:
return self._cas.write(b"snap", snapshot.to_bytes(), dry_run=dry_run)
# def take_snapshot(
# self,
# parent_digests: list[Digest] = [],
# *,
# walker_type: Type[TreeWalker] | None = None,
# dry_run: bool = False,
# ):
# from bsv.tree_walker import TreeWalker
# walker = (walker_type or TreeWalker)(self, dry_run=dry_run)
# # parents = [
# # self.get_snapshot(digest)
# # for digest in parent_digests
# # ]
# parent = self.get_snapshot(parent_digests[0]) if parent_digests else None
# snapshot = Snapshot(
# repo = self,
# tree_digest = walker.add_virtual_tree(self._path_map, parent=),
# parents = parent_digests,
# repo_name = self._name,
# timestamp = timestamp_us_from_time(DateTime.now()),
# )
# return self.add_snapshot(snapshot, dry_run=dry_run)
def _read(self, digest: Digest, object_type: bytes, cls: Type[ChunkedObject]) -> ChunkedObject:
obj = self._cas.read(digest, object_type=object_type)
stream = BytesIO(obj.data)
return cls.from_stream(self, stream, digest_size=self._cas._digest_size)
def _write(self, object_type: bytes, stream: BinaryIO, dry_run: bool=False) -> Digest:
def _write(self, object_type: bytes, stream: BinaryIO, *, dry_run: bool=False) -> Digest:
out = BytesIO()
size = 0
for chunk in fastcdc(
@ -332,7 +361,7 @@ class TreeItem:
object_type: bytes
size: int
permissions: int
modification_timestamp: int
modification_timestamp_us: int
name: str
def __init__(
@ -341,7 +370,7 @@ class TreeItem:
object_type: bytes,
size: int,
permissions: int,
modification_timestamp: int,
modification_timestamp_us: int,
name: str,
):
if "/\\" in name:
@ -350,15 +379,15 @@ class TreeItem:
self.object_type = object_type
self.size = size
self.permissions = permissions
self.modification_timestamp = modification_timestamp
self.modification_timestamp_us = modification_timestamp_us
self.name = name
@property
def modification_time(self) -> DateTime:
return time_from_timestamp(self.modification_timestamp)
return time_from_timestamp_us(self.modification_timestamp_us)
@modification_time.setter
def modification_time(self, time: DateTime):
self.modification_timestamp = timestamp_from_time(time)
self.modification_timestamp_us = timestamp_us_from_time(time)
@classmethod
def from_stream(cls, stream: BinaryIO, digest_size: int) -> TreeItem | None:
@ -370,7 +399,7 @@ class TreeItem:
object_type = read_exact(stream, 4),
size = int.from_bytes(read_exact(stream, 8)),
permissions = int.from_bytes(read_exact(stream, 2)),
modification_timestamp = int.from_bytes(read_exact(stream, 8), signed=True),
modification_timestamp_us = int.from_bytes(read_exact(stream, 8), signed=True),
name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
)
@ -379,7 +408,7 @@ class TreeItem:
stream.write(self.object_type)
stream.write(self.size.to_bytes(8))
stream.write(self.permissions.to_bytes(2))
stream.write(self.modification_timestamp.to_bytes(8, signed=True))
stream.write(self.modification_timestamp_us.to_bytes(8, signed=True))
name_bytes = self.name.encode("utf-8")
stream.write(len(name_bytes).to_bytes(2))
stream.write(name_bytes)
@ -391,23 +420,31 @@ class TreeItem:
class Snapshot:
repo: Repository
tree_digest: Digest
parents: list[Digest]
repo_name: str
timestamp: int
timestamp_us: int
def __post_init__(self):
assert len(self.parents) < 256
@property
def time(self) -> DateTime:
return time_from_timestamp(self.timestamp)
return time_from_timestamp_us(self.timestamp_us)
@time.setter
def time(self, time: DateTime):
self.timestamp = timestamp_from_time(time)
self.timestamp_us = timestamp_us_from_time(time)
@classmethod
def from_stream(cls, repo: Repository, stream: BinaryIO) -> Snapshot:
return Snapshot(
repo = repo,
tree_digest = Digest(read_exact(stream, repo._cas._digest_size)),
parents = [
Digest(read_exact(stream, repo._cas._digest_size))
for _ in range(int.from_bytes(read_exact(stream, 1)))
],
repo_name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
timestamp = int.from_bytes(read_exact(stream, 8), signed=True),
timestamp_us = int.from_bytes(read_exact(stream, 8), signed=True),
)
@classmethod
@ -416,40 +453,17 @@ class Snapshot:
return cls.from_stream(repo, stream)
def write(self, stream: BinaryIO):
assert len(self.parents) < 256
stream.write(self.tree_digest.digest)
stream.write(len(self.parents).to_bytes(1))
for parent in self.parents:
stream.write(parent.digest)
repo_name_bytes = self.repo_name.encode("utf-8")
stream.write(len(repo_name_bytes).to_bytes(2))
stream.write(repo_name_bytes)
stream.write(self.timestamp.to_bytes(8, signed=True))
stream.write(self.timestamp_us.to_bytes(8, signed=True))
def to_bytes(self) -> bytes:
stream = BytesIO()
self.write(stream)
return stream.getvalue()
class PathPair:
bsv: PurePosixPath
fs: Path
def __init__(self, bsv: PurePosixPath, fs: Path):
self.bsv = bsv
self.fs = fs
@classmethod
def from_obj(cls, obj: dict[str, Any]) -> PathPair:
bsv = PurePosixPath(obj["bsv"])
fs = Path(obj["fs"])
if not bsv.is_absolute() or not fs.is_absolute():
raise ValueError("paths in path_map must be absolute")
return cls(
bsv = obj["bsv"],
fs = obj["fs"],
)
def __lt__(self, rhs: PathPair) -> bool:
return self.bsv < rhs.bsv

188
src/bsv/tree_walker.py

@ -15,12 +15,14 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
from datetime import datetime as DateTime, timedelta as TimeDelta
from enum import Enum
from os import stat_result
from pathlib import Path
import stat
from bsv.object import Digest
from bsv.path_map import PathMap
from bsv.repository import Repository, Tree, TreeItem
from bsv.util import is_bsv_repository, object_type_from_mode
@ -28,9 +30,20 @@ from bsv.util import is_bsv_repository, object_type_from_mode
class Action(Enum):
ADD = "add"
UPDATE = "update"
REMOVE = "remove"
IGNORE = "ignore"
ERROR = "error"
@classmethod
def from_digests(cls, digest: Digest, source_digest: Digest | None) -> tuple[Action, IgnoreCause | None]:
assert digest
if not source_digest:
return Action.ADD, None
elif source_digest == digest:
return Action.IGNORE, IgnoreCause.UNCHANGED
else:
return Action.UPDATE, None
class IgnoreCause(Enum):
IGNORE_RULE = "ignore_rule"
UNCHANGED = "unchanged"
@ -39,88 +52,177 @@ class IgnoreCause(Enum):
class TreeWalker:
_repo: Repository
_time_rounding_us: int = 2000000
_force_hash: bool = False
_dry_run: bool = False
def __init__(self, repo: Repository, dry_run: bool=False):
def __init__(
self,
repo: Repository,
*,
time_rounding_us: int = 2000000,
force_hash: bool = False,
dry_run: bool = False,
):
self._repo = repo
self._time_rounding_us = time_rounding_us
self._force_hash = force_hash
self._dry_run = dry_run
def add_tree(self, path: Path) -> Digest:
# def add_virtual_tree(self, paths: PathMap) -> Digest:
# root = {}
# for pair in paths.paths:
# vdir = root
# for part in pair.bsv.parts[:-1]:
# vdir = vdir.setdefault(part, {})
# vdir[pair.bsv.parts[-1]] = pair.fs
# return self._add_virtual_tree(root)
# def _add_virtual_tree(self, vtree: dict[str, dict | Path]) -> Digest:
# tree = Tree(self._repo, [])
# for name, value in vtree.items():
# if isinstance(value, dict):
# digest = self._add_virtual_tree(value)
# elif isinstance(value, Path):
# digest = self.add_tree(value)
# else:
# raise TypeError(f"unexpected type {type(vtree).__name__} for vtree")
# tree.items.append(TreeItem(
# digest = digest,
# object_type = b"tree",
# size = 0,
# permissions = 0o766,
# modification_timestamp = timestamp_us_from_time(DateTime.now()),
# name = name,
# ))
# return self._repo.add_tree(tree, dry_run=self._dry_run)
def add_tree(self, path: Path, *, source_digest: Digest | None=None) -> Digest:
pstat = path.stat(follow_symlinks=False)
if self.ignore(path, pstat):
self.report(Action.IGNORE, path, pstat, IgnoreCause.IGNORE_RULE)
return Digest()
return self._add_tree(path, pstat)
return self._add_tree(path, pstat, source_digest=source_digest)
def _add_tree(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest:
source = self._repo.get_tree(source_digest) if source_digest else None
def _add_tree(self, path: Path, pstat: stat_result) -> Digest:
tree = Tree(self._repo, [])
for item in sorted(path.iterdir()):
digest = Digest()
try:
istat = item.lstat()
if self.ignore(item, istat):
self.report(Action.IGNORE, item, istat, IgnoreCause.IGNORE_RULE)
continue
object_type = object_type_from_mode(istat.st_mode)
if object_type == b"slnk":
digest = self._add_symlink(item, istat)
elif object_type == b"tree":
digest = self._add_tree(item, istat)
elif object_type == b"blob":
digest = self._add_blob(item, istat)
else:
self.report(Action.IGNORE, item, istat, IgnoreCause.UNSUPPORTED_TYPE)
subpaths = sorted(path.iterdir())
subpath_index = 0
source_item_index = 0
while subpath_index < len(subpaths) or (source and source_item_index < len(source.items)):
subpath = subpaths[subpath_index] if subpath_index < len(subpaths) else None
source_item = source.items[source_item_index] if source and source_item_index < len(source.items) else None
if subpath and source_item:
if subpath.name < source_item.name:
source_item = None
elif subpath.name > source_item.name:
subpath = None
if subpath is not None:
subpath_index += 1
if source_item is not None:
source_item_index += 1
if subpath is not None:
digest = Digest()
try:
istat = subpath.lstat()
if self.ignore(subpath, istat, source=source_item):
self.report(Action.IGNORE, subpath, istat, IgnoreCause.IGNORE_RULE)
continue
if (source_item is not None and
not self._force_hash and
not stat.S_ISDIR(istat.st_mode) and
pstat.st_size == source_item.size and
pstat.st_mtime_ns // (1000 * self._time_rounding_us) == source_item.modification_timestamp_us // self._time_rounding_us
):
self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNCHANGED)
tree.items.append(source_item)
continue
sub_source_digest = source_item and source_item.digest
object_type = object_type_from_mode(istat.st_mode)
if object_type == b"slnk":
digest = self._add_symlink(subpath, istat, source_digest=sub_source_digest)
elif object_type == b"tree":
digest = self._add_tree(subpath, istat, source_digest=sub_source_digest)
elif object_type == b"blob":
digest = self._add_blob(subpath, istat, source_digest=sub_source_digest)
else:
self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNSUPPORTED_TYPE)
continue
except Exception as err:
self.report(Action.ERROR, subpath, None, err)
continue
except Exception as err:
self.report(Action.ERROR, item, None, err)
continue
if digest:
self.report(Action.ADD, path, pstat)
tree.items.append(TreeItem(
digest = digest,
object_type = object_type,
size = istat.st_size,
permissions = stat.S_IMODE(istat.st_mode),
modification_timestamp = istat.st_mtime_ns,
name = item.name,
))
if digest:
tree.items.append(TreeItem(
digest = digest,
object_type = object_type,
size = istat.st_size,
permissions = stat.S_IMODE(istat.st_mode),
modification_timestamp_us = istat.st_mtime_ns // 1000,
name = subpath.name,
))
elif source_item:
self.report(Action.REMOVE, path / source_item.name, None, source_item)
return self._repo.add_tree(tree, dry_run=self._dry_run)
digest = self._repo.add_tree(tree, dry_run=self._dry_run)
action, info = Action.from_digests(digest, source_digest)
self.report(action, path, pstat, info)
return digest
def _add_symlink(self, path: Path, pstat: stat_result) -> Digest:
def _add_symlink(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest:
# TODO: Store symlink relative to current dir ?
# * What about symlink that points outside of the backup dirs
# * Should symlinks that points inside the backup dirs but in another
# mount-point adjusted ?
# * Should absolute symlink be restored as absolute ?
self.report(Action.ADD, path, pstat)
return self._repo._cas.write(
digest = self._repo._cas.write(
b"slnk",
path.readlink().as_posix().encode("utf-8"),
dry_run = self._dry_run,
)
def _add_blob(self, path: Path, pstat: stat_result) -> Digest:
self.report(Action.ADD, path, pstat)
action, info = Action.from_digests(digest, source_digest)
self.report(action, path, pstat, info)
return digest
def _add_blob(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest:
with path.open("rb") as stream:
return self._repo.add_blob(stream, dry_run=self._dry_run)
digest = self._repo.add_blob(stream, dry_run=self._dry_run)
action, info = Action.from_digests(digest, source_digest)
self.report(action, path, pstat, info)
return digest
def ignore(self, path: Path, pstat: stat_result) -> bool:
def ignore(self, path: Path, pstat: stat_result, *, source: TreeItem | None=None) -> bool:
return is_bsv_repository(path)
def report(self, action: Action, path: Path, pstat: stat_result | None, info: IgnoreCause | Exception | None=None):
def report(self, action: Action, path: Path, pstat: stat_result | None, info: IgnoreCause | Exception | TreeItem | None=None):
match action, info:
case (Action.ADD, None):
print(f"Add: {path}")
case (Action.UPDATE, None):
print(f"Add: {path}")
case (Action.REMOVE, item) if isinstance(item, TreeItem):
print(f"Remove: {path / item.name}")
case (Action.IGNORE, IgnoreCause.IGNORE_RULE):
print(f"Ignore (rule): {path}")
case (Action.IGNORE, IgnoreCause.UNCHANGED):
print(f"Ignore (unchanged): {path}")
case (Action.IGNORE, IgnoreCause.UNSUPPORTED_TYPE) if pstat is not None:
assert pstat is not None
print(f"Ignore (unsupported type {path_type_name(pstat)}): {path}")
case (Action.ERROR, _) if isinstance(info, Exception):
print(f"Error {info}: {path}")

4
src/bsv/util.py

@ -25,10 +25,10 @@ from typing import BinaryIO
EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
def time_from_timestamp(timestamp: int) -> DateTime:
def time_from_timestamp_us(timestamp: int) -> DateTime:
return EPOCH + TimeDelta(microseconds=timestamp)
def timestamp_from_time(time: DateTime) -> int:
def timestamp_us_from_time(time: DateTime) -> int:
return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)

124
tests/test_repository.py

@ -18,12 +18,13 @@ from datetime import UTC, datetime
from os import stat_result
from pathlib import Path
from random import randbytes
from shutil import rmtree
from typing import Iterator
import pytest
from tempfile import TemporaryDirectory
from bsv.repository import Repository, Snapshot, Tree, TreeItem, create_repository, timestamp_from_time
from bsv.repository import Repository, Snapshot, Tree, TreeItem, create_repository, timestamp_us_from_time
from bsv.simple_cas.cas import Digest
from bsv.tree_walker import Action, IgnoreCause, TreeWalker
@ -66,7 +67,7 @@ def test_read_write_tree(repo: Repository):
object_type = b"blob",
size = 123,
permissions = 0o744,
modification_timestamp = timestamp_from_time(now),
modification_timestamp_us = timestamp_us_from_time(now),
name = "xyz",
),
TreeItem(
@ -74,7 +75,7 @@ def test_read_write_tree(repo: Repository):
object_type = b"slnk",
size = 42,
permissions = 0o777,
modification_timestamp = timestamp_from_time(now),
modification_timestamp_us = timestamp_us_from_time(now),
name = "foobar",
),
]
@ -90,8 +91,12 @@ def test_read_write_snapshot(repo: Repository):
snapshot = Snapshot(
repo = repo,
tree_digest = Digest(bytes([42]) * repo._cas._digest_size),
parents = [
Digest(bytes([123]) * repo._cas._digest_size),
Digest(bytes([124]) * repo._cas._digest_size),
],
repo_name = "test_repo",
timestamp = timestamp_from_time(datetime.now()),
timestamp_us = timestamp_us_from_time(datetime.now()),
)
assert Snapshot.from_bytes(repo, snapshot.to_bytes()) == snapshot
@ -104,7 +109,7 @@ class TestTreeWalker(TreeWalker):
reports: list
def __init__(self, repo: Repository, dry_run: bool=False):
super().__init__(repo, dry_run)
super().__init__(repo, dry_run=dry_run)
self.reports = []
def report(
@ -115,12 +120,12 @@ class TestTreeWalker(TreeWalker):
info: IgnoreCause | Exception | None = None
):
super().report(action, path, pstat, info)
self.reports.append((action, path, pstat, info))
self.reports.append((action, path, info if action != Action.REMOVE else None))
def test_add_tree(tmp_dir: Path, repo: Repository):
dir = tmp_dir / "test"
structure = {
dir = tmp_dir / "test0"
structure0 = {
"folder": {
"sub_folder": {
"empty_folder": {},
@ -134,11 +139,28 @@ def test_add_tree(tmp_dir: Path, repo: Repository):
"bsv_config.toml": b"[bsv]\n",
},
}
structure1 = {
"folder": {
"sub_folder": {
"empty_folder": {},
"foo.txt": b"Hello World!\n",
},
"bar.dat": bytes(range(256)) * 2,
},
"new_file": b"whatever",
"Another test with long name and spaces and a bang !": b"Should works.\n",
"bsv_repo": {
"bsv_config.toml": b"[bsv]\n",
},
}
create_file_structure(dir, structure)
expected0 = dict(structure0)
del expected0["bsv_repo"]
walker = TestTreeWalker(repo)
dir_digest = walker.add_tree(dir)
expected1 = dict(structure1)
del expected1["bsv_repo"]
create_file_structure(dir, structure0)
def check(digest: Digest, value: dict | bytes):
if isinstance(value, dict):
@ -152,19 +174,79 @@ def test_add_tree(tmp_dir: Path, repo: Repository):
data = blob.reader().read()
assert data == value
expected = dict(structure)
del expected["bsv_repo"]
check(dir_digest, expected)
walker = TestTreeWalker(repo)
dir_digest0 = walker.add_tree(dir)
assert walker.reports == [
(Action.ADD, dir / "Another test with long name and spaces and a bang !", None),
(Action.IGNORE, dir / "bsv_repo", IgnoreCause.IGNORE_RULE),
(Action.ADD, dir / "folder/bar.dat", None),
(Action.ADD, dir / "folder/sub_folder/empty_folder", None),
(Action.ADD, dir / "folder/sub_folder/foo.txt", None),
(Action.ADD, dir / "folder/sub_folder", None),
(Action.ADD, dir / "folder/test.py", None),
(Action.ADD, dir / "folder", None),
(Action.ADD, dir, None),
]
check(dir_digest0, expected0)
create_file_structure(dir, structure1)
walker.reports.clear()
dir_digest1 = walker.add_tree(dir, source_digest=dir_digest0)
assert walker.reports == [
(Action.IGNORE, dir / "Another test with long name and spaces and a bang !", IgnoreCause.UNCHANGED),
(Action.IGNORE, dir / "bsv_repo", IgnoreCause.IGNORE_RULE),
(Action.UPDATE, dir / "folder/bar.dat", None),
(Action.IGNORE, dir / "folder/sub_folder/empty_folder", IgnoreCause.UNCHANGED),
(Action.IGNORE, dir / "folder/sub_folder/foo.txt", IgnoreCause.UNCHANGED),
(Action.IGNORE, dir / "folder/sub_folder", IgnoreCause.UNCHANGED),
(Action.REMOVE, dir / "folder/test.py", None),
(Action.UPDATE, dir / "folder", None),
(Action.ADD, dir / "new_file", None),
(Action.UPDATE, dir, None),
]
check(dir_digest1, expected1)
def create_file_structure(dst: Path, value: dict | bytes):
assert not dst.exists()
if isinstance(value, dict):
dst.mkdir()
for name, item in value.items():
create_file_structure(dst / name, item)
elif isinstance(value, bytes):
dst.write_bytes(value)
if isinstance(value, bytes):
if dst.is_dir():
rmtree(str(dst))
if not dst.is_file() or dst.read_bytes() != value:
dst.write_bytes(value)
elif isinstance(value, dict):
if dst.is_file():
dst.unlink()
if not dst.is_dir():
dst.mkdir()
items = sorted(value.items())
fs_paths = sorted(dst.iterdir())
item_index = 0
fs_path_index = 0
while item_index < len(value) or fs_path_index < len(fs_paths):
name, subitem = items[item_index] if item_index < len(items) else (None, None)
fs_path = fs_paths[fs_path_index] if fs_path_index < len(fs_paths) else None
if name and fs_path:
if name < fs_path.name:
fs_path = None
elif name > fs_path.name:
name = None
if name:
item_index += 1
if fs_path:
fs_path_index += 1
if name:
create_file_structure(dst / name, subitem) # type: ignore
elif fs_path and fs_path.is_dir():
rmtree(fs_path)
elif fs_path:
fs_path.unlink()
else:
raise TypeError(f"invalid type {type(value).__name__} for parameter value")

Loading…
Cancel
Save