Improve TreeWalker algorithm.
This commit is contained in:
58
src/bsv/path_map.py
Normal file
58
src/bsv/path_map.py
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from itertools import pairwise
|
||||||
|
|
||||||
|
from pathlib import Path, PurePosixPath
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(order=True, frozen=True, slots=True)
|
||||||
|
class PathPair:
|
||||||
|
bsv: PurePosixPath
|
||||||
|
fs: Path
|
||||||
|
|
||||||
|
def __post_init__(self):
|
||||||
|
if not self.bsv.is_absolute() or not self.fs.is_absolute():
|
||||||
|
raise ValueError("paths in path_map must be absolute")
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_obj(cls, obj: dict[str, str]) -> PathPair:
|
||||||
|
return cls(
|
||||||
|
bsv = PurePosixPath(obj["bsv"]),
|
||||||
|
fs = Path(obj["fs"]),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PathMap:
|
||||||
|
paths: list[PathPair]
|
||||||
|
|
||||||
|
def __init__(self, paths: list[PathPair]=[]):
|
||||||
|
self.paths = sorted(paths)
|
||||||
|
for path0, path1 in pairwise(self.paths):
|
||||||
|
if path0 == path1 or path1.bsv.relative_to(path0.bsv):
|
||||||
|
raise ValueError("bsv paths must be unique and independent")
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_obj(cls, obj: list[dict[str, str]]) -> PathMap:
|
||||||
|
return cls([
|
||||||
|
PathPair.from_obj(item)
|
||||||
|
for item in obj
|
||||||
|
])
|
||||||
|
|
||||||
|
def clone(self) -> PathMap:
|
||||||
|
return PathMap(self.paths)
|
||||||
@@ -19,19 +19,23 @@ from dataclasses import dataclass
|
|||||||
from datetime import datetime as DateTime
|
from datetime import datetime as DateTime
|
||||||
import hashlib
|
import hashlib
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pathlib import Path, PurePosixPath
|
from pathlib import Path
|
||||||
import platform
|
import platform
|
||||||
import tomllib
|
import tomllib
|
||||||
from typing import Any, BinaryIO, Callable, Type
|
from typing import TYPE_CHECKING, BinaryIO, Callable, Type
|
||||||
|
|
||||||
from fastcdc import fastcdc
|
from fastcdc import fastcdc
|
||||||
import tomlkit
|
import tomlkit
|
||||||
|
|
||||||
from bsv import __version__
|
from bsv import __version__
|
||||||
from bsv.exception import ConfigError
|
from bsv.exception import ConfigError
|
||||||
|
from bsv.path_map import PathMap
|
||||||
from bsv.simple_cas import SimpleCas
|
from bsv.simple_cas import SimpleCas
|
||||||
from bsv.simple_cas.cas import Digest, SimpleCas
|
from bsv.simple_cas.cas import Digest, SimpleCas
|
||||||
from bsv.util import Hash, read_exact, read_exact_or_eof, time_from_timestamp, timestamp_from_time
|
from bsv.util import Hash, read_exact, read_exact_or_eof, time_from_timestamp_us, timestamp_us_from_time
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from bsv.tree_walker import TreeWalker
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_MIN_CHUNK_SIZE = 1 << 12
|
DEFAULT_MIN_CHUNK_SIZE = 1 << 12
|
||||||
@@ -48,7 +52,7 @@ class Repository:
|
|||||||
_avg_chunk_size: int = DEFAULT_AVG_CHUNK_SIZE
|
_avg_chunk_size: int = DEFAULT_AVG_CHUNK_SIZE
|
||||||
_max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE
|
_max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE
|
||||||
|
|
||||||
_path_map: list[PathPair]
|
_path_map: PathMap
|
||||||
# _remotes: list[object]
|
# _remotes: list[object]
|
||||||
|
|
||||||
_context_depth: int = 0
|
_context_depth: int = 0
|
||||||
@@ -72,10 +76,7 @@ class Repository:
|
|||||||
self._avg_chunk_size = bsv.get("avg_chunk_size")
|
self._avg_chunk_size = bsv.get("avg_chunk_size")
|
||||||
self._max_chunk_size = bsv.get("max_chunk_size")
|
self._max_chunk_size = bsv.get("max_chunk_size")
|
||||||
|
|
||||||
self._path_map = [
|
self._path_map = PathMap.from_obj(bsv.get("path_map", []))
|
||||||
PathPair.from_obj(pair)
|
|
||||||
for pair in bsv.get("path_map", [])
|
|
||||||
]
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def path(self) -> Path:
|
def path(self) -> Path:
|
||||||
@@ -90,14 +91,14 @@ class Repository:
|
|||||||
return self._name
|
return self._name
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def path_map(self) -> list[PathPair]:
|
def path_map(self) -> PathMap:
|
||||||
return list(self._path_map)
|
return self._path_map.clone()
|
||||||
|
|
||||||
def get_blob(self, digest: Digest) -> Blob:
|
def get_blob(self, digest: Digest) -> Blob:
|
||||||
with self:
|
with self:
|
||||||
return self._read(digest, object_type=b"blob", cls=Blob) # type: ignore
|
return self._read(digest, object_type=b"blob", cls=Blob) # type: ignore
|
||||||
|
|
||||||
def add_blob(self, stream: BinaryIO, dry_run: bool=False) -> Digest:
|
def add_blob(self, stream: BinaryIO, *, dry_run: bool=False) -> Digest:
|
||||||
with self:
|
with self:
|
||||||
return self._write(b"blob", stream, dry_run=dry_run)
|
return self._write(b"blob", stream, dry_run=dry_run)
|
||||||
|
|
||||||
@@ -105,11 +106,11 @@ class Repository:
|
|||||||
with self:
|
with self:
|
||||||
return Tree.from_bytes(self, self._cas.read(digest, object_type=b"tree").data)
|
return Tree.from_bytes(self, self._cas.read(digest, object_type=b"tree").data)
|
||||||
|
|
||||||
def add_tree(self, tree: Tree, dry_run: bool=False) -> Digest:
|
def add_tree(self, tree: Tree, *, dry_run: bool=False) -> Digest:
|
||||||
with self:
|
with self:
|
||||||
return self._cas.write(b"tree", tree.to_bytes(), dry_run=dry_run)
|
return self._cas.write(b"tree", tree.to_bytes(), dry_run=dry_run)
|
||||||
|
|
||||||
def add_tree_from_path(self, path: Path, dry_run: bool=False) -> Digest:
|
def add_tree_from_path(self, path: Path, *, dry_run: bool=False) -> Digest:
|
||||||
from bsv.tree_walker import TreeWalker
|
from bsv.tree_walker import TreeWalker
|
||||||
walker = TreeWalker(self, dry_run=dry_run)
|
walker = TreeWalker(self, dry_run=dry_run)
|
||||||
return walker.add_tree(path)
|
return walker.add_tree(path)
|
||||||
@@ -118,16 +119,44 @@ class Repository:
|
|||||||
with self:
|
with self:
|
||||||
return Snapshot.from_bytes(self, self._cas.read(digest, object_type=b"snap").data)
|
return Snapshot.from_bytes(self, self._cas.read(digest, object_type=b"snap").data)
|
||||||
|
|
||||||
def add_snapshot(self, snapshot: Snapshot, dry_run: bool=False) -> Digest:
|
def add_snapshot(self, snapshot: Snapshot, *, dry_run: bool=False) -> Digest:
|
||||||
with self:
|
with self:
|
||||||
return self._cas.write(b"snap", snapshot.to_bytes(), dry_run=dry_run)
|
return self._cas.write(b"snap", snapshot.to_bytes(), dry_run=dry_run)
|
||||||
|
|
||||||
|
# def take_snapshot(
|
||||||
|
# self,
|
||||||
|
# parent_digests: list[Digest] = [],
|
||||||
|
# *,
|
||||||
|
# walker_type: Type[TreeWalker] | None = None,
|
||||||
|
# dry_run: bool = False,
|
||||||
|
# ):
|
||||||
|
# from bsv.tree_walker import TreeWalker
|
||||||
|
|
||||||
|
# walker = (walker_type or TreeWalker)(self, dry_run=dry_run)
|
||||||
|
|
||||||
|
# # parents = [
|
||||||
|
# # self.get_snapshot(digest)
|
||||||
|
# # for digest in parent_digests
|
||||||
|
# # ]
|
||||||
|
# parent = self.get_snapshot(parent_digests[0]) if parent_digests else None
|
||||||
|
|
||||||
|
# snapshot = Snapshot(
|
||||||
|
# repo = self,
|
||||||
|
# tree_digest = walker.add_virtual_tree(self._path_map, parent=),
|
||||||
|
# parents = parent_digests,
|
||||||
|
# repo_name = self._name,
|
||||||
|
# timestamp = timestamp_us_from_time(DateTime.now()),
|
||||||
|
# )
|
||||||
|
# return self.add_snapshot(snapshot, dry_run=dry_run)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _read(self, digest: Digest, object_type: bytes, cls: Type[ChunkedObject]) -> ChunkedObject:
|
def _read(self, digest: Digest, object_type: bytes, cls: Type[ChunkedObject]) -> ChunkedObject:
|
||||||
obj = self._cas.read(digest, object_type=object_type)
|
obj = self._cas.read(digest, object_type=object_type)
|
||||||
stream = BytesIO(obj.data)
|
stream = BytesIO(obj.data)
|
||||||
return cls.from_stream(self, stream, digest_size=self._cas._digest_size)
|
return cls.from_stream(self, stream, digest_size=self._cas._digest_size)
|
||||||
|
|
||||||
def _write(self, object_type: bytes, stream: BinaryIO, dry_run: bool=False) -> Digest:
|
def _write(self, object_type: bytes, stream: BinaryIO, *, dry_run: bool=False) -> Digest:
|
||||||
out = BytesIO()
|
out = BytesIO()
|
||||||
size = 0
|
size = 0
|
||||||
for chunk in fastcdc(
|
for chunk in fastcdc(
|
||||||
@@ -332,7 +361,7 @@ class TreeItem:
|
|||||||
object_type: bytes
|
object_type: bytes
|
||||||
size: int
|
size: int
|
||||||
permissions: int
|
permissions: int
|
||||||
modification_timestamp: int
|
modification_timestamp_us: int
|
||||||
name: str
|
name: str
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@@ -341,7 +370,7 @@ class TreeItem:
|
|||||||
object_type: bytes,
|
object_type: bytes,
|
||||||
size: int,
|
size: int,
|
||||||
permissions: int,
|
permissions: int,
|
||||||
modification_timestamp: int,
|
modification_timestamp_us: int,
|
||||||
name: str,
|
name: str,
|
||||||
):
|
):
|
||||||
if "/\\" in name:
|
if "/\\" in name:
|
||||||
@@ -350,15 +379,15 @@ class TreeItem:
|
|||||||
self.object_type = object_type
|
self.object_type = object_type
|
||||||
self.size = size
|
self.size = size
|
||||||
self.permissions = permissions
|
self.permissions = permissions
|
||||||
self.modification_timestamp = modification_timestamp
|
self.modification_timestamp_us = modification_timestamp_us
|
||||||
self.name = name
|
self.name = name
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def modification_time(self) -> DateTime:
|
def modification_time(self) -> DateTime:
|
||||||
return time_from_timestamp(self.modification_timestamp)
|
return time_from_timestamp_us(self.modification_timestamp_us)
|
||||||
@modification_time.setter
|
@modification_time.setter
|
||||||
def modification_time(self, time: DateTime):
|
def modification_time(self, time: DateTime):
|
||||||
self.modification_timestamp = timestamp_from_time(time)
|
self.modification_timestamp_us = timestamp_us_from_time(time)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_stream(cls, stream: BinaryIO, digest_size: int) -> TreeItem | None:
|
def from_stream(cls, stream: BinaryIO, digest_size: int) -> TreeItem | None:
|
||||||
@@ -370,7 +399,7 @@ class TreeItem:
|
|||||||
object_type = read_exact(stream, 4),
|
object_type = read_exact(stream, 4),
|
||||||
size = int.from_bytes(read_exact(stream, 8)),
|
size = int.from_bytes(read_exact(stream, 8)),
|
||||||
permissions = int.from_bytes(read_exact(stream, 2)),
|
permissions = int.from_bytes(read_exact(stream, 2)),
|
||||||
modification_timestamp = int.from_bytes(read_exact(stream, 8), signed=True),
|
modification_timestamp_us = int.from_bytes(read_exact(stream, 8), signed=True),
|
||||||
name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
|
name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -379,7 +408,7 @@ class TreeItem:
|
|||||||
stream.write(self.object_type)
|
stream.write(self.object_type)
|
||||||
stream.write(self.size.to_bytes(8))
|
stream.write(self.size.to_bytes(8))
|
||||||
stream.write(self.permissions.to_bytes(2))
|
stream.write(self.permissions.to_bytes(2))
|
||||||
stream.write(self.modification_timestamp.to_bytes(8, signed=True))
|
stream.write(self.modification_timestamp_us.to_bytes(8, signed=True))
|
||||||
name_bytes = self.name.encode("utf-8")
|
name_bytes = self.name.encode("utf-8")
|
||||||
stream.write(len(name_bytes).to_bytes(2))
|
stream.write(len(name_bytes).to_bytes(2))
|
||||||
stream.write(name_bytes)
|
stream.write(name_bytes)
|
||||||
@@ -391,23 +420,31 @@ class TreeItem:
|
|||||||
class Snapshot:
|
class Snapshot:
|
||||||
repo: Repository
|
repo: Repository
|
||||||
tree_digest: Digest
|
tree_digest: Digest
|
||||||
|
parents: list[Digest]
|
||||||
repo_name: str
|
repo_name: str
|
||||||
timestamp: int
|
timestamp_us: int
|
||||||
|
|
||||||
|
def __post_init__(self):
|
||||||
|
assert len(self.parents) < 256
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def time(self) -> DateTime:
|
def time(self) -> DateTime:
|
||||||
return time_from_timestamp(self.timestamp)
|
return time_from_timestamp_us(self.timestamp_us)
|
||||||
@time.setter
|
@time.setter
|
||||||
def time(self, time: DateTime):
|
def time(self, time: DateTime):
|
||||||
self.timestamp = timestamp_from_time(time)
|
self.timestamp_us = timestamp_us_from_time(time)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_stream(cls, repo: Repository, stream: BinaryIO) -> Snapshot:
|
def from_stream(cls, repo: Repository, stream: BinaryIO) -> Snapshot:
|
||||||
return Snapshot(
|
return Snapshot(
|
||||||
repo = repo,
|
repo = repo,
|
||||||
tree_digest = Digest(read_exact(stream, repo._cas._digest_size)),
|
tree_digest = Digest(read_exact(stream, repo._cas._digest_size)),
|
||||||
|
parents = [
|
||||||
|
Digest(read_exact(stream, repo._cas._digest_size))
|
||||||
|
for _ in range(int.from_bytes(read_exact(stream, 1)))
|
||||||
|
],
|
||||||
repo_name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
|
repo_name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
|
||||||
timestamp = int.from_bytes(read_exact(stream, 8), signed=True),
|
timestamp_us = int.from_bytes(read_exact(stream, 8), signed=True),
|
||||||
)
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -416,40 +453,17 @@ class Snapshot:
|
|||||||
return cls.from_stream(repo, stream)
|
return cls.from_stream(repo, stream)
|
||||||
|
|
||||||
def write(self, stream: BinaryIO):
|
def write(self, stream: BinaryIO):
|
||||||
|
assert len(self.parents) < 256
|
||||||
stream.write(self.tree_digest.digest)
|
stream.write(self.tree_digest.digest)
|
||||||
|
stream.write(len(self.parents).to_bytes(1))
|
||||||
|
for parent in self.parents:
|
||||||
|
stream.write(parent.digest)
|
||||||
repo_name_bytes = self.repo_name.encode("utf-8")
|
repo_name_bytes = self.repo_name.encode("utf-8")
|
||||||
stream.write(len(repo_name_bytes).to_bytes(2))
|
stream.write(len(repo_name_bytes).to_bytes(2))
|
||||||
stream.write(repo_name_bytes)
|
stream.write(repo_name_bytes)
|
||||||
stream.write(self.timestamp.to_bytes(8, signed=True))
|
stream.write(self.timestamp_us.to_bytes(8, signed=True))
|
||||||
|
|
||||||
def to_bytes(self) -> bytes:
|
def to_bytes(self) -> bytes:
|
||||||
stream = BytesIO()
|
stream = BytesIO()
|
||||||
self.write(stream)
|
self.write(stream)
|
||||||
return stream.getvalue()
|
return stream.getvalue()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class PathPair:
|
|
||||||
bsv: PurePosixPath
|
|
||||||
fs: Path
|
|
||||||
|
|
||||||
def __init__(self, bsv: PurePosixPath, fs: Path):
|
|
||||||
self.bsv = bsv
|
|
||||||
self.fs = fs
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def from_obj(cls, obj: dict[str, Any]) -> PathPair:
|
|
||||||
bsv = PurePosixPath(obj["bsv"])
|
|
||||||
fs = Path(obj["fs"])
|
|
||||||
|
|
||||||
if not bsv.is_absolute() or not fs.is_absolute():
|
|
||||||
raise ValueError("paths in path_map must be absolute")
|
|
||||||
|
|
||||||
return cls(
|
|
||||||
bsv = obj["bsv"],
|
|
||||||
fs = obj["fs"],
|
|
||||||
)
|
|
||||||
|
|
||||||
def __lt__(self, rhs: PathPair) -> bool:
|
|
||||||
return self.bsv < rhs.bsv
|
|
||||||
|
|||||||
@@ -15,12 +15,14 @@
|
|||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime as DateTime, timedelta as TimeDelta
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from os import stat_result
|
from os import stat_result
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import stat
|
import stat
|
||||||
|
|
||||||
from bsv.object import Digest
|
from bsv.object import Digest
|
||||||
|
from bsv.path_map import PathMap
|
||||||
from bsv.repository import Repository, Tree, TreeItem
|
from bsv.repository import Repository, Tree, TreeItem
|
||||||
from bsv.util import is_bsv_repository, object_type_from_mode
|
from bsv.util import is_bsv_repository, object_type_from_mode
|
||||||
|
|
||||||
@@ -28,9 +30,20 @@ from bsv.util import is_bsv_repository, object_type_from_mode
|
|||||||
class Action(Enum):
|
class Action(Enum):
|
||||||
ADD = "add"
|
ADD = "add"
|
||||||
UPDATE = "update"
|
UPDATE = "update"
|
||||||
|
REMOVE = "remove"
|
||||||
IGNORE = "ignore"
|
IGNORE = "ignore"
|
||||||
ERROR = "error"
|
ERROR = "error"
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_digests(cls, digest: Digest, source_digest: Digest | None) -> tuple[Action, IgnoreCause | None]:
|
||||||
|
assert digest
|
||||||
|
if not source_digest:
|
||||||
|
return Action.ADD, None
|
||||||
|
elif source_digest == digest:
|
||||||
|
return Action.IGNORE, IgnoreCause.UNCHANGED
|
||||||
|
else:
|
||||||
|
return Action.UPDATE, None
|
||||||
|
|
||||||
class IgnoreCause(Enum):
|
class IgnoreCause(Enum):
|
||||||
IGNORE_RULE = "ignore_rule"
|
IGNORE_RULE = "ignore_rule"
|
||||||
UNCHANGED = "unchanged"
|
UNCHANGED = "unchanged"
|
||||||
@@ -39,88 +52,177 @@ class IgnoreCause(Enum):
|
|||||||
|
|
||||||
class TreeWalker:
|
class TreeWalker:
|
||||||
_repo: Repository
|
_repo: Repository
|
||||||
|
_time_rounding_us: int = 2000000
|
||||||
|
_force_hash: bool = False
|
||||||
_dry_run: bool = False
|
_dry_run: bool = False
|
||||||
|
|
||||||
def __init__(self, repo: Repository, dry_run: bool=False):
|
def __init__(
|
||||||
|
self,
|
||||||
|
repo: Repository,
|
||||||
|
*,
|
||||||
|
time_rounding_us: int = 2000000,
|
||||||
|
force_hash: bool = False,
|
||||||
|
dry_run: bool = False,
|
||||||
|
):
|
||||||
self._repo = repo
|
self._repo = repo
|
||||||
|
self._time_rounding_us = time_rounding_us
|
||||||
|
self._force_hash = force_hash
|
||||||
self._dry_run = dry_run
|
self._dry_run = dry_run
|
||||||
|
|
||||||
def add_tree(self, path: Path) -> Digest:
|
# def add_virtual_tree(self, paths: PathMap) -> Digest:
|
||||||
|
# root = {}
|
||||||
|
# for pair in paths.paths:
|
||||||
|
# vdir = root
|
||||||
|
# for part in pair.bsv.parts[:-1]:
|
||||||
|
# vdir = vdir.setdefault(part, {})
|
||||||
|
# vdir[pair.bsv.parts[-1]] = pair.fs
|
||||||
|
|
||||||
|
# return self._add_virtual_tree(root)
|
||||||
|
|
||||||
|
# def _add_virtual_tree(self, vtree: dict[str, dict | Path]) -> Digest:
|
||||||
|
# tree = Tree(self._repo, [])
|
||||||
|
# for name, value in vtree.items():
|
||||||
|
# if isinstance(value, dict):
|
||||||
|
# digest = self._add_virtual_tree(value)
|
||||||
|
# elif isinstance(value, Path):
|
||||||
|
# digest = self.add_tree(value)
|
||||||
|
# else:
|
||||||
|
# raise TypeError(f"unexpected type {type(vtree).__name__} for vtree")
|
||||||
|
# tree.items.append(TreeItem(
|
||||||
|
# digest = digest,
|
||||||
|
# object_type = b"tree",
|
||||||
|
# size = 0,
|
||||||
|
# permissions = 0o766,
|
||||||
|
# modification_timestamp = timestamp_us_from_time(DateTime.now()),
|
||||||
|
# name = name,
|
||||||
|
# ))
|
||||||
|
# return self._repo.add_tree(tree, dry_run=self._dry_run)
|
||||||
|
|
||||||
|
def add_tree(self, path: Path, *, source_digest: Digest | None=None) -> Digest:
|
||||||
pstat = path.stat(follow_symlinks=False)
|
pstat = path.stat(follow_symlinks=False)
|
||||||
if self.ignore(path, pstat):
|
if self.ignore(path, pstat):
|
||||||
self.report(Action.IGNORE, path, pstat, IgnoreCause.IGNORE_RULE)
|
self.report(Action.IGNORE, path, pstat, IgnoreCause.IGNORE_RULE)
|
||||||
return Digest()
|
return Digest()
|
||||||
return self._add_tree(path, pstat)
|
return self._add_tree(path, pstat, source_digest=source_digest)
|
||||||
|
|
||||||
|
def _add_tree(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest:
|
||||||
|
source = self._repo.get_tree(source_digest) if source_digest else None
|
||||||
|
|
||||||
def _add_tree(self, path: Path, pstat: stat_result) -> Digest:
|
|
||||||
tree = Tree(self._repo, [])
|
tree = Tree(self._repo, [])
|
||||||
for item in sorted(path.iterdir()):
|
subpaths = sorted(path.iterdir())
|
||||||
digest = Digest()
|
|
||||||
try:
|
subpath_index = 0
|
||||||
istat = item.lstat()
|
source_item_index = 0
|
||||||
if self.ignore(item, istat):
|
|
||||||
self.report(Action.IGNORE, item, istat, IgnoreCause.IGNORE_RULE)
|
while subpath_index < len(subpaths) or (source and source_item_index < len(source.items)):
|
||||||
|
subpath = subpaths[subpath_index] if subpath_index < len(subpaths) else None
|
||||||
|
source_item = source.items[source_item_index] if source and source_item_index < len(source.items) else None
|
||||||
|
|
||||||
|
if subpath and source_item:
|
||||||
|
if subpath.name < source_item.name:
|
||||||
|
source_item = None
|
||||||
|
elif subpath.name > source_item.name:
|
||||||
|
subpath = None
|
||||||
|
|
||||||
|
if subpath is not None:
|
||||||
|
subpath_index += 1
|
||||||
|
if source_item is not None:
|
||||||
|
source_item_index += 1
|
||||||
|
|
||||||
|
if subpath is not None:
|
||||||
|
digest = Digest()
|
||||||
|
try:
|
||||||
|
istat = subpath.lstat()
|
||||||
|
|
||||||
|
if self.ignore(subpath, istat, source=source_item):
|
||||||
|
self.report(Action.IGNORE, subpath, istat, IgnoreCause.IGNORE_RULE)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if (source_item is not None and
|
||||||
|
not self._force_hash and
|
||||||
|
not stat.S_ISDIR(istat.st_mode) and
|
||||||
|
pstat.st_size == source_item.size and
|
||||||
|
pstat.st_mtime_ns // (1000 * self._time_rounding_us) == source_item.modification_timestamp_us // self._time_rounding_us
|
||||||
|
):
|
||||||
|
self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNCHANGED)
|
||||||
|
tree.items.append(source_item)
|
||||||
|
continue
|
||||||
|
|
||||||
|
sub_source_digest = source_item and source_item.digest
|
||||||
|
object_type = object_type_from_mode(istat.st_mode)
|
||||||
|
if object_type == b"slnk":
|
||||||
|
digest = self._add_symlink(subpath, istat, source_digest=sub_source_digest)
|
||||||
|
elif object_type == b"tree":
|
||||||
|
digest = self._add_tree(subpath, istat, source_digest=sub_source_digest)
|
||||||
|
elif object_type == b"blob":
|
||||||
|
digest = self._add_blob(subpath, istat, source_digest=sub_source_digest)
|
||||||
|
else:
|
||||||
|
self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNSUPPORTED_TYPE)
|
||||||
|
continue
|
||||||
|
except Exception as err:
|
||||||
|
self.report(Action.ERROR, subpath, None, err)
|
||||||
continue
|
continue
|
||||||
object_type = object_type_from_mode(istat.st_mode)
|
|
||||||
if object_type == b"slnk":
|
|
||||||
digest = self._add_symlink(item, istat)
|
|
||||||
elif object_type == b"tree":
|
|
||||||
digest = self._add_tree(item, istat)
|
|
||||||
elif object_type == b"blob":
|
|
||||||
digest = self._add_blob(item, istat)
|
|
||||||
else:
|
|
||||||
self.report(Action.IGNORE, item, istat, IgnoreCause.UNSUPPORTED_TYPE)
|
|
||||||
continue
|
|
||||||
except Exception as err:
|
|
||||||
self.report(Action.ERROR, item, None, err)
|
|
||||||
continue
|
|
||||||
|
|
||||||
if digest:
|
if digest:
|
||||||
self.report(Action.ADD, path, pstat)
|
tree.items.append(TreeItem(
|
||||||
tree.items.append(TreeItem(
|
digest = digest,
|
||||||
digest = digest,
|
object_type = object_type,
|
||||||
object_type = object_type,
|
size = istat.st_size,
|
||||||
size = istat.st_size,
|
permissions = stat.S_IMODE(istat.st_mode),
|
||||||
permissions = stat.S_IMODE(istat.st_mode),
|
modification_timestamp_us = istat.st_mtime_ns // 1000,
|
||||||
modification_timestamp = istat.st_mtime_ns,
|
name = subpath.name,
|
||||||
name = item.name,
|
))
|
||||||
))
|
elif source_item:
|
||||||
|
self.report(Action.REMOVE, path / source_item.name, None, source_item)
|
||||||
|
|
||||||
return self._repo.add_tree(tree, dry_run=self._dry_run)
|
digest = self._repo.add_tree(tree, dry_run=self._dry_run)
|
||||||
|
|
||||||
|
action, info = Action.from_digests(digest, source_digest)
|
||||||
|
self.report(action, path, pstat, info)
|
||||||
|
return digest
|
||||||
|
|
||||||
|
|
||||||
def _add_symlink(self, path: Path, pstat: stat_result) -> Digest:
|
def _add_symlink(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest:
|
||||||
# TODO: Store symlink relative to current dir ?
|
# TODO: Store symlink relative to current dir ?
|
||||||
# * What about symlink that points outside of the backup dirs
|
# * What about symlink that points outside of the backup dirs
|
||||||
# * Should symlinks that points inside the backup dirs but in another
|
# * Should symlinks that points inside the backup dirs but in another
|
||||||
# mount-point adjusted ?
|
# mount-point adjusted ?
|
||||||
# * Should absolute symlink be restored as absolute ?
|
# * Should absolute symlink be restored as absolute ?
|
||||||
self.report(Action.ADD, path, pstat)
|
digest = self._repo._cas.write(
|
||||||
return self._repo._cas.write(
|
|
||||||
b"slnk",
|
b"slnk",
|
||||||
path.readlink().as_posix().encode("utf-8"),
|
path.readlink().as_posix().encode("utf-8"),
|
||||||
dry_run = self._dry_run,
|
dry_run = self._dry_run,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _add_blob(self, path: Path, pstat: stat_result) -> Digest:
|
action, info = Action.from_digests(digest, source_digest)
|
||||||
self.report(Action.ADD, path, pstat)
|
self.report(action, path, pstat, info)
|
||||||
|
return digest
|
||||||
|
|
||||||
|
def _add_blob(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest:
|
||||||
with path.open("rb") as stream:
|
with path.open("rb") as stream:
|
||||||
return self._repo.add_blob(stream, dry_run=self._dry_run)
|
digest = self._repo.add_blob(stream, dry_run=self._dry_run)
|
||||||
|
|
||||||
|
action, info = Action.from_digests(digest, source_digest)
|
||||||
|
self.report(action, path, pstat, info)
|
||||||
|
return digest
|
||||||
|
|
||||||
|
|
||||||
def ignore(self, path: Path, pstat: stat_result) -> bool:
|
def ignore(self, path: Path, pstat: stat_result, *, source: TreeItem | None=None) -> bool:
|
||||||
return is_bsv_repository(path)
|
return is_bsv_repository(path)
|
||||||
|
|
||||||
def report(self, action: Action, path: Path, pstat: stat_result | None, info: IgnoreCause | Exception | None=None):
|
def report(self, action: Action, path: Path, pstat: stat_result | None, info: IgnoreCause | Exception | TreeItem | None=None):
|
||||||
match action, info:
|
match action, info:
|
||||||
case (Action.ADD, None):
|
case (Action.ADD, None):
|
||||||
print(f"Add: {path}")
|
print(f"Add: {path}")
|
||||||
|
case (Action.UPDATE, None):
|
||||||
|
print(f"Add: {path}")
|
||||||
|
case (Action.REMOVE, item) if isinstance(item, TreeItem):
|
||||||
|
print(f"Remove: {path / item.name}")
|
||||||
case (Action.IGNORE, IgnoreCause.IGNORE_RULE):
|
case (Action.IGNORE, IgnoreCause.IGNORE_RULE):
|
||||||
print(f"Ignore (rule): {path}")
|
print(f"Ignore (rule): {path}")
|
||||||
case (Action.IGNORE, IgnoreCause.UNCHANGED):
|
case (Action.IGNORE, IgnoreCause.UNCHANGED):
|
||||||
print(f"Ignore (unchanged): {path}")
|
print(f"Ignore (unchanged): {path}")
|
||||||
case (Action.IGNORE, IgnoreCause.UNSUPPORTED_TYPE) if pstat is not None:
|
case (Action.IGNORE, IgnoreCause.UNSUPPORTED_TYPE) if pstat is not None:
|
||||||
assert pstat is not None
|
|
||||||
print(f"Ignore (unsupported type {path_type_name(pstat)}): {path}")
|
print(f"Ignore (unsupported type {path_type_name(pstat)}): {path}")
|
||||||
case (Action.ERROR, _) if isinstance(info, Exception):
|
case (Action.ERROR, _) if isinstance(info, Exception):
|
||||||
print(f"Error {info}: {path}")
|
print(f"Error {info}: {path}")
|
||||||
|
|||||||
@@ -25,10 +25,10 @@ from typing import BinaryIO
|
|||||||
EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
|
EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
|
||||||
|
|
||||||
|
|
||||||
def time_from_timestamp(timestamp: int) -> DateTime:
|
def time_from_timestamp_us(timestamp: int) -> DateTime:
|
||||||
return EPOCH + TimeDelta(microseconds=timestamp)
|
return EPOCH + TimeDelta(microseconds=timestamp)
|
||||||
|
|
||||||
def timestamp_from_time(time: DateTime) -> int:
|
def timestamp_us_from_time(time: DateTime) -> int:
|
||||||
return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)
|
return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -18,12 +18,13 @@ from datetime import UTC, datetime
|
|||||||
from os import stat_result
|
from os import stat_result
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from random import randbytes
|
from random import randbytes
|
||||||
|
from shutil import rmtree
|
||||||
from typing import Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from tempfile import TemporaryDirectory
|
from tempfile import TemporaryDirectory
|
||||||
|
|
||||||
from bsv.repository import Repository, Snapshot, Tree, TreeItem, create_repository, timestamp_from_time
|
from bsv.repository import Repository, Snapshot, Tree, TreeItem, create_repository, timestamp_us_from_time
|
||||||
from bsv.simple_cas.cas import Digest
|
from bsv.simple_cas.cas import Digest
|
||||||
from bsv.tree_walker import Action, IgnoreCause, TreeWalker
|
from bsv.tree_walker import Action, IgnoreCause, TreeWalker
|
||||||
|
|
||||||
@@ -66,7 +67,7 @@ def test_read_write_tree(repo: Repository):
|
|||||||
object_type = b"blob",
|
object_type = b"blob",
|
||||||
size = 123,
|
size = 123,
|
||||||
permissions = 0o744,
|
permissions = 0o744,
|
||||||
modification_timestamp = timestamp_from_time(now),
|
modification_timestamp_us = timestamp_us_from_time(now),
|
||||||
name = "xyz",
|
name = "xyz",
|
||||||
),
|
),
|
||||||
TreeItem(
|
TreeItem(
|
||||||
@@ -74,7 +75,7 @@ def test_read_write_tree(repo: Repository):
|
|||||||
object_type = b"slnk",
|
object_type = b"slnk",
|
||||||
size = 42,
|
size = 42,
|
||||||
permissions = 0o777,
|
permissions = 0o777,
|
||||||
modification_timestamp = timestamp_from_time(now),
|
modification_timestamp_us = timestamp_us_from_time(now),
|
||||||
name = "foobar",
|
name = "foobar",
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
@@ -90,8 +91,12 @@ def test_read_write_snapshot(repo: Repository):
|
|||||||
snapshot = Snapshot(
|
snapshot = Snapshot(
|
||||||
repo = repo,
|
repo = repo,
|
||||||
tree_digest = Digest(bytes([42]) * repo._cas._digest_size),
|
tree_digest = Digest(bytes([42]) * repo._cas._digest_size),
|
||||||
|
parents = [
|
||||||
|
Digest(bytes([123]) * repo._cas._digest_size),
|
||||||
|
Digest(bytes([124]) * repo._cas._digest_size),
|
||||||
|
],
|
||||||
repo_name = "test_repo",
|
repo_name = "test_repo",
|
||||||
timestamp = timestamp_from_time(datetime.now()),
|
timestamp_us = timestamp_us_from_time(datetime.now()),
|
||||||
)
|
)
|
||||||
|
|
||||||
assert Snapshot.from_bytes(repo, snapshot.to_bytes()) == snapshot
|
assert Snapshot.from_bytes(repo, snapshot.to_bytes()) == snapshot
|
||||||
@@ -104,7 +109,7 @@ class TestTreeWalker(TreeWalker):
|
|||||||
reports: list
|
reports: list
|
||||||
|
|
||||||
def __init__(self, repo: Repository, dry_run: bool=False):
|
def __init__(self, repo: Repository, dry_run: bool=False):
|
||||||
super().__init__(repo, dry_run)
|
super().__init__(repo, dry_run=dry_run)
|
||||||
self.reports = []
|
self.reports = []
|
||||||
|
|
||||||
def report(
|
def report(
|
||||||
@@ -115,12 +120,12 @@ class TestTreeWalker(TreeWalker):
|
|||||||
info: IgnoreCause | Exception | None = None
|
info: IgnoreCause | Exception | None = None
|
||||||
):
|
):
|
||||||
super().report(action, path, pstat, info)
|
super().report(action, path, pstat, info)
|
||||||
self.reports.append((action, path, pstat, info))
|
self.reports.append((action, path, info if action != Action.REMOVE else None))
|
||||||
|
|
||||||
|
|
||||||
def test_add_tree(tmp_dir: Path, repo: Repository):
|
def test_add_tree(tmp_dir: Path, repo: Repository):
|
||||||
dir = tmp_dir / "test"
|
dir = tmp_dir / "test0"
|
||||||
structure = {
|
structure0 = {
|
||||||
"folder": {
|
"folder": {
|
||||||
"sub_folder": {
|
"sub_folder": {
|
||||||
"empty_folder": {},
|
"empty_folder": {},
|
||||||
@@ -134,11 +139,28 @@ def test_add_tree(tmp_dir: Path, repo: Repository):
|
|||||||
"bsv_config.toml": b"[bsv]\n",
|
"bsv_config.toml": b"[bsv]\n",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
structure1 = {
|
||||||
|
"folder": {
|
||||||
|
"sub_folder": {
|
||||||
|
"empty_folder": {},
|
||||||
|
"foo.txt": b"Hello World!\n",
|
||||||
|
},
|
||||||
|
"bar.dat": bytes(range(256)) * 2,
|
||||||
|
},
|
||||||
|
"new_file": b"whatever",
|
||||||
|
"Another test with long name and spaces and a bang !": b"Should works.\n",
|
||||||
|
"bsv_repo": {
|
||||||
|
"bsv_config.toml": b"[bsv]\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
create_file_structure(dir, structure)
|
expected0 = dict(structure0)
|
||||||
|
del expected0["bsv_repo"]
|
||||||
|
|
||||||
walker = TestTreeWalker(repo)
|
expected1 = dict(structure1)
|
||||||
dir_digest = walker.add_tree(dir)
|
del expected1["bsv_repo"]
|
||||||
|
|
||||||
|
create_file_structure(dir, structure0)
|
||||||
|
|
||||||
def check(digest: Digest, value: dict | bytes):
|
def check(digest: Digest, value: dict | bytes):
|
||||||
if isinstance(value, dict):
|
if isinstance(value, dict):
|
||||||
@@ -152,19 +174,79 @@ def test_add_tree(tmp_dir: Path, repo: Repository):
|
|||||||
data = blob.reader().read()
|
data = blob.reader().read()
|
||||||
assert data == value
|
assert data == value
|
||||||
|
|
||||||
expected = dict(structure)
|
walker = TestTreeWalker(repo)
|
||||||
del expected["bsv_repo"]
|
dir_digest0 = walker.add_tree(dir)
|
||||||
check(dir_digest, expected)
|
assert walker.reports == [
|
||||||
|
(Action.ADD, dir / "Another test with long name and spaces and a bang !", None),
|
||||||
|
(Action.IGNORE, dir / "bsv_repo", IgnoreCause.IGNORE_RULE),
|
||||||
|
(Action.ADD, dir / "folder/bar.dat", None),
|
||||||
|
(Action.ADD, dir / "folder/sub_folder/empty_folder", None),
|
||||||
|
(Action.ADD, dir / "folder/sub_folder/foo.txt", None),
|
||||||
|
(Action.ADD, dir / "folder/sub_folder", None),
|
||||||
|
(Action.ADD, dir / "folder/test.py", None),
|
||||||
|
(Action.ADD, dir / "folder", None),
|
||||||
|
(Action.ADD, dir, None),
|
||||||
|
]
|
||||||
|
check(dir_digest0, expected0)
|
||||||
|
|
||||||
|
create_file_structure(dir, structure1)
|
||||||
|
|
||||||
|
walker.reports.clear()
|
||||||
|
dir_digest1 = walker.add_tree(dir, source_digest=dir_digest0)
|
||||||
|
assert walker.reports == [
|
||||||
|
(Action.IGNORE, dir / "Another test with long name and spaces and a bang !", IgnoreCause.UNCHANGED),
|
||||||
|
(Action.IGNORE, dir / "bsv_repo", IgnoreCause.IGNORE_RULE),
|
||||||
|
(Action.UPDATE, dir / "folder/bar.dat", None),
|
||||||
|
(Action.IGNORE, dir / "folder/sub_folder/empty_folder", IgnoreCause.UNCHANGED),
|
||||||
|
(Action.IGNORE, dir / "folder/sub_folder/foo.txt", IgnoreCause.UNCHANGED),
|
||||||
|
(Action.IGNORE, dir / "folder/sub_folder", IgnoreCause.UNCHANGED),
|
||||||
|
(Action.REMOVE, dir / "folder/test.py", None),
|
||||||
|
(Action.UPDATE, dir / "folder", None),
|
||||||
|
(Action.ADD, dir / "new_file", None),
|
||||||
|
(Action.UPDATE, dir, None),
|
||||||
|
]
|
||||||
|
check(dir_digest1, expected1)
|
||||||
|
|
||||||
|
|
||||||
def create_file_structure(dst: Path, value: dict | bytes):
|
def create_file_structure(dst: Path, value: dict | bytes):
|
||||||
assert not dst.exists()
|
if isinstance(value, bytes):
|
||||||
if isinstance(value, dict):
|
if dst.is_dir():
|
||||||
dst.mkdir()
|
rmtree(str(dst))
|
||||||
for name, item in value.items():
|
if not dst.is_file() or dst.read_bytes() != value:
|
||||||
create_file_structure(dst / name, item)
|
dst.write_bytes(value)
|
||||||
elif isinstance(value, bytes):
|
elif isinstance(value, dict):
|
||||||
dst.write_bytes(value)
|
if dst.is_file():
|
||||||
|
dst.unlink()
|
||||||
|
if not dst.is_dir():
|
||||||
|
dst.mkdir()
|
||||||
|
|
||||||
|
items = sorted(value.items())
|
||||||
|
fs_paths = sorted(dst.iterdir())
|
||||||
|
|
||||||
|
item_index = 0
|
||||||
|
fs_path_index = 0
|
||||||
|
|
||||||
|
while item_index < len(value) or fs_path_index < len(fs_paths):
|
||||||
|
name, subitem = items[item_index] if item_index < len(items) else (None, None)
|
||||||
|
fs_path = fs_paths[fs_path_index] if fs_path_index < len(fs_paths) else None
|
||||||
|
|
||||||
|
if name and fs_path:
|
||||||
|
if name < fs_path.name:
|
||||||
|
fs_path = None
|
||||||
|
elif name > fs_path.name:
|
||||||
|
name = None
|
||||||
|
|
||||||
|
if name:
|
||||||
|
item_index += 1
|
||||||
|
if fs_path:
|
||||||
|
fs_path_index += 1
|
||||||
|
|
||||||
|
if name:
|
||||||
|
create_file_structure(dst / name, subitem) # type: ignore
|
||||||
|
elif fs_path and fs_path.is_dir():
|
||||||
|
rmtree(fs_path)
|
||||||
|
elif fs_path:
|
||||||
|
fs_path.unlink()
|
||||||
else:
|
else:
|
||||||
raise TypeError(f"invalid type {type(value).__name__} for parameter value")
|
raise TypeError(f"invalid type {type(value).__name__} for parameter value")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user