|
|
|
@ -15,12 +15,14 @@ |
|
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>. |
|
|
|
from __future__ import annotations |
|
|
|
|
|
|
|
from datetime import datetime as DateTime, timedelta as TimeDelta |
|
|
|
from enum import Enum |
|
|
|
from os import stat_result |
|
|
|
from pathlib import Path |
|
|
|
import stat |
|
|
|
|
|
|
|
from bsv.object import Digest |
|
|
|
from bsv.path_map import PathMap |
|
|
|
from bsv.repository import Repository, Tree, TreeItem |
|
|
|
from bsv.util import is_bsv_repository, object_type_from_mode |
|
|
|
|
|
|
|
@ -28,9 +30,20 @@ from bsv.util import is_bsv_repository, object_type_from_mode |
|
|
|
class Action(Enum):
    """Kind of event reported for a path while a tree is being stored."""

    ADD = "add"
    UPDATE = "update"
    REMOVE = "remove"
    IGNORE = "ignore"
    ERROR = "error"

    @classmethod
    def from_digests(cls, digest: Digest, source_digest: Digest | None) -> tuple[Action, IgnoreCause | None]:
        """Classify a freshly stored object against the digest it had before.

        Args:
            digest: Digest of the object that was just stored; must be truthy.
            source_digest: Digest of the same entry in the source tree, or a
                falsy value (``None`` / empty) when there is no previous entry.

        Returns:
            ``(Action.ADD, None)`` when there is no previous digest,
            ``(Action.IGNORE, IgnoreCause.UNCHANGED)`` when both digests are
            equal, and ``(Action.UPDATE, None)`` otherwise.
        """
        # Internal invariant, not input validation: callers pass the digest
        # they just obtained from the repository.
        assert digest
        if not source_digest:
            return Action.ADD, None
        elif source_digest == digest:
            return Action.IGNORE, IgnoreCause.UNCHANGED
        else:
            return Action.UPDATE, None
|
|
|
|
|
|
|
class IgnoreCause(Enum): |
|
|
|
IGNORE_RULE = "ignore_rule" |
|
|
|
UNCHANGED = "unchanged" |
|
|
|
@ -39,88 +52,177 @@ class IgnoreCause(Enum): |
|
|
|
|
|
|
|
class TreeWalker: |
|
|
|
_repo: Repository |
|
|
|
_time_rounding_us: int = 2000000 |
|
|
|
_force_hash: bool = False |
|
|
|
_dry_run: bool = False |
|
|
|
|
|
|
|
def __init__(self, repo: Repository, dry_run: bool=False): |
|
|
|
def __init__( |
|
|
|
self, |
|
|
|
repo: Repository, |
|
|
|
*, |
|
|
|
time_rounding_us: int = 2000000, |
|
|
|
force_hash: bool = False, |
|
|
|
dry_run: bool = False, |
|
|
|
): |
|
|
|
self._repo = repo |
|
|
|
self._time_rounding_us = time_rounding_us |
|
|
|
self._force_hash = force_hash |
|
|
|
self._dry_run = dry_run |
|
|
|
|
|
|
|
# Disabled draft of virtual-tree support, kept as written by the author.
#
# def add_virtual_tree(self, paths: PathMap) -> Digest:
#     root = {}
#     for pair in paths.paths:
#         vdir = root
#         for part in pair.bsv.parts[:-1]:
#             vdir = vdir.setdefault(part, {})
#         vdir[pair.bsv.parts[-1]] = pair.fs
#
#     return self._add_virtual_tree(root)
#
# def _add_virtual_tree(self, vtree: dict[str, dict | Path]) -> Digest:
#     tree = Tree(self._repo, [])
#     for name, value in vtree.items():
#         if isinstance(value, dict):
#             digest = self._add_virtual_tree(value)
#         elif isinstance(value, Path):
#             digest = self.add_tree(value)
#         else:
#             raise TypeError(f"unexpected type {type(vtree).__name__} for vtree")
#         tree.items.append(TreeItem(
#             digest = digest,
#             object_type = b"tree",
#             size = 0,
#             permissions = 0o766,
#             modification_timestamp = timestamp_us_from_time(DateTime.now()),
#             name = name,
#         ))
#     return self._repo.add_tree(tree, dry_run=self._dry_run)

def add_tree(self, path: Path, *, source_digest: Digest | None=None) -> Digest:
    """Store the directory at ``path`` and return the digest of its tree.

    Args:
        path: Directory to store. It is stat'ed without following symlinks.
        source_digest: Digest of a previously stored tree to compare
            against, or ``None`` to store everything as new.

    Returns:
        The digest returned by ``_add_tree``, or an empty ``Digest()`` when
        ``ignore`` rejects ``path`` itself (an IGNORE event is reported).
    """
    pstat = path.stat(follow_symlinks=False)
    if self.ignore(path, pstat):
        self.report(Action.IGNORE, path, pstat, IgnoreCause.IGNORE_RULE)
        return Digest()
    # Single exit: the source digest must reach _add_tree, otherwise the
    # comparison against the previous tree never happens.
    return self._add_tree(path, pstat, source_digest=source_digest)
|
|
|
|
|
|
|
def _add_tree(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest:
    """Store directory ``path`` as a tree, diffing it against a source tree.

    The sorted directory entries and the items of the source tree (when
    ``source_digest`` is given) are walked in lockstep by name, so that each
    entry is classified as present on both sides, new on disk, or removed.

    Args:
        path: Directory whose entries are stored.
        pstat: Stat result of ``path`` itself; only used for reporting.
        source_digest: Digest of the previously stored tree for this
            directory, or a falsy value when there is none.

    Returns:
        The digest returned by ``Repository.add_tree`` for the new tree.
    """
    source = self._repo.get_tree(source_digest) if source_digest else None

    tree = Tree(self._repo, [])
    subpaths = sorted(path.iterdir())

    subpath_index = 0
    source_item_index = 0

    # Merge-walk over two name-ordered sequences.
    # NOTE(review): assumes source.items is ordered by name the same way
    # sorted(path.iterdir()) orders sibling paths - confirm in Tree.
    while subpath_index < len(subpaths) or (source and source_item_index < len(source.items)):
        subpath = subpaths[subpath_index] if subpath_index < len(subpaths) else None
        source_item = source.items[source_item_index] if source and source_item_index < len(source.items) else None

        # When both sides have a candidate, keep only the one(s) with the
        # smallest name; the other is retried on the next iteration.
        if subpath is not None and source_item is not None:
            if subpath.name < source_item.name:
                source_item = None
            elif subpath.name > source_item.name:
                subpath = None

        # Advance before processing so that every `continue` below is safe.
        if subpath is not None:
            subpath_index += 1
        if source_item is not None:
            source_item_index += 1

        if subpath is not None:
            digest = Digest()
            try:
                istat = subpath.lstat()

                if self.ignore(subpath, istat, source=source_item):
                    self.report(Action.IGNORE, subpath, istat, IgnoreCause.IGNORE_RULE)
                    continue

                # Shortcut for non-directories: same size and same rounded
                # mtime as the recorded item means "unchanged", so the old
                # item is reused without re-reading the content. The entry's
                # own stat (istat) is compared, not the parent directory's.
                if (
                    source_item is not None
                    and not self._force_hash
                    and not stat.S_ISDIR(istat.st_mode)
                    and istat.st_size == source_item.size
                    and istat.st_mtime_ns // (1000 * self._time_rounding_us)
                        == source_item.modification_timestamp_us // self._time_rounding_us
                ):
                    self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNCHANGED)
                    tree.items.append(source_item)
                    continue

                # NOTE(review): the source digest is forwarded even if the
                # entry changed type (e.g. file -> directory) - confirm that
                # get_tree() copes with a non-tree digest.
                sub_source_digest = source_item.digest if source_item is not None else None
                object_type = object_type_from_mode(istat.st_mode)
                if object_type == b"slnk":
                    digest = self._add_symlink(subpath, istat, source_digest=sub_source_digest)
                elif object_type == b"tree":
                    digest = self._add_tree(subpath, istat, source_digest=sub_source_digest)
                elif object_type == b"blob":
                    digest = self._add_blob(subpath, istat, source_digest=sub_source_digest)
                else:
                    self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNSUPPORTED_TYPE)
                    continue
            except Exception as err:
                # Best effort: a failing entry is reported and left out of
                # the tree; the walk goes on with the next entry.
                self.report(Action.ERROR, subpath, None, err)
                continue

            if digest:
                tree.items.append(TreeItem(
                    digest = digest,
                    object_type = object_type,
                    size = istat.st_size,
                    permissions = stat.S_IMODE(istat.st_mode),
                    modification_timestamp_us = istat.st_mtime_ns // 1000,
                    name = subpath.name,
                ))
        elif source_item is not None:
            # Present in the source tree but no longer on disk.
            self.report(Action.REMOVE, path / source_item.name, None, source_item)

    digest = self._repo.add_tree(tree, dry_run=self._dry_run)

    action, info = Action.from_digests(digest, source_digest)
    self.report(action, path, pstat, info)
    return digest
|
|
|
|
|
|
|
def _add_symlink(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest:
    """Store the target of symlink ``path`` as an ``slnk`` object.

    The link target is read with ``readlink()``, converted to POSIX form and
    encoded as UTF-8 before being written to the repository's object store.

    Args:
        path: The symlink to store.
        pstat: Stat result of the link; only used for reporting.
        source_digest: Digest recorded for this entry in the source tree, or
            ``None``; decides whether ADD, UPDATE or IGNORE is reported.

    Returns:
        The digest returned by the object store for the link target.
    """
    # TODO: Store symlink relative to current dir ?
    # * What about symlink that points outside of the backup dirs
    # * Should symlinks that points inside the backup dirs but in another
    #   mount-point adjusted ?
    # * Should absolute symlink be restored as absolute ?
    digest = self._repo._cas.write(
        b"slnk",
        path.readlink().as_posix().encode("utf-8"),
        dry_run = self._dry_run,
    )

    # Report once, after the write, so the event reflects the comparison
    # with the previous digest instead of always being an ADD.
    action, info = Action.from_digests(digest, source_digest)
    self.report(action, path, pstat, info)
    return digest
|
|
|
|
|
|
|
def _add_blob(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest:
    """Store the content of regular file ``path`` as a blob.

    Args:
        path: File to read; opened in binary mode.
        pstat: Stat result of the file; only used for reporting.
        source_digest: Digest recorded for this entry in the source tree, or
            ``None``; decides whether ADD, UPDATE or IGNORE is reported.

    Returns:
        The digest returned by ``Repository.add_blob``.
    """
    with path.open("rb") as stream:
        digest = self._repo.add_blob(stream, dry_run=self._dry_run)

    # Reporting happens after the file is closed and must be reachable: no
    # early return from inside the `with` block.
    action, info = Action.from_digests(digest, source_digest)
    self.report(action, path, pstat, info)
    return digest
|
|
|
|
|
|
|
def ignore(self, path: Path, pstat: stat_result, *, source: TreeItem | None=None) -> bool:
    """Tell whether ``path`` must be left out of the stored tree.

    This implementation only excludes paths for which ``is_bsv_repository``
    returns true; ``pstat`` and ``source`` are accepted but not consulted.

    Args:
        path: Candidate path.
        pstat: Stat result of ``path``.
        source: Matching item of the source tree, if any.

    Returns:
        ``True`` when the path is to be skipped.
    """
    return is_bsv_repository(path)
|
|
|
|
|
|
|
def report(self, action: Action, path: Path, pstat: stat_result | None, info: IgnoreCause | Exception | None=None): |
|
|
|
def report(self, action: Action, path: Path, pstat: stat_result | None, info: IgnoreCause | Exception | TreeItem | None=None): |
|
|
|
match action, info: |
|
|
|
case (Action.ADD, None): |
|
|
|
print(f"Add: {path}") |
|
|
|
case (Action.UPDATE, None): |
|
|
|
print(f"Add: {path}") |
|
|
|
case (Action.REMOVE, item) if isinstance(item, TreeItem): |
|
|
|
print(f"Remove: {path / item.name}") |
|
|
|
case (Action.IGNORE, IgnoreCause.IGNORE_RULE): |
|
|
|
print(f"Ignore (rule): {path}") |
|
|
|
case (Action.IGNORE, IgnoreCause.UNCHANGED): |
|
|
|
print(f"Ignore (unchanged): {path}") |
|
|
|
case (Action.IGNORE, IgnoreCause.UNSUPPORTED_TYPE) if pstat is not None: |
|
|
|
assert pstat is not None |
|
|
|
print(f"Ignore (unsupported type {path_type_name(pstat)}): {path}") |
|
|
|
case (Action.ERROR, _) if isinstance(info, Exception): |
|
|
|
print(f"Error {info}: {path}") |
|
|
|
|