# bsv - Backup, Synchronization, Versioning # Copyright (C) 2023 Simon Boyé # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . from __future__ import annotations from datetime import datetime as DateTime, timedelta as TimeDelta from enum import Enum from os import stat_result from pathlib import Path import stat from bsv.object import Digest from bsv.path_map import PathMap from bsv.repository import Repository, Tree, TreeItem from bsv.util import is_bsv_repository, object_type_from_mode class Action(Enum): ADD = "add" UPDATE = "update" REMOVE = "remove" IGNORE = "ignore" ERROR = "error" @classmethod def from_digests(cls, digest: Digest, source_digest: Digest | None) -> tuple[Action, IgnoreCause | None]: assert digest if not source_digest: return Action.ADD, None elif source_digest == digest: return Action.IGNORE, IgnoreCause.UNCHANGED else: return Action.UPDATE, None class IgnoreCause(Enum): IGNORE_RULE = "ignore_rule" UNCHANGED = "unchanged" UNSUPPORTED_TYPE = "unsupported_type" class TreeWalker: _repo: Repository _time_rounding_us: int = 2000000 _force_hash: bool = False _dry_run: bool = False def __init__( self, repo: Repository, *, time_rounding_us: int = 2000000, force_hash: bool = False, dry_run: bool = False, ): self._repo = repo self._time_rounding_us = time_rounding_us self._force_hash = force_hash self._dry_run = dry_run # def add_virtual_tree(self, paths: PathMap) -> Digest: # root = {} # for pair in paths.paths: # vdir = root # for part in pair.bsv.parts[:-1]: # vdir = vdir.setdefault(part, {}) # vdir[pair.bsv.parts[-1]] = pair.fs # return self._add_virtual_tree(root) # def _add_virtual_tree(self, vtree: dict[str, dict | Path]) -> Digest: # tree = Tree(self._repo, []) # for name, value in vtree.items(): # if isinstance(value, dict): # digest = self._add_virtual_tree(value) # elif isinstance(value, Path): # digest = self.add_tree(value) # else: # raise TypeError(f"unexpected type {type(vtree).__name__} for vtree") # tree.items.append(TreeItem( # digest = digest, # object_type = b"tree", # size = 0, # permissions = 0o766, # modification_timestamp = timestamp_us_from_time(DateTime.now()), # name = name, # )) # return self._repo.add_tree(tree, dry_run=self._dry_run) def add_tree(self, path: Path, *, source_digest: Digest | None=None) -> Digest: pstat = path.stat(follow_symlinks=False) if self.ignore(path, pstat): self.report(Action.IGNORE, path, pstat, IgnoreCause.IGNORE_RULE) return Digest() return self._add_tree(path, pstat, source_digest=source_digest) def _add_tree(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest: source = self._repo.get_tree(source_digest) if source_digest else None tree = Tree(self._repo, []) subpaths = sorted(path.iterdir()) subpath_index = 0 source_item_index = 0 while subpath_index < len(subpaths) or (source and source_item_index < len(source.items)): subpath = subpaths[subpath_index] if subpath_index < len(subpaths) else None source_item = source.items[source_item_index] if source and source_item_index < len(source.items) else None if subpath and source_item: if subpath.name < source_item.name: source_item = None elif subpath.name > source_item.name: subpath = None if subpath is not None: subpath_index += 1 if source_item is not None: source_item_index += 1 if subpath is not None: digest = Digest() try: istat = subpath.lstat() if self.ignore(subpath, istat, source=source_item): self.report(Action.IGNORE, subpath, istat, IgnoreCause.IGNORE_RULE) continue if (source_item is not None and not self._force_hash and not stat.S_ISDIR(istat.st_mode) and pstat.st_size == source_item.size and pstat.st_mtime_ns // (1000 * self._time_rounding_us) == source_item.modification_timestamp_us // self._time_rounding_us ): self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNCHANGED) tree.items.append(source_item) continue sub_source_digest = source_item and source_item.digest object_type = object_type_from_mode(istat.st_mode) if object_type == b"slnk": digest = self._add_symlink(subpath, istat, source_digest=sub_source_digest) elif object_type == b"tree": digest = self._add_tree(subpath, istat, source_digest=sub_source_digest) elif object_type == b"blob": digest = self._add_blob(subpath, istat, source_digest=sub_source_digest) else: self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNSUPPORTED_TYPE) continue except Exception as err: self.report(Action.ERROR, subpath, None, err) continue if digest: tree.items.append(TreeItem( digest = digest, object_type = object_type, size = istat.st_size, permissions = stat.S_IMODE(istat.st_mode), modification_timestamp_us = istat.st_mtime_ns // 1000, name = subpath.name, )) elif source_item: self.report(Action.REMOVE, path / source_item.name, None, source_item) digest = self._repo.add_tree(tree, dry_run=self._dry_run) action, info = Action.from_digests(digest, source_digest) self.report(action, path, pstat, info) return digest def _add_symlink(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest: # TODO: Store symlink relative to current dir ? # * What about symlink that points outside of the backup dirs # * Should symlinks that points inside the backup dirs but in another # mount-point adjusted ? # * Should absolute symlink be restored as absolute ? digest = self._repo._cas.write( b"slnk", path.readlink().as_posix().encode("utf-8"), dry_run = self._dry_run, ) action, info = Action.from_digests(digest, source_digest) self.report(action, path, pstat, info) return digest def _add_blob(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest: with path.open("rb") as stream: digest = self._repo.add_blob(stream, dry_run=self._dry_run) action, info = Action.from_digests(digest, source_digest) self.report(action, path, pstat, info) return digest def ignore(self, path: Path, pstat: stat_result, *, source: TreeItem | None=None) -> bool: return is_bsv_repository(path) def report(self, action: Action, path: Path, pstat: stat_result | None, info: IgnoreCause | Exception | TreeItem | None=None): match action, info: case (Action.ADD, None): print(f"Add: {path}") case (Action.UPDATE, None): print(f"Add: {path}") case (Action.REMOVE, item) if isinstance(item, TreeItem): print(f"Remove: {path / item.name}") case (Action.IGNORE, IgnoreCause.IGNORE_RULE): print(f"Ignore (rule): {path}") case (Action.IGNORE, IgnoreCause.UNCHANGED): print(f"Ignore (unchanged): {path}") case (Action.IGNORE, IgnoreCause.UNSUPPORTED_TYPE) if pstat is not None: print(f"Ignore (unsupported type {path_type_name(pstat)}): {path}") case (Action.ERROR, _) if isinstance(info, Exception): print(f"Error {info}: {path}") case _: raise ValueError("TreeWalker.report(): unsupported parameter combination") def path_type_name(pstat: stat_result) -> str: parts = [] if stat.S_ISBLK(pstat.st_mode): parts.append("block_device") if stat.S_ISCHR(pstat.st_mode): parts.append("char_device") if stat.S_ISDIR(pstat.st_mode): parts.append("dir") if stat.S_ISDOOR(pstat.st_mode): parts.append("door") if stat.S_ISFIFO(pstat.st_mode): parts.append("fifo") if stat.S_ISLNK(pstat.st_mode): parts.append("symlink") if stat.S_ISPORT(pstat.st_mode): parts.append("port") if stat.S_ISREG(pstat.st_mode): parts.append("file") if stat.S_ISSOCK(pstat.st_mode): parts.append("socket") if stat.S_ISWHT(pstat.st_mode): parts.append("whiteout") if not parts: return "unknown" return ", ".join(parts)