Backup, Synchronization, Versioning.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

259 lines
10 KiB

# bsv - Backup, Synchronization, Versioning
# Copyright (C) 2023 Simon Boyé
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
from datetime import datetime as DateTime, timedelta as TimeDelta
from enum import Enum
from os import stat_result
from pathlib import Path
import stat
from bsv.object import Digest
from bsv.path_map import PathMap
from bsv.repository import Repository, Tree, TreeItem
from bsv.util import is_bsv_repository, object_type_from_mode
class Action(Enum):
    """Kind of event reported for a path while a tree is being stored."""

    ADD = "add"
    UPDATE = "update"
    REMOVE = "remove"
    IGNORE = "ignore"
    ERROR = "error"

    @classmethod
    def from_digests(cls, digest: Digest, source_digest: Digest | None) -> tuple[Action, IgnoreCause | None]:
        """Classify a freshly stored object against its previous digest.

        Args:
            digest: Digest of the object that was just stored; must be truthy.
            source_digest: Digest the same path had in the source tree, or a
                falsy value when there is no previous version.

        Returns:
            ``(ADD, None)`` when there is no previous digest,
            ``(IGNORE, IgnoreCause.UNCHANGED)`` when both digests are equal,
            ``(UPDATE, None)`` otherwise.
        """
        assert digest
        # No previous version of this path: it is new.
        if not source_digest:
            return cls.ADD, None
        # Same content as before: nothing to record.
        if source_digest == digest:
            return cls.IGNORE, IgnoreCause.UNCHANGED
        return cls.UPDATE, None
class IgnoreCause(Enum):
    """Reason attached to an ``Action.IGNORE`` report."""

    # The walker's ignore() hook rejected the path.
    IGNORE_RULE = "ignore_rule"

    # The path matches what is already stored for it.
    UNCHANGED = "unchanged"

    # The path's file type maps to no object type the walker stores.
    UNSUPPORTED_TYPE = "unsupported_type"
class TreeWalker:
_repo: Repository
_time_rounding_us: int = 2000000
_force_hash: bool = False
_dry_run: bool = False
def __init__(
self,
repo: Repository,
*,
time_rounding_us: int = 2000000,
force_hash: bool = False,
dry_run: bool = False,
):
self._repo = repo
self._time_rounding_us = time_rounding_us
self._force_hash = force_hash
self._dry_run = dry_run
# def add_virtual_tree(self, paths: PathMap) -> Digest:
# root = {}
# for pair in paths.paths:
# vdir = root
# for part in pair.bsv.parts[:-1]:
# vdir = vdir.setdefault(part, {})
# vdir[pair.bsv.parts[-1]] = pair.fs
# return self._add_virtual_tree(root)
# def _add_virtual_tree(self, vtree: dict[str, dict | Path]) -> Digest:
# tree = Tree(self._repo, [])
# for name, value in vtree.items():
# if isinstance(value, dict):
# digest = self._add_virtual_tree(value)
# elif isinstance(value, Path):
# digest = self.add_tree(value)
# else:
# raise TypeError(f"unexpected type {type(vtree).__name__} for vtree")
# tree.items.append(TreeItem(
# digest = digest,
# object_type = b"tree",
# size = 0,
# permissions = 0o766,
# modification_timestamp = timestamp_us_from_time(DateTime.now()),
# name = name,
# ))
# return self._repo.add_tree(tree, dry_run=self._dry_run)
def add_tree(self, path: Path, *, source_digest: Digest | None=None) -> Digest:
pstat = path.stat(follow_symlinks=False)
if self.ignore(path, pstat):
self.report(Action.IGNORE, path, pstat, IgnoreCause.IGNORE_RULE)
return Digest()
return self._add_tree(path, pstat, source_digest=source_digest)
def _add_tree(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest:
source = self._repo.get_tree(source_digest) if source_digest else None
tree = Tree(self._repo, [])
subpaths = sorted(path.iterdir())
subpath_index = 0
source_item_index = 0
while subpath_index < len(subpaths) or (source and source_item_index < len(source.items)):
subpath = subpaths[subpath_index] if subpath_index < len(subpaths) else None
source_item = source.items[source_item_index] if source and source_item_index < len(source.items) else None
if subpath and source_item:
if subpath.name < source_item.name:
source_item = None
elif subpath.name > source_item.name:
subpath = None
if subpath is not None:
subpath_index += 1
if source_item is not None:
source_item_index += 1
if subpath is not None:
digest = Digest()
try:
istat = subpath.lstat()
if self.ignore(subpath, istat, source=source_item):
self.report(Action.IGNORE, subpath, istat, IgnoreCause.IGNORE_RULE)
continue
if (source_item is not None and
not self._force_hash and
not stat.S_ISDIR(istat.st_mode) and
pstat.st_size == source_item.size and
pstat.st_mtime_ns // (1000 * self._time_rounding_us) == source_item.modification_timestamp_us // self._time_rounding_us
):
self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNCHANGED)
tree.items.append(source_item)
continue
sub_source_digest = source_item and source_item.digest
object_type = object_type_from_mode(istat.st_mode)
if object_type == b"slnk":
digest = self._add_symlink(subpath, istat, source_digest=sub_source_digest)
elif object_type == b"tree":
digest = self._add_tree(subpath, istat, source_digest=sub_source_digest)
elif object_type == b"blob":
digest = self._add_blob(subpath, istat, source_digest=sub_source_digest)
else:
self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNSUPPORTED_TYPE)
continue
except Exception as err:
self.report(Action.ERROR, subpath, None, err)
continue
if digest:
tree.items.append(TreeItem(
digest = digest,
object_type = object_type,
size = istat.st_size,
permissions = stat.S_IMODE(istat.st_mode),
modification_timestamp_us = istat.st_mtime_ns // 1000,
name = subpath.name,
))
elif source_item:
self.report(Action.REMOVE, path / source_item.name, None, source_item)
digest = self._repo.add_tree(tree, dry_run=self._dry_run)
action, info = Action.from_digests(digest, source_digest)
self.report(action, path, pstat, info)
return digest
def _add_symlink(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest:
# TODO: Store symlink relative to current dir ?
# * What about symlink that points outside of the backup dirs
# * Should symlinks that points inside the backup dirs but in another
# mount-point adjusted ?
# * Should absolute symlink be restored as absolute ?
digest = self._repo._cas.write(
b"slnk",
path.readlink().as_posix().encode("utf-8"),
dry_run = self._dry_run,
)
action, info = Action.from_digests(digest, source_digest)
self.report(action, path, pstat, info)
return digest
def _add_blob(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest:
with path.open("rb") as stream:
digest = self._repo.add_blob(stream, dry_run=self._dry_run)
action, info = Action.from_digests(digest, source_digest)
self.report(action, path, pstat, info)
return digest
def ignore(self, path: Path, pstat: stat_result, *, source: TreeItem | None=None) -> bool:
return is_bsv_repository(path)
def report(self, action: Action, path: Path, pstat: stat_result | None, info: IgnoreCause | Exception | TreeItem | None=None):
match action, info:
case (Action.ADD, None):
print(f"Add: {path}")
case (Action.UPDATE, None):
print(f"Add: {path}")
case (Action.REMOVE, item) if isinstance(item, TreeItem):
print(f"Remove: {path / item.name}")
case (Action.IGNORE, IgnoreCause.IGNORE_RULE):
print(f"Ignore (rule): {path}")
case (Action.IGNORE, IgnoreCause.UNCHANGED):
print(f"Ignore (unchanged): {path}")
case (Action.IGNORE, IgnoreCause.UNSUPPORTED_TYPE) if pstat is not None:
print(f"Ignore (unsupported type {path_type_name(pstat)}): {path}")
case (Action.ERROR, _) if isinstance(info, Exception):
print(f"Error {info}: {path}")
case _:
raise ValueError("TreeWalker.report(): unsupported parameter combination")
def path_type_name(pstat: stat_result) -> str:
    """Return a readable name for the file type encoded in ``pstat.st_mode``.

    Each ``stat.S_IS*`` predicate is tried in a fixed order; the labels of
    all predicates that match are joined with ``", "``. When none matches,
    ``"unknown"`` is returned.
    """
    mode = pstat.st_mode
    # (predicate, label) pairs, in the order the labels must appear.
    type_checks = (
        (stat.S_ISBLK, "block_device"),
        (stat.S_ISCHR, "char_device"),
        (stat.S_ISDIR, "dir"),
        (stat.S_ISDOOR, "door"),
        (stat.S_ISFIFO, "fifo"),
        (stat.S_ISLNK, "symlink"),
        (stat.S_ISPORT, "port"),
        (stat.S_ISREG, "file"),
        (stat.S_ISSOCK, "socket"),
        (stat.S_ISWHT, "whiteout"),
    )
    labels = [label for matches, label in type_checks if matches(mode)]
    return ", ".join(labels) if labels else "unknown"