You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
259 lines
10 KiB
259 lines
10 KiB
# bsv - Backup, Synchronization, Versioning
|
|
# Copyright (C) 2023 Simon Boyé
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
from __future__ import annotations
|
|
|
|
from datetime import datetime as DateTime, timedelta as TimeDelta
|
|
from enum import Enum
|
|
from os import stat_result
|
|
from pathlib import Path
|
|
import stat
|
|
|
|
from bsv.object import Digest
|
|
from bsv.path_map import PathMap
|
|
from bsv.repository import Repository, Tree, TreeItem
|
|
from bsv.util import is_bsv_repository, object_type_from_mode
|
|
|
|
|
|
class Action(Enum):
    """Outcome reported for a path while a tree is being recorded."""

    ADD = "add"
    UPDATE = "update"
    REMOVE = "remove"
    IGNORE = "ignore"
    ERROR = "error"

    @classmethod
    def from_digests(cls, digest: Digest, source_digest: Digest | None) -> tuple[Action, IgnoreCause | None]:
        """Classify an object by comparing its digest with the previous one.

        Args:
            digest: Digest of the object that was just written; must be truthy.
            source_digest: Digest of the same path in the source tree, or a
                falsy value when there is no previous version.

        Returns:
            ``(ADD, None)`` when there is no previous digest,
            ``(IGNORE, IgnoreCause.UNCHANGED)`` when both digests are equal,
            ``(UPDATE, None)`` otherwise.
        """
        assert digest

        # No previous version: the object is new.
        if not source_digest:
            return cls.ADD, None

        # Same content as before: nothing to record.
        if source_digest == digest:
            return cls.IGNORE, IgnoreCause.UNCHANGED

        return cls.UPDATE, None
|
|
|
|
class IgnoreCause(Enum):
    """Reason attached to an ``IGNORE`` report."""

    # The path was rejected by the walker's ignore predicate.
    IGNORE_RULE = "ignore_rule"
    # The path is considered identical to its previous version.
    UNCHANGED = "unchanged"
    # The path is neither a symlink, a directory nor a regular file.
    UNSUPPORTED_TYPE = "unsupported_type"
|
|
|
|
|
|
class TreeWalker:
|
|
_repo: Repository
|
|
_time_rounding_us: int = 2000000
|
|
_force_hash: bool = False
|
|
_dry_run: bool = False
|
|
|
|
def __init__(
|
|
self,
|
|
repo: Repository,
|
|
*,
|
|
time_rounding_us: int = 2000000,
|
|
force_hash: bool = False,
|
|
dry_run: bool = False,
|
|
):
|
|
self._repo = repo
|
|
self._time_rounding_us = time_rounding_us
|
|
self._force_hash = force_hash
|
|
self._dry_run = dry_run
|
|
|
|
# def add_virtual_tree(self, paths: PathMap) -> Digest:
|
|
# root = {}
|
|
# for pair in paths.paths:
|
|
# vdir = root
|
|
# for part in pair.bsv.parts[:-1]:
|
|
# vdir = vdir.setdefault(part, {})
|
|
# vdir[pair.bsv.parts[-1]] = pair.fs
|
|
|
|
# return self._add_virtual_tree(root)
|
|
|
|
# def _add_virtual_tree(self, vtree: dict[str, dict | Path]) -> Digest:
|
|
# tree = Tree(self._repo, [])
|
|
# for name, value in vtree.items():
|
|
# if isinstance(value, dict):
|
|
# digest = self._add_virtual_tree(value)
|
|
# elif isinstance(value, Path):
|
|
# digest = self.add_tree(value)
|
|
# else:
|
|
# raise TypeError(f"unexpected type {type(vtree).__name__} for vtree")
|
|
# tree.items.append(TreeItem(
|
|
# digest = digest,
|
|
# object_type = b"tree",
|
|
# size = 0,
|
|
# permissions = 0o766,
|
|
# modification_timestamp = timestamp_us_from_time(DateTime.now()),
|
|
# name = name,
|
|
# ))
|
|
# return self._repo.add_tree(tree, dry_run=self._dry_run)
|
|
|
|
def add_tree(self, path: Path, *, source_digest: Digest | None=None) -> Digest:
|
|
pstat = path.stat(follow_symlinks=False)
|
|
if self.ignore(path, pstat):
|
|
self.report(Action.IGNORE, path, pstat, IgnoreCause.IGNORE_RULE)
|
|
return Digest()
|
|
return self._add_tree(path, pstat, source_digest=source_digest)
|
|
|
|
def _add_tree(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest:
|
|
source = self._repo.get_tree(source_digest) if source_digest else None
|
|
|
|
tree = Tree(self._repo, [])
|
|
subpaths = sorted(path.iterdir())
|
|
|
|
subpath_index = 0
|
|
source_item_index = 0
|
|
|
|
while subpath_index < len(subpaths) or (source and source_item_index < len(source.items)):
|
|
subpath = subpaths[subpath_index] if subpath_index < len(subpaths) else None
|
|
source_item = source.items[source_item_index] if source and source_item_index < len(source.items) else None
|
|
|
|
if subpath and source_item:
|
|
if subpath.name < source_item.name:
|
|
source_item = None
|
|
elif subpath.name > source_item.name:
|
|
subpath = None
|
|
|
|
if subpath is not None:
|
|
subpath_index += 1
|
|
if source_item is not None:
|
|
source_item_index += 1
|
|
|
|
if subpath is not None:
|
|
digest = Digest()
|
|
try:
|
|
istat = subpath.lstat()
|
|
|
|
if self.ignore(subpath, istat, source=source_item):
|
|
self.report(Action.IGNORE, subpath, istat, IgnoreCause.IGNORE_RULE)
|
|
continue
|
|
|
|
if (source_item is not None and
|
|
not self._force_hash and
|
|
not stat.S_ISDIR(istat.st_mode) and
|
|
pstat.st_size == source_item.size and
|
|
pstat.st_mtime_ns // (1000 * self._time_rounding_us) == source_item.modification_timestamp_us // self._time_rounding_us
|
|
):
|
|
self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNCHANGED)
|
|
tree.items.append(source_item)
|
|
continue
|
|
|
|
sub_source_digest = source_item and source_item.digest
|
|
object_type = object_type_from_mode(istat.st_mode)
|
|
if object_type == b"slnk":
|
|
digest = self._add_symlink(subpath, istat, source_digest=sub_source_digest)
|
|
elif object_type == b"tree":
|
|
digest = self._add_tree(subpath, istat, source_digest=sub_source_digest)
|
|
elif object_type == b"blob":
|
|
digest = self._add_blob(subpath, istat, source_digest=sub_source_digest)
|
|
else:
|
|
self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNSUPPORTED_TYPE)
|
|
continue
|
|
except Exception as err:
|
|
self.report(Action.ERROR, subpath, None, err)
|
|
continue
|
|
|
|
if digest:
|
|
tree.items.append(TreeItem(
|
|
digest = digest,
|
|
object_type = object_type,
|
|
size = istat.st_size,
|
|
permissions = stat.S_IMODE(istat.st_mode),
|
|
modification_timestamp_us = istat.st_mtime_ns // 1000,
|
|
name = subpath.name,
|
|
))
|
|
elif source_item:
|
|
self.report(Action.REMOVE, path / source_item.name, None, source_item)
|
|
|
|
digest = self._repo.add_tree(tree, dry_run=self._dry_run)
|
|
|
|
action, info = Action.from_digests(digest, source_digest)
|
|
self.report(action, path, pstat, info)
|
|
return digest
|
|
|
|
|
|
def _add_symlink(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest:
|
|
# TODO: Store symlink relative to current dir ?
|
|
# * What about symlink that points outside of the backup dirs
|
|
# * Should symlinks that points inside the backup dirs but in another
|
|
# mount-point adjusted ?
|
|
# * Should absolute symlink be restored as absolute ?
|
|
digest = self._repo._cas.write(
|
|
b"slnk",
|
|
path.readlink().as_posix().encode("utf-8"),
|
|
dry_run = self._dry_run,
|
|
)
|
|
|
|
action, info = Action.from_digests(digest, source_digest)
|
|
self.report(action, path, pstat, info)
|
|
return digest
|
|
|
|
def _add_blob(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> Digest:
|
|
with path.open("rb") as stream:
|
|
digest = self._repo.add_blob(stream, dry_run=self._dry_run)
|
|
|
|
action, info = Action.from_digests(digest, source_digest)
|
|
self.report(action, path, pstat, info)
|
|
return digest
|
|
|
|
|
|
def ignore(self, path: Path, pstat: stat_result, *, source: TreeItem | None=None) -> bool:
|
|
return is_bsv_repository(path)
|
|
|
|
def report(self, action: Action, path: Path, pstat: stat_result | None, info: IgnoreCause | Exception | TreeItem | None=None):
|
|
match action, info:
|
|
case (Action.ADD, None):
|
|
print(f"Add: {path}")
|
|
case (Action.UPDATE, None):
|
|
print(f"Add: {path}")
|
|
case (Action.REMOVE, item) if isinstance(item, TreeItem):
|
|
print(f"Remove: {path / item.name}")
|
|
case (Action.IGNORE, IgnoreCause.IGNORE_RULE):
|
|
print(f"Ignore (rule): {path}")
|
|
case (Action.IGNORE, IgnoreCause.UNCHANGED):
|
|
print(f"Ignore (unchanged): {path}")
|
|
case (Action.IGNORE, IgnoreCause.UNSUPPORTED_TYPE) if pstat is not None:
|
|
print(f"Ignore (unsupported type {path_type_name(pstat)}): {path}")
|
|
case (Action.ERROR, _) if isinstance(info, Exception):
|
|
print(f"Error {info}: {path}")
|
|
case _:
|
|
raise ValueError("TreeWalker.report(): unsupported parameter combination")
|
|
|
|
|
|
def path_type_name(pstat: stat_result) -> str:
    """Return a human-readable name for the file type encoded in ``pstat``.

    Every type predicate of the ``stat`` module that matches ``pstat.st_mode``
    contributes one label; labels are joined with ``", "``. When no predicate
    matches, ``"unknown"`` is returned.
    """
    mode = pstat.st_mode
    # Order matters: it is the order in which labels appear in the result.
    type_checks = (
        (stat.S_ISBLK, "block_device"),
        (stat.S_ISCHR, "char_device"),
        (stat.S_ISDIR, "dir"),
        (stat.S_ISDOOR, "door"),
        (stat.S_ISFIFO, "fifo"),
        (stat.S_ISLNK, "symlink"),
        (stat.S_ISPORT, "port"),
        (stat.S_ISREG, "file"),
        (stat.S_ISSOCK, "socket"),
        (stat.S_ISWHT, "whiteout"),
    )
    labels = [label for matches, label in type_checks if matches(mode)]
    return ", ".join(labels) if labels else "unknown"
|
|
|