# Backup, Synchronization, Versioning — test suite.
# bsv - Backup, Synchronization, Versioning
# Copyright (C) 2023 Simon Boyé
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
from datetime import UTC, datetime
from os import stat_result
from pathlib import Path
from random import randbytes
from shutil import rmtree
from typing import Iterator
import pytest
from tempfile import TemporaryDirectory
from bsv.repository import Repository, Snapshot, Tree, TreeItem, create_repository, timestamp_us_from_time
from bsv.simple_cas.cas import Digest
from bsv.tree_walker import Action, IgnoreCause, TreeWalker
@pytest.fixture
def tmp_dir():
    """Yield a fresh temporary directory as a Path; deleted after the test."""
    with TemporaryDirectory(prefix="simple_cas_") as dir_name:
        yield Path(dir_name)
@pytest.fixture
def repo(tmp_dir):
    """Create a throwaway repository rooted inside the temporary directory."""
    config_path = tmp_dir / "bsv.config"
    repo_dir = tmp_dir / "bsv_repo"
    return create_repository(config_path, "test_repo", repo_dir)
def test_read_write_blob(tmp_dir: Path, repo: Repository):
    """Round-trip a 1 MiB random blob through the repository."""
    data_path = tmp_dir / "test.dat"
    make_random_file(data_path, 1 << 20)
    expected = data_path.read_bytes()
    with data_path.open("rb") as stream:
        written = repo.add_blob(stream)
    assert written.object_type == b"blob"
    assert written.blob.reader().read() == expected
    # Fetching by digest must return an equivalent object.
    fetched = repo.get_blob(written.digest)
    assert fetched.digest == written.digest
    assert fetched.object_type == written.object_type
    assert fetched.size == written.size
    assert fetched.blob.reader().read() == expected
def test_read_write_tree(repo: Repository):
    """Serialize a two-entry tree, store it, and read it back unchanged."""
    now = datetime.now(UTC)
    mtime_us = timestamp_us_from_time(now)
    digest_size = repo._cas._digest_size
    entries = [
        TreeItem(
            digest = Digest(bytes([42]) * digest_size),
            object_type = b"blob",
            size = 123,
            permissions = 0o744,
            modification_timestamp_us = mtime_us,
            name = "xyz",
        ),
        TreeItem(
            digest = Digest(bytes([123]) * digest_size),
            object_type = b"slnk",
            size = 42,
            permissions = 0o777,
            modification_timestamp_us = mtime_us,
            name = "foobar",
        ),
    ]
    tree = Tree(repo, entries)
    # to_bytes/from_bytes must round-trip losslessly.
    assert Tree.from_bytes(repo, tree.to_bytes()) == tree
    stored = repo.add_tree(tree)
    assert stored.object_type == b"tree"
    assert stored.tree == tree
    fetched = repo.get_tree(stored.digest)
    assert fetched.digest == stored.digest
    assert fetched.object_type == stored.object_type
    assert fetched.size == stored.size
    assert fetched.tree == stored.tree
def test_read_write_snapshot(repo: Repository):
    """Serialize a snapshot, store it, and read it back unchanged.

    Fix: use an aware UTC datetime (as test_read_write_tree does) instead
    of a naive local ``datetime.now()``, so the stored timestamp does not
    depend on the machine's local timezone.
    """
    snapshot = Snapshot(
        repo = repo,
        tree_digest = Digest(bytes([42]) * repo._cas._digest_size),
        parents = [
            Digest(bytes([123]) * repo._cas._digest_size),
            Digest(bytes([124]) * repo._cas._digest_size),
        ],
        repo_name = "test_repo",
        timestamp_us = timestamp_us_from_time(datetime.now(UTC)),
    )
    # to_bytes/from_bytes must round-trip losslessly.
    assert Snapshot.from_bytes(repo, snapshot.to_bytes()) == snapshot
    obj0 = repo.add_snapshot(snapshot)
    assert obj0.object_type == b"snap"
    assert obj0.snapshot == snapshot
    obj1 = repo.get_snapshot(obj0.digest)
    assert obj1.digest == obj0.digest
    assert obj1.object_type == obj0.object_type
    assert obj1.size == obj0.size
    assert obj1.snapshot == obj0.snapshot
class TestTreeWalker(TreeWalker):
    """TreeWalker subclass that records every report() call for assertions.

    Fix: the name starts with "Test" and the class defines ``__init__``,
    so pytest would try to collect it as a test class and emit a
    PytestCollectionWarning; ``__test__ = False`` opts it out of
    collection explicitly.
    """
    # Not a test case — helper for test_add_tree below.
    __test__ = False

    # (action, path, info) tuples, in the order report() was called.
    reports: list

    def __init__(self, repo: Repository, dry_run: bool=False):
        super().__init__(repo, dry_run=dry_run)
        self.reports = []

    def report(
        self,
        action: Action,
        path: Path,
        pstat: stat_result | None,
        info: IgnoreCause | Exception | None = None
    ):
        super().report(action, path, pstat, info)
        # REMOVE reports carry no meaningful info; normalize to None so
        # the expected-report lists in the tests stay simple.
        self.reports.append((action, path, info if action != Action.REMOVE else None))
def test_add_tree(tmp_dir: Path, repo: Repository):
    """Walk a directory tree into the repository twice and check the reports.

    The first pass must add everything except ignored paths; the second
    pass, after mutating the on-disk tree, must report only the changes
    relative to the first snapshot.

    Fix: the assertion after the second walk checked ``obj0.object_type``
    again (copy-paste) instead of ``obj1.object_type``. Also renamed the
    local ``dir`` which shadowed the builtin.
    """
    root_dir = tmp_dir / "test0"
    structure0 = {
        "folder": {
            "sub_folder": {
                "empty_folder": {},
                "foo.txt": b"Hello World!\n",
            },
            "test.py": b"print(\"Hello World!\")\n",
            "bar.dat": bytes(range(256)),
        },
        "Another test with long name and spaces and a bang !": b"Should works.\n",
        "bsv_repo": {
            "bsv_repository.config": b"[bsv]\n",
        },
    }
    structure1 = {
        "folder": {
            "sub_folder": {
                "empty_folder": {},
                "foo.txt": b"Hello World!\n",
            },
            "bar.dat": bytes(range(256)) * 2,
        },
        "new_file": b"whatever",
        "Another test with long name and spaces and a bang !": b"Should works.\n",
        "bsv_repo": {
            "bsv_repository.config": b"[bsv]\n",
        },
    }
    # The repository's own directory must be ignored by the walker, so it
    # is excluded from the expected stored content.
    expected0 = dict(structure0)
    del expected0["bsv_repo"]
    expected1 = dict(structure1)
    del expected1["bsv_repo"]
    create_file_structure(root_dir, structure0)

    def check(digest: Digest, value: dict | bytes):
        # Assert that the object stored under `digest` mirrors `value`.
        if isinstance(value, dict):
            tree = repo.get_tree(digest).tree
            assert tree
            assert [item.name for item in tree.items] == sorted(value.keys())
            for item in tree.items:
                check(item.digest, value[item.name])
        elif isinstance(value, bytes):
            blob_obj = repo.get_blob(digest)
            data = blob_obj.blob.reader().read()
            assert data == value

    walker = TestTreeWalker(repo)
    obj0 = walker.add_tree(root_dir)
    assert obj0.object_type == b"tree"
    assert walker.reports == [
        (Action.ADD, root_dir / "Another test with long name and spaces and a bang !", None),
        (Action.IGNORE, root_dir / "bsv_repo", IgnoreCause.IGNORE_RULE),
        (Action.ADD, root_dir / "folder/bar.dat", None),
        (Action.ADD, root_dir / "folder/sub_folder/empty_folder", None),
        (Action.ADD, root_dir / "folder/sub_folder/foo.txt", None),
        (Action.ADD, root_dir / "folder/sub_folder", None),
        (Action.ADD, root_dir / "folder/test.py", None),
        (Action.ADD, root_dir / "folder", None),
        (Action.ADD, root_dir, None),
    ]
    check(obj0.digest, expected0)
    # Second pass: mutate the tree on disk and walk incrementally.
    create_file_structure(root_dir, structure1)
    walker.reports.clear()
    obj1 = walker.add_tree(root_dir, source_digest=obj0.digest)
    assert obj1.object_type == b"tree"
    assert walker.reports == [
        (Action.IGNORE, root_dir / "Another test with long name and spaces and a bang !", IgnoreCause.UNCHANGED),
        (Action.IGNORE, root_dir / "bsv_repo", IgnoreCause.IGNORE_RULE),
        (Action.UPDATE, root_dir / "folder/bar.dat", None),
        (Action.IGNORE, root_dir / "folder/sub_folder/empty_folder", IgnoreCause.UNCHANGED),
        (Action.IGNORE, root_dir / "folder/sub_folder/foo.txt", IgnoreCause.UNCHANGED),
        (Action.IGNORE, root_dir / "folder/sub_folder", IgnoreCause.UNCHANGED),
        (Action.REMOVE, root_dir / "folder/test.py", None),
        (Action.UPDATE, root_dir / "folder", None),
        (Action.ADD, root_dir / "new_file", None),
        (Action.UPDATE, root_dir, None),
    ]
    check(obj1.digest, expected1)
def create_file_structure(dst: Path, value: dict | bytes):
if isinstance(value, bytes):
if dst.is_dir():
rmtree(str(dst))
if not dst.is_file() or dst.read_bytes() != value:
dst.write_bytes(value)
elif isinstance(value, dict):
if dst.is_file():
dst.unlink()
if not dst.is_dir():
dst.mkdir()
items = sorted(value.items())
fs_paths = sorted(dst.iterdir())
item_index = 0
fs_path_index = 0
while item_index < len(value) or fs_path_index < len(fs_paths):
name, subitem = items[item_index] if item_index < len(items) else (None, None)
fs_path = fs_paths[fs_path_index] if fs_path_index < len(fs_paths) else None
if name and fs_path:
if name < fs_path.name:
fs_path = None
elif name > fs_path.name:
name = None
if name:
item_index += 1
if fs_path:
fs_path_index += 1
if name:
create_file_structure(dst / name, subitem) # type: ignore
elif fs_path and fs_path.is_dir():
rmtree(fs_path)
elif fs_path:
fs_path.unlink()
else:
raise TypeError(f"invalid type {type(value).__name__} for parameter value")
def make_random_file(path: Path, size: int):
    """Fill `path` with `size` random bytes, written chunk by chunk."""
    with path.open("wb") as out:
        for n in iter_chunks(size):
            out.write(randbytes(n))
def iter_chunks(size: int, chunk_size: int=1 << 16) -> Iterator[int]:
    """Yield chunk sizes summing to `size`: full `chunk_size` chunks, then the remainder.

    Fix: for ``size == 0`` the original computed ``num_full_chunks = -1``
    and a negative ``offset``, so it wrongly yielded one full
    ``chunk_size`` chunk — e.g. make_random_file(path, 0) wrote 64 KiB.
    Sizes <= 0 now yield nothing.
    """
    remaining = size
    while remaining > 0:
        yield min(chunk_size, remaining)
        remaining -= chunk_size