Restart from scratch.

This commit is contained in:
2025-04-29 18:54:34 +02:00
parent e74eaf0408
commit b1d2fe7717
20 changed files with 73 additions and 5 deletions

View File

@@ -1,286 +0,0 @@
# bsv - Backup, Synchronization, Versioning
# Copyright (C) 2023 Simon Boyé
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
from datetime import UTC, datetime
from os import stat_result
from pathlib import Path
from random import randbytes
from shutil import rmtree
from typing import Iterator
import pytest
from tempfile import TemporaryDirectory
from bsv.repository import Repository, Snapshot, Tree, TreeItem, create_repository, timestamp_us_from_time
from bsv.simple_cas.cas import Digest
from bsv.tree_walker import Action, IgnoreCause, TreeWalker
@pytest.fixture
def tmp_dir() -> Iterator[Path]:
    """Yield the path of a fresh temporary directory for the duration of a test."""
    # Leaving the ``with`` block on teardown removes the directory and its content.
    with TemporaryDirectory(prefix="simple_cas_") as scratch:
        yield Path(scratch)
@pytest.fixture
def repo(tmp_dir):
    """Return the result of ``create_repository`` for a repository named "test_repo".

    Both paths handed to ``create_repository`` live inside the per-test
    temporary directory, so nothing outlives the test.
    """
    config_path = tmp_dir / "bsv.config"
    repo_path = tmp_dir / "bsv_repo"
    return create_repository(config_path, "test_repo", repo_path)
def test_read_write_blob(tmp_dir: Path, repo: Repository):
    """Add a 1 MiB random file as a blob, then read it back directly and by digest."""
    source = tmp_dir / "test.dat"
    make_random_file(source, 1 << 20)
    expected = source.read_bytes()

    with source.open("rb") as stream:
        added = repo.add_blob(stream)

    assert added.object_type == b"blob"
    assert added.blob.reader().read() == expected

    # Fetching by digest must describe the same object and return the same content.
    fetched = repo.get_blob(added.digest)
    assert fetched.digest == added.digest
    assert fetched.object_type == added.object_type
    assert fetched.size == added.size
    assert fetched.blob.reader().read() == expected
def test_read_write_tree(repo: Repository):
    """Round-trip a two-item tree through serialization and through the repository."""
    digest_size = repo._cas._digest_size
    mtime_us = timestamp_us_from_time(datetime.now(UTC))

    blob_item = TreeItem(
        digest=Digest(bytes([42]) * digest_size),
        object_type=b"blob",
        size=123,
        permissions=0o744,
        modification_timestamp_us=mtime_us,
        name="xyz",
    )
    link_item = TreeItem(
        digest=Digest(bytes([123]) * digest_size),
        object_type=b"slnk",
        size=42,
        permissions=0o777,
        modification_timestamp_us=mtime_us,
        name="foobar",
    )
    tree = Tree(repo, [blob_item, link_item])

    # Serialization on its own must be lossless.
    assert Tree.from_bytes(repo, tree.to_bytes()) == tree

    stored = repo.add_tree(tree)
    assert stored.object_type == b"tree"
    assert stored.tree == tree

    # Reading the object back by digest must give an equivalent object.
    fetched = repo.get_tree(stored.digest)
    assert fetched.digest == stored.digest
    assert fetched.object_type == stored.object_type
    assert fetched.size == stored.size
    assert fetched.tree == stored.tree
def test_read_write_snapshot(repo: Repository):
    """Round-trip a snapshot through serialization and through the repository.

    The snapshot references a made-up tree digest and two made-up parent
    digests; only the snapshot object itself is written to the repository.
    """
    digest_size = repo._cas._digest_size
    snapshot = Snapshot(
        repo=repo,
        tree_digest=Digest(bytes([42]) * digest_size),
        parents=[
            Digest(bytes([123]) * digest_size),
            Digest(bytes([124]) * digest_size),
        ],
        repo_name="test_repo",
        # Timezone-aware, like the timestamp built in test_read_write_tree, so
        # the value does not depend on the machine's local timezone settings.
        timestamp_us=timestamp_us_from_time(datetime.now(UTC)),
    )

    # Serialization on its own must be lossless.
    assert Snapshot.from_bytes(repo, snapshot.to_bytes()) == snapshot

    obj0 = repo.add_snapshot(snapshot)
    assert obj0.object_type == b"snap"
    assert obj0.snapshot == snapshot

    # Reading the object back by digest must give an equivalent object.
    obj1 = repo.get_snapshot(obj0.digest)
    assert obj1.digest == obj0.digest
    assert obj1.object_type == obj0.object_type
    assert obj1.size == obj0.size
    assert obj1.snapshot == obj0.snapshot
class TestTreeWalker(TreeWalker):
    """TreeWalker that records every ``report()`` call so tests can assert on them."""

    # The name matches pytest's ``Test*`` class pattern, but this is a helper,
    # not a test case: opt out of collection so pytest neither tries to
    # collect it nor warns about its ``__init__`` constructor.
    __test__ = False

    # One ``(action, path, info)`` tuple per ``report()`` call, in call order.
    reports: list[tuple[Action, Path, IgnoreCause | Exception | None]]

    def __init__(self, repo: Repository, dry_run: bool = False):
        """Create the walker with an empty report log."""
        super().__init__(repo, dry_run=dry_run)
        self.reports = []

    def report(
        self,
        action: Action,
        path: Path,
        pstat: stat_result | None,
        info: IgnoreCause | Exception | None = None,
    ):
        """Forward the report to the base class, then append it to ``reports``.

        ``pstat`` is not recorded. ``info`` is recorded as ``None`` for
        ``Action.REMOVE`` reports, whatever value was passed in.
        """
        super().report(action, path, pstat, info)
        self.reports.append((action, path, info if action != Action.REMOVE else None))
def test_add_tree(tmp_dir: Path, repo: Repository):
    """Walk a directory twice and check both the reported actions and the stored trees.

    The first walk adds everything from scratch. The directory is then modified
    (one file changed, one removed, one added) and walked again with the first
    tree as ``source_digest``, so unchanged entries are expected to be reported
    as ignored.
    """
    root = tmp_dir / "test0"

    structure0 = {
        "folder": {
            "sub_folder": {
                "empty_folder": {},
                "foo.txt": b"Hello World!\n",
            },
            "test.py": b"print(\"Hello World!\")\n",
            "bar.dat": bytes(range(256)),
        },
        "Another test with long name and spaces and a bang !": b"Should works.\n",
        "bsv_repo": {
            "bsv_repository.config": b"[bsv]\n",
        },
    }
    structure1 = {
        "folder": {
            "sub_folder": {
                "empty_folder": {},
                "foo.txt": b"Hello World!\n",
            },
            "bar.dat": bytes(range(256)) * 2,
        },
        "new_file": b"whatever",
        "Another test with long name and spaces and a bang !": b"Should works.\n",
        "bsv_repo": {
            "bsv_repository.config": b"[bsv]\n",
        },
    }

    # "bsv_repo" is reported as ignored by the walker (see the expected reports
    # below), so it must not show up in the stored trees.
    expected0 = {name: item for name, item in structure0.items() if name != "bsv_repo"}
    expected1 = {name: item for name, item in structure1.items() if name != "bsv_repo"}

    def check(digest: Digest, value: dict | bytes):
        """Recursively compare the object stored under ``digest`` with ``value``."""
        if isinstance(value, dict):
            tree = repo.get_tree(digest).tree
            assert tree
            assert [item.name for item in tree.items] == sorted(value)
            for item in tree.items:
                check(item.digest, value[item.name])
        elif isinstance(value, bytes):
            blob_obj = repo.get_blob(digest)
            data = blob_obj.blob.reader().read()
            assert data == value

    create_file_structure(root, structure0)

    walker = TestTreeWalker(repo)
    obj0 = walker.add_tree(root)

    assert obj0.object_type == b"tree"
    assert walker.reports == [
        (Action.ADD, root / "Another test with long name and spaces and a bang !", None),
        (Action.IGNORE, root / "bsv_repo", IgnoreCause.IGNORE_RULE),
        (Action.ADD, root / "folder/bar.dat", None),
        (Action.ADD, root / "folder/sub_folder/empty_folder", None),
        (Action.ADD, root / "folder/sub_folder/foo.txt", None),
        (Action.ADD, root / "folder/sub_folder", None),
        (Action.ADD, root / "folder/test.py", None),
        (Action.ADD, root / "folder", None),
        (Action.ADD, root, None),
    ]
    check(obj0.digest, expected0)

    # Second pass: update the directory in place and walk it against the first tree.
    create_file_structure(root, structure1)

    walker.reports.clear()
    obj1 = walker.add_tree(root, source_digest=obj0.digest)

    # Check the object returned by the second walk (this used to re-check obj0).
    assert obj1.object_type == b"tree"
    assert walker.reports == [
        (Action.IGNORE, root / "Another test with long name and spaces and a bang !", IgnoreCause.UNCHANGED),
        (Action.IGNORE, root / "bsv_repo", IgnoreCause.IGNORE_RULE),
        (Action.UPDATE, root / "folder/bar.dat", None),
        (Action.IGNORE, root / "folder/sub_folder/empty_folder", IgnoreCause.UNCHANGED),
        (Action.IGNORE, root / "folder/sub_folder/foo.txt", IgnoreCause.UNCHANGED),
        (Action.IGNORE, root / "folder/sub_folder", IgnoreCause.UNCHANGED),
        (Action.REMOVE, root / "folder/test.py", None),
        (Action.UPDATE, root / "folder", None),
        (Action.ADD, root / "new_file", None),
        (Action.UPDATE, root, None),
    ]
    check(obj1.digest, expected1)
def create_file_structure(dst: Path, value: dict | bytes):
if isinstance(value, bytes):
if dst.is_dir():
rmtree(str(dst))
if not dst.is_file() or dst.read_bytes() != value:
dst.write_bytes(value)
elif isinstance(value, dict):
if dst.is_file():
dst.unlink()
if not dst.is_dir():
dst.mkdir()
items = sorted(value.items())
fs_paths = sorted(dst.iterdir())
item_index = 0
fs_path_index = 0
while item_index < len(value) or fs_path_index < len(fs_paths):
name, subitem = items[item_index] if item_index < len(items) else (None, None)
fs_path = fs_paths[fs_path_index] if fs_path_index < len(fs_paths) else None
if name and fs_path:
if name < fs_path.name:
fs_path = None
elif name > fs_path.name:
name = None
if name:
item_index += 1
if fs_path:
fs_path_index += 1
if name:
create_file_structure(dst / name, subitem) # type: ignore
elif fs_path and fs_path.is_dir():
rmtree(fs_path)
elif fs_path:
fs_path.unlink()
else:
raise TypeError(f"invalid type {type(value).__name__} for parameter value")
def make_random_file(path: Path, size: int):
    """Fill ``path`` with random bytes, one piece per chunk size from ``iter_chunks(size)``."""
    # Generating the data chunk by chunk avoids one big in-memory buffer.
    with path.open("wb") as out:
        out.writelines(randbytes(length) for length in iter_chunks(size))
def iter_chunks(size: int, chunk_size: int = 1 << 16) -> Iterator[int]:
    """Split ``size`` into consecutive chunk lengths of at most ``chunk_size``.

    Yields ``chunk_size`` as many times as it fits, followed by the remainder
    if there is one, so the yielded values always sum to ``size``. Nothing is
    yielded when ``size`` is zero or negative.

    Raises:
        ValueError: if ``chunk_size`` is not strictly positive.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    # Counting down (instead of deriving the chunk count from ``size - 1``)
    # keeps the empty case correct: a size of 0 yields no chunk at all.
    remaining = size
    while remaining > 0:
        length = min(chunk_size, remaining)
        yield length
        remaining -= length

View File

@@ -1,88 +0,0 @@
# bsv - Backup, Synchronization, Versioning
# Copyright (C) 2023 Simon Boyé
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
from hashlib import sha256
from pathlib import Path
from tempfile import TemporaryDirectory
import pytest
from bsv.simple_cas.cas import Digest, SimpleCas
@pytest.fixture
def tmp_dir():
    """Yield the path of a fresh temporary directory for the duration of a test."""
    # Leaving the ``with`` block on teardown removes the directory and its content.
    with TemporaryDirectory(prefix="simple_cas_") as scratch:
        yield Path(scratch)
@pytest.fixture
def cas(tmp_dir):
    """Yield a ``SimpleCas`` built on the temporary directory with SHA-256.

    The store is used as a context manager, so it stays entered for the whole
    test and is exited on teardown.
    """
    store = SimpleCas(tmp_dir, sha256)  # type: ignore
    with store:
        yield store
def test_simple_cas(tmp_dir: Path):
    """Write an object, then reopen the store and check the object is still there.

    Writing the same content a second time must return the same digest and
    must not increase the object count.
    """
    data = b"Hello World!"

    def assert_stored(store: SimpleCas, key: Digest):
        """Check that ``key`` is in ``store`` and reads back as the written blob."""
        assert key in store
        obj = store.read(key)
        assert obj is not None
        assert obj.digest == key
        assert obj.object_type == b"blob"
        assert obj.size == len(data)
        assert obj.data == data

    # First session: the store starts empty and gains exactly one object.
    first_session = SimpleCas(tmp_dir, sha256)  # type: ignore
    with first_session:
        assert len(first_session) == 0
        digest = first_session.write(b"blob", data)
        assert len(first_session) == 1
        assert_stored(first_session, digest)

    # Second session on the same directory: the object must still be there.
    second_session = SimpleCas(tmp_dir, sha256)  # type: ignore
    with second_session:
        assert len(second_session) == 1
        assert_stored(second_session, digest)

        # Writing identical content again must not create a second object.
        digest2 = second_session.write(b"blob", data)
        assert digest2 == digest
        assert len(second_session) == 1
def test_refs(cas: SimpleCas):
    """A ref is unset until written, reads back as written, and its prefix stays unset."""
    ref_name = "foo/bar"
    digest = Digest(bytes([42]) * cas._digest_size)

    assert cas.get_ref(ref_name) is None

    cas.set_ref(ref_name, digest)
    assert cas.get_ref(ref_name) == digest

    # Setting "foo/bar" must not make its "foo" prefix resolve as a ref.
    assert cas.get_ref("foo") is None