Improve TreeWalker algorithm.

This commit is contained in:
2023-11-22 20:18:35 +01:00
parent 17bef2e63a
commit 073fd5e567
5 changed files with 376 additions and 120 deletions

View File

@@ -18,12 +18,13 @@ from datetime import UTC, datetime
from os import stat_result
from pathlib import Path
from random import randbytes
from shutil import rmtree
from typing import Iterator
import pytest
from tempfile import TemporaryDirectory
from bsv.repository import Repository, Snapshot, Tree, TreeItem, create_repository, timestamp_from_time
from bsv.repository import Repository, Snapshot, Tree, TreeItem, create_repository, timestamp_us_from_time
from bsv.simple_cas.cas import Digest
from bsv.tree_walker import Action, IgnoreCause, TreeWalker
@@ -66,7 +67,7 @@ def test_read_write_tree(repo: Repository):
object_type = b"blob",
size = 123,
permissions = 0o744,
modification_timestamp = timestamp_from_time(now),
modification_timestamp_us = timestamp_us_from_time(now),
name = "xyz",
),
TreeItem(
@@ -74,7 +75,7 @@ def test_read_write_tree(repo: Repository):
object_type = b"slnk",
size = 42,
permissions = 0o777,
modification_timestamp = timestamp_from_time(now),
modification_timestamp_us = timestamp_us_from_time(now),
name = "foobar",
),
]
@@ -90,8 +91,12 @@ def test_read_write_snapshot(repo: Repository):
snapshot = Snapshot(
repo = repo,
tree_digest = Digest(bytes([42]) * repo._cas._digest_size),
parents = [
Digest(bytes([123]) * repo._cas._digest_size),
Digest(bytes([124]) * repo._cas._digest_size),
],
repo_name = "test_repo",
timestamp = timestamp_from_time(datetime.now()),
timestamp_us = timestamp_us_from_time(datetime.now()),
)
assert Snapshot.from_bytes(repo, snapshot.to_bytes()) == snapshot
@@ -104,7 +109,7 @@ class TestTreeWalker(TreeWalker):
reports: list
def __init__(self, repo: Repository, dry_run: bool=False):
super().__init__(repo, dry_run)
super().__init__(repo, dry_run=dry_run)
self.reports = []
def report(
@@ -115,12 +120,12 @@ class TestTreeWalker(TreeWalker):
info: IgnoreCause | Exception | None = None
):
super().report(action, path, pstat, info)
self.reports.append((action, path, pstat, info))
self.reports.append((action, path, info if action != Action.REMOVE else None))
def test_add_tree(tmp_dir: Path, repo: Repository):
dir = tmp_dir / "test"
structure = {
dir = tmp_dir / "test0"
structure0 = {
"folder": {
"sub_folder": {
"empty_folder": {},
@@ -134,11 +139,28 @@ def test_add_tree(tmp_dir: Path, repo: Repository):
"bsv_config.toml": b"[bsv]\n",
},
}
structure1 = {
"folder": {
"sub_folder": {
"empty_folder": {},
"foo.txt": b"Hello World!\n",
},
"bar.dat": bytes(range(256)) * 2,
},
"new_file": b"whatever",
"Another test with long name and spaces and a bang !": b"Should works.\n",
"bsv_repo": {
"bsv_config.toml": b"[bsv]\n",
},
}
create_file_structure(dir, structure)
expected0 = dict(structure0)
del expected0["bsv_repo"]
walker = TestTreeWalker(repo)
dir_digest = walker.add_tree(dir)
expected1 = dict(structure1)
del expected1["bsv_repo"]
create_file_structure(dir, structure0)
def check(digest: Digest, value: dict | bytes):
if isinstance(value, dict):
@@ -152,19 +174,79 @@ def test_add_tree(tmp_dir: Path, repo: Repository):
data = blob.reader().read()
assert data == value
expected = dict(structure)
del expected["bsv_repo"]
check(dir_digest, expected)
walker = TestTreeWalker(repo)
dir_digest0 = walker.add_tree(dir)
assert walker.reports == [
(Action.ADD, dir / "Another test with long name and spaces and a bang !", None),
(Action.IGNORE, dir / "bsv_repo", IgnoreCause.IGNORE_RULE),
(Action.ADD, dir / "folder/bar.dat", None),
(Action.ADD, dir / "folder/sub_folder/empty_folder", None),
(Action.ADD, dir / "folder/sub_folder/foo.txt", None),
(Action.ADD, dir / "folder/sub_folder", None),
(Action.ADD, dir / "folder/test.py", None),
(Action.ADD, dir / "folder", None),
(Action.ADD, dir, None),
]
check(dir_digest0, expected0)
create_file_structure(dir, structure1)
walker.reports.clear()
dir_digest1 = walker.add_tree(dir, source_digest=dir_digest0)
assert walker.reports == [
(Action.IGNORE, dir / "Another test with long name and spaces and a bang !", IgnoreCause.UNCHANGED),
(Action.IGNORE, dir / "bsv_repo", IgnoreCause.IGNORE_RULE),
(Action.UPDATE, dir / "folder/bar.dat", None),
(Action.IGNORE, dir / "folder/sub_folder/empty_folder", IgnoreCause.UNCHANGED),
(Action.IGNORE, dir / "folder/sub_folder/foo.txt", IgnoreCause.UNCHANGED),
(Action.IGNORE, dir / "folder/sub_folder", IgnoreCause.UNCHANGED),
(Action.REMOVE, dir / "folder/test.py", None),
(Action.UPDATE, dir / "folder", None),
(Action.ADD, dir / "new_file", None),
(Action.UPDATE, dir, None),
]
check(dir_digest1, expected1)
def create_file_structure(dst: Path, value: dict | bytes):
assert not dst.exists()
if isinstance(value, dict):
dst.mkdir()
for name, item in value.items():
create_file_structure(dst / name, item)
elif isinstance(value, bytes):
dst.write_bytes(value)
if isinstance(value, bytes):
if dst.is_dir():
rmtree(str(dst))
if not dst.is_file() or dst.read_bytes() != value:
dst.write_bytes(value)
elif isinstance(value, dict):
if dst.is_file():
dst.unlink()
if not dst.is_dir():
dst.mkdir()
items = sorted(value.items())
fs_paths = sorted(dst.iterdir())
item_index = 0
fs_path_index = 0
while item_index < len(value) or fs_path_index < len(fs_paths):
name, subitem = items[item_index] if item_index < len(items) else (None, None)
fs_path = fs_paths[fs_path_index] if fs_path_index < len(fs_paths) else None
if name and fs_path:
if name < fs_path.name:
fs_path = None
elif name > fs_path.name:
name = None
if name:
item_index += 1
if fs_path:
fs_path_index += 1
if name:
create_file_structure(dst / name, subitem) # type: ignore
elif fs_path and fs_path.is_dir():
rmtree(fs_path)
elif fs_path:
fs_path.unlink()
else:
raise TypeError(f"invalid type {type(value).__name__} for parameter value")