Improve TreeWalker algorithm.
This commit is contained in:
@@ -18,12 +18,13 @@ from datetime import UTC, datetime
|
||||
from os import stat_result
|
||||
from pathlib import Path
|
||||
from random import randbytes
|
||||
from shutil import rmtree
|
||||
from typing import Iterator
|
||||
|
||||
import pytest
|
||||
from tempfile import TemporaryDirectory
|
||||
|
||||
from bsv.repository import Repository, Snapshot, Tree, TreeItem, create_repository, timestamp_from_time
|
||||
from bsv.repository import Repository, Snapshot, Tree, TreeItem, create_repository, timestamp_us_from_time
|
||||
from bsv.simple_cas.cas import Digest
|
||||
from bsv.tree_walker import Action, IgnoreCause, TreeWalker
|
||||
|
||||
@@ -66,7 +67,7 @@ def test_read_write_tree(repo: Repository):
|
||||
object_type = b"blob",
|
||||
size = 123,
|
||||
permissions = 0o744,
|
||||
modification_timestamp = timestamp_from_time(now),
|
||||
modification_timestamp_us = timestamp_us_from_time(now),
|
||||
name = "xyz",
|
||||
),
|
||||
TreeItem(
|
||||
@@ -74,7 +75,7 @@ def test_read_write_tree(repo: Repository):
|
||||
object_type = b"slnk",
|
||||
size = 42,
|
||||
permissions = 0o777,
|
||||
modification_timestamp = timestamp_from_time(now),
|
||||
modification_timestamp_us = timestamp_us_from_time(now),
|
||||
name = "foobar",
|
||||
),
|
||||
]
|
||||
@@ -90,8 +91,12 @@ def test_read_write_snapshot(repo: Repository):
|
||||
snapshot = Snapshot(
|
||||
repo = repo,
|
||||
tree_digest = Digest(bytes([42]) * repo._cas._digest_size),
|
||||
parents = [
|
||||
Digest(bytes([123]) * repo._cas._digest_size),
|
||||
Digest(bytes([124]) * repo._cas._digest_size),
|
||||
],
|
||||
repo_name = "test_repo",
|
||||
timestamp = timestamp_from_time(datetime.now()),
|
||||
timestamp_us = timestamp_us_from_time(datetime.now()),
|
||||
)
|
||||
|
||||
assert Snapshot.from_bytes(repo, snapshot.to_bytes()) == snapshot
|
||||
@@ -104,7 +109,7 @@ class TestTreeWalker(TreeWalker):
|
||||
reports: list
|
||||
|
||||
def __init__(self, repo: Repository, dry_run: bool=False):
|
||||
super().__init__(repo, dry_run)
|
||||
super().__init__(repo, dry_run=dry_run)
|
||||
self.reports = []
|
||||
|
||||
def report(
|
||||
@@ -115,12 +120,12 @@ class TestTreeWalker(TreeWalker):
|
||||
info: IgnoreCause | Exception | None = None
|
||||
):
|
||||
super().report(action, path, pstat, info)
|
||||
self.reports.append((action, path, pstat, info))
|
||||
self.reports.append((action, path, info if action != Action.REMOVE else None))
|
||||
|
||||
|
||||
def test_add_tree(tmp_dir: Path, repo: Repository):
|
||||
dir = tmp_dir / "test"
|
||||
structure = {
|
||||
dir = tmp_dir / "test0"
|
||||
structure0 = {
|
||||
"folder": {
|
||||
"sub_folder": {
|
||||
"empty_folder": {},
|
||||
@@ -134,11 +139,28 @@ def test_add_tree(tmp_dir: Path, repo: Repository):
|
||||
"bsv_config.toml": b"[bsv]\n",
|
||||
},
|
||||
}
|
||||
structure1 = {
|
||||
"folder": {
|
||||
"sub_folder": {
|
||||
"empty_folder": {},
|
||||
"foo.txt": b"Hello World!\n",
|
||||
},
|
||||
"bar.dat": bytes(range(256)) * 2,
|
||||
},
|
||||
"new_file": b"whatever",
|
||||
"Another test with long name and spaces and a bang !": b"Should works.\n",
|
||||
"bsv_repo": {
|
||||
"bsv_config.toml": b"[bsv]\n",
|
||||
},
|
||||
}
|
||||
|
||||
create_file_structure(dir, structure)
|
||||
expected0 = dict(structure0)
|
||||
del expected0["bsv_repo"]
|
||||
|
||||
walker = TestTreeWalker(repo)
|
||||
dir_digest = walker.add_tree(dir)
|
||||
expected1 = dict(structure1)
|
||||
del expected1["bsv_repo"]
|
||||
|
||||
create_file_structure(dir, structure0)
|
||||
|
||||
def check(digest: Digest, value: dict | bytes):
|
||||
if isinstance(value, dict):
|
||||
@@ -152,19 +174,79 @@ def test_add_tree(tmp_dir: Path, repo: Repository):
|
||||
data = blob.reader().read()
|
||||
assert data == value
|
||||
|
||||
expected = dict(structure)
|
||||
del expected["bsv_repo"]
|
||||
check(dir_digest, expected)
|
||||
walker = TestTreeWalker(repo)
|
||||
dir_digest0 = walker.add_tree(dir)
|
||||
assert walker.reports == [
|
||||
(Action.ADD, dir / "Another test with long name and spaces and a bang !", None),
|
||||
(Action.IGNORE, dir / "bsv_repo", IgnoreCause.IGNORE_RULE),
|
||||
(Action.ADD, dir / "folder/bar.dat", None),
|
||||
(Action.ADD, dir / "folder/sub_folder/empty_folder", None),
|
||||
(Action.ADD, dir / "folder/sub_folder/foo.txt", None),
|
||||
(Action.ADD, dir / "folder/sub_folder", None),
|
||||
(Action.ADD, dir / "folder/test.py", None),
|
||||
(Action.ADD, dir / "folder", None),
|
||||
(Action.ADD, dir, None),
|
||||
]
|
||||
check(dir_digest0, expected0)
|
||||
|
||||
create_file_structure(dir, structure1)
|
||||
|
||||
walker.reports.clear()
|
||||
dir_digest1 = walker.add_tree(dir, source_digest=dir_digest0)
|
||||
assert walker.reports == [
|
||||
(Action.IGNORE, dir / "Another test with long name and spaces and a bang !", IgnoreCause.UNCHANGED),
|
||||
(Action.IGNORE, dir / "bsv_repo", IgnoreCause.IGNORE_RULE),
|
||||
(Action.UPDATE, dir / "folder/bar.dat", None),
|
||||
(Action.IGNORE, dir / "folder/sub_folder/empty_folder", IgnoreCause.UNCHANGED),
|
||||
(Action.IGNORE, dir / "folder/sub_folder/foo.txt", IgnoreCause.UNCHANGED),
|
||||
(Action.IGNORE, dir / "folder/sub_folder", IgnoreCause.UNCHANGED),
|
||||
(Action.REMOVE, dir / "folder/test.py", None),
|
||||
(Action.UPDATE, dir / "folder", None),
|
||||
(Action.ADD, dir / "new_file", None),
|
||||
(Action.UPDATE, dir, None),
|
||||
]
|
||||
check(dir_digest1, expected1)
|
||||
|
||||
|
||||
def create_file_structure(dst: Path, value: dict | bytes):
|
||||
assert not dst.exists()
|
||||
if isinstance(value, dict):
|
||||
dst.mkdir()
|
||||
for name, item in value.items():
|
||||
create_file_structure(dst / name, item)
|
||||
elif isinstance(value, bytes):
|
||||
dst.write_bytes(value)
|
||||
if isinstance(value, bytes):
|
||||
if dst.is_dir():
|
||||
rmtree(str(dst))
|
||||
if not dst.is_file() or dst.read_bytes() != value:
|
||||
dst.write_bytes(value)
|
||||
elif isinstance(value, dict):
|
||||
if dst.is_file():
|
||||
dst.unlink()
|
||||
if not dst.is_dir():
|
||||
dst.mkdir()
|
||||
|
||||
items = sorted(value.items())
|
||||
fs_paths = sorted(dst.iterdir())
|
||||
|
||||
item_index = 0
|
||||
fs_path_index = 0
|
||||
|
||||
while item_index < len(value) or fs_path_index < len(fs_paths):
|
||||
name, subitem = items[item_index] if item_index < len(items) else (None, None)
|
||||
fs_path = fs_paths[fs_path_index] if fs_path_index < len(fs_paths) else None
|
||||
|
||||
if name and fs_path:
|
||||
if name < fs_path.name:
|
||||
fs_path = None
|
||||
elif name > fs_path.name:
|
||||
name = None
|
||||
|
||||
if name:
|
||||
item_index += 1
|
||||
if fs_path:
|
||||
fs_path_index += 1
|
||||
|
||||
if name:
|
||||
create_file_structure(dst / name, subitem) # type: ignore
|
||||
elif fs_path and fs_path.is_dir():
|
||||
rmtree(fs_path)
|
||||
elif fs_path:
|
||||
fs_path.unlink()
|
||||
else:
|
||||
raise TypeError(f"invalid type {type(value).__name__} for parameter value")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user