SimpleCas basic implementation.
This commit is contained in:
@@ -10,10 +10,15 @@ requires-python = ">=3.11"
|
|||||||
classifiers = [
|
classifiers = [
|
||||||
# TODO
|
# TODO
|
||||||
]
|
]
|
||||||
dependencies = [
|
|
||||||
"tomlkit"
|
|
||||||
]
|
|
||||||
dynamic = ["version"]
|
dynamic = ["version"]
|
||||||
|
dependencies = [
|
||||||
|
"tomlkit",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
test = [
|
||||||
|
"pytest",
|
||||||
|
]
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
# TODO
|
# TODO
|
||||||
|
|||||||
@@ -87,11 +87,17 @@ def init(
|
|||||||
bsv_table.add(tomlkit.comment("Mapping between bsv tree and the actual filesystem."))
|
bsv_table.add(tomlkit.comment("Mapping between bsv tree and the actual filesystem."))
|
||||||
bsv_table.add("path_map", tomlkit.array())
|
bsv_table.add("path_map", tomlkit.array())
|
||||||
|
|
||||||
|
cas_table = tomlkit.table()
|
||||||
|
cas_table.add("type", "simple")
|
||||||
|
cas_table.add("hash", "sha256")
|
||||||
|
|
||||||
doc = tomlkit.document()
|
doc = tomlkit.document()
|
||||||
doc.add(tomlkit.comment("bsv repository configuration"))
|
doc.add(tomlkit.comment("bsv repository configuration"))
|
||||||
doc.add(tomlkit.comment(f"Created by {getlogin()} on {DateTime.now().isoformat()}."))
|
doc.add(tomlkit.comment(f"Created by {getlogin()} on {DateTime.now().isoformat()}."))
|
||||||
doc.add(tomlkit.nl())
|
doc.add(tomlkit.nl())
|
||||||
doc.add("bsv", bsv_table)
|
doc.add("bsv", bsv_table)
|
||||||
|
doc.add(tomlkit.nl())
|
||||||
|
doc.add("cas", cas_table)
|
||||||
|
|
||||||
config_path = destination / "bsv_config.toml"
|
config_path = destination / "bsv_config.toml"
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -14,3 +14,6 @@
|
|||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
|
from bsv.simple_cas.cas import SimpleCas as Cas
|
||||||
|
|||||||
134
src/bsv/simple_cas/cas.py
Normal file
134
src/bsv/simple_cas/cas.py
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import hashlib
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, BinaryIO, Callable, Optional
|
||||||
|
|
||||||
|
from bsv.simple_cas.util import Hash
|
||||||
|
|
||||||
|
|
||||||
|
class SimpleCas:
    """Content-addressable store backed by two append-only files.

    ``cas.dat`` holds one record per object, laid out as
    ``digest | object type (4 bytes) | size (4 bytes, big-endian) | data``.
    ``cas.idx`` holds one fixed-size entry per object, laid out as
    ``digest | object type (4 bytes) | offset (4 bytes) | size (4 bytes)``,
    where ``offset`` is the position of the record inside ``cas.dat``.
    The whole index is loaded into memory when the store is constructed.
    """

    _root_dir: Path
    _hash_factory: Callable[[], Hash]
    _digest_size: int

    _index: dict[bytes, IndexItem]

    _is_inside_context: bool = False

    def __init__(self, root_dir: Path, hash_factory: Callable[[], Hash]):
        """Open the store located in *root_dir*.

        Args:
            root_dir: Directory containing (or that will contain) ``cas.dat``
                and ``cas.idx``.
            hash_factory: Zero-argument callable returning a fresh hash
                object (e.g. ``hashlib.sha256``).

        Raises:
            ValueError: If ``cas.idx`` ends with an incomplete entry.
        """
        self._root_dir = root_dir
        self._hash_factory = hash_factory
        self._digest_size = self._hash_factory().digest_size

        self._index = {}
        index_path = self._root_dir / "cas.idx"
        if not index_path.exists():
            return

        type_end = self._digest_size + 4
        offset_end = type_end + 4
        entry_size = offset_end + 4
        with index_path.open("rb") as stream:
            # Read whole entries at once so that a torn write at the end of
            # the file is detected instead of being parsed as a bogus item.
            while entry := stream.read(entry_size):
                if len(entry) != entry_size:
                    raise ValueError(
                        f"truncated index entry in {index_path}"
                    )
                digest = entry[:self._digest_size]
                self._index[digest] = IndexItem(
                    entry[self._digest_size:type_end],
                    int.from_bytes(entry[type_end:offset_end], "big"),
                    int.from_bytes(entry[offset_end:], "big"),
                )

    @classmethod
    def from_obj(cls, root_dir: Path, obj: dict[str, Any]) -> SimpleCas:
        """Build a store from a configuration mapping.

        Args:
            root_dir: Directory holding the store files.
            obj: Mapping whose ``"hash"`` entry is an algorithm name accepted
                by ``hashlib.new`` (e.g. ``"sha256"``).
        """
        hash_name = obj["hash"]
        # The constructor expects a *factory*; passing an already-built hash
        # object would make every ``self._hash_factory()`` call fail.
        return cls(
            root_dir=root_dir,
            hash_factory=lambda: hashlib.new(hash_name),  # type: ignore
        )

    def __enter__(self):
        """Mark the store as in use and return it for ``with ... as``."""
        assert not self._is_inside_context
        self._is_inside_context = True
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Mark the store as no longer in use; exceptions are not suppressed."""
        assert self._is_inside_context
        self._is_inside_context = False

    def __len__(self) -> int:
        """Return the number of objects known to the index."""
        return len(self._index)

    def __contains__(self, digest: bytes) -> bool:
        """Return whether an object with this *digest* is stored.

        Raises:
            ValueError: If *digest* does not have the store's digest size.
        """
        if len(digest) != self._digest_size:
            raise ValueError(
                f"digest must be {self._digest_size} bytes, got {len(digest)}"
            )
        return digest in self._index

    def read(self, digest: bytes) -> Optional[Object]:
        """Return the object stored under *digest*, or ``None`` if unknown.

        Raises:
            ValueError: If the record found in ``cas.dat`` does not match the
                index entry or is shorter than its declared size.
        """
        item = self._index.get(digest)
        if item is None:
            return None

        with (self._root_dir / "cas.dat").open("rb") as stream:
            stream.seek(item.offset)
            # The header reads must happen unconditionally: doing them inside
            # an ``assert`` would skip them under ``python -O`` and misalign
            # every following field.
            stored_digest = stream.read(self._digest_size)
            object_type = stream.read(4)
            size = int.from_bytes(stream.read(4), "big")
            if (
                stored_digest != digest
                or object_type != item.object_type
                or size != item.size
            ):
                raise ValueError(
                    f"record at offset {item.offset} does not match the index"
                )
            data = stream.read(size)

        if len(data) != size:
            raise ValueError(
                f"record at offset {item.offset} is truncated"
            )
        return Object(object_type, data)

    def write(self, object_type: bytes, data: bytes) -> bytes:
        """Store *data* tagged with *object_type* and return its digest.

        The digest covers ``object_type``, a NUL byte, the 4-byte big-endian
        length, another NUL byte and the data itself.  Content already
        present in the store is not written a second time.

        Raises:
            ValueError: If *object_type* is not exactly 4 bytes long or
                *data* does not fit in a 4-byte length field.
        """
        if len(object_type) != 4:
            raise ValueError("object_type must be exactly 4 bytes long")
        if len(data) >= 2**32:
            raise ValueError("data is too large for a 4-byte length field")

        size_field = len(data).to_bytes(4, "big")

        hasher = self._hash_factory()
        hasher.update(object_type)
        hasher.update(b"\0")
        hasher.update(size_field)
        hasher.update(b"\0")
        hasher.update(data)
        digest = hasher.digest()

        if digest not in self:
            with self._open_writer(digest, object_type, len(data)) as out:
                out.write(digest + object_type + size_field)
                out.write(data)

        return digest

    def _open_writer(self, digest: bytes, object_type: bytes, size: int) -> BinaryIO:
        """Register a new object in the index and return the data file.

        The returned ``cas.dat`` handle is opened for appending and
        positioned at the offset recorded in the index; the caller is
        responsible for writing the record and closing the handle.

        Raises:
            OverflowError: If the offset or *size* does not fit in 4 bytes.
        """
        dat_file = (self._root_dir / "cas.dat").open("ab")
        try:
            offset = dat_file.tell()
            entry = (
                digest
                + object_type
                + offset.to_bytes(4, "big")
                + size.to_bytes(4, "big")
            )
            with (self._root_dir / "cas.idx").open("ab") as idx_file:
                idx_file.write(entry)
        except BaseException:
            # Do not leak the data file handle when the index update fails.
            dat_file.close()
            raise

        # Only expose the entry in memory once it is safely on disk.
        self._index[digest] = IndexItem(object_type, offset, size)
        return dat_file


@dataclass
class Object:
    """An object read back from the store."""

    # 4-byte type tag stored alongside the payload.
    object_type: bytes
    # Raw payload bytes.
    data: bytes


@dataclass
class IndexItem:
    """Location of one object inside ``cas.dat``."""

    # 4-byte type tag of the object.
    object_type: bytes
    # Position of the record (header included) in ``cas.dat``.
    offset: int
    # Payload length in bytes, header excluded.
    size: int
|
||||||
31
src/bsv/simple_cas/util.py
Normal file
31
src/bsv/simple_cas/util.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
|
||||||
|
class Hash(ABC):
    """Abstract interface of the hash objects used by the store.

    It declares the subset of the ``hashlib`` hash-object API that the store
    relies on: the ``name`` and ``digest_size`` attributes and the
    ``update`` / ``digest`` methods.
    """

    # Name of the hash algorithm.
    name: str
    # Length in bytes of the value returned by ``digest()``.
    digest_size: int

    @abstractmethod
    def update(self, *data: bytes | bytearray | memoryview):
        """Feed *data* into the hash state."""

    @abstractmethod
    def digest(self) -> bytes:
        """Return the digest of everything fed so far."""
|
||||||
76
tests/test_simple_cas.py
Normal file
76
tests/test_simple_cas.py
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
from hashlib import sha256
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from tempfile import TemporaryDirectory
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from bsv.simple_cas.cas import SimpleCas
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def tmp_dir():
    """Yield a fresh temporary directory as a ``Path``.

    The directory is removed when the test using the fixture finishes.
    """
    with TemporaryDirectory(prefix="simple_cas_") as dir_name:
        yield Path(dir_name)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def cas(tmp_dir):
    """Yield a SHA-256 ``SimpleCas`` rooted in ``tmp_dir``, already entered."""
    store = SimpleCas(tmp_dir, sha256)  # type: ignore
    with store:
        yield store
|
||||||
|
|
||||||
|
|
||||||
|
def test_simple_cas(tmp_dir: Path):
    """Round-trip one blob, then reopen the store and check it persisted."""
    store = SimpleCas(tmp_dir, sha256)  # type: ignore
    with store:
        assert len(store) == 0

        data = b"Hello World!"
        digest = store.write(b"blob", data)

        assert len(store) == 1
        assert digest in store

        stored = store.read(digest)
        assert stored is not None
        assert stored.object_type == b"blob"
        assert stored.data == data

    # A second instance over the same directory must load the index from disk.
    reopened = SimpleCas(tmp_dir, sha256)  # type: ignore
    with reopened:
        assert len(reopened) == 1
        assert digest in reopened

        reloaded = reopened.read(digest)
        assert reloaded is not None
        assert reloaded.object_type == b"blob"
        assert reloaded.data == data

        # Writing identical content again must not add a new object.
        same_digest = reopened.write(b"blob", data)
        assert same_digest == digest
        assert len(reopened) == 1
|
||||||
Reference in New Issue
Block a user