Browse Source

SimpleCas basic implementation.

master
Draklaw 2 years ago
parent
commit
8937b51a45
  1. 9
      pyproject.toml
  2. 6
      src/bsv/command/init.py
  3. 3
      src/bsv/simple_cas/__init__.py
  4. 134
      src/bsv/simple_cas/cas.py
  5. 31
      src/bsv/simple_cas/util.py
  6. 76
      tests/test_simple_cas.py

9
pyproject.toml

@ -10,10 +10,15 @@ requires-python = ">=3.11"
classifiers = [ classifiers = [
# TODO # TODO
] ]
dynamic = ["version"]
dependencies = [ dependencies = [
"tomlkit" "tomlkit",
]
[project.optional-dependencies]
test = [
"pytest",
] ]
dynamic = ["version"]
[project.urls] [project.urls]
# TODO # TODO

6
src/bsv/command/init.py

@ -87,11 +87,17 @@ def init(
bsv_table.add(tomlkit.comment("Mapping between bsv tree and the actual filesystem.")) bsv_table.add(tomlkit.comment("Mapping between bsv tree and the actual filesystem."))
bsv_table.add("path_map", tomlkit.array()) bsv_table.add("path_map", tomlkit.array())
cas_table = tomlkit.table()
cas_table.add("type", "simple")
cas_table.add("hash", "sha256")
doc = tomlkit.document() doc = tomlkit.document()
doc.add(tomlkit.comment("bsv repository configuration")) doc.add(tomlkit.comment("bsv repository configuration"))
doc.add(tomlkit.comment(f"Created by {getlogin()} on {DateTime.now().isoformat()}.")) doc.add(tomlkit.comment(f"Created by {getlogin()} on {DateTime.now().isoformat()}."))
doc.add(tomlkit.nl()) doc.add(tomlkit.nl())
doc.add("bsv", bsv_table) doc.add("bsv", bsv_table)
doc.add(tomlkit.nl())
doc.add("cas", cas_table)
config_path = destination / "bsv_config.toml" config_path = destination / "bsv_config.toml"
try: try:

3
src/bsv/simple_cas/__init__.py

@ -14,3 +14,6 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>. # along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations from __future__ import annotations
from bsv.simple_cas.cas import SimpleCas as Cas

134
src/bsv/simple_cas/cas.py

@ -0,0 +1,134 @@
# bsv - Backup, Synchronization, Versioning
# Copyright (C) 2023 Simon Boyé
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
from dataclasses import dataclass
import hashlib
from pathlib import Path
from typing import Any, BinaryIO, Callable, Optional
from bsv.simple_cas.util import Hash
class SimpleCas:
    """Minimal content-addressable store backed by two append-only files.

    Objects are appended to ``<root_dir>/cas.dat`` as records laid out as
    ``digest | object_type (4 bytes) | size (4 bytes, big-endian) | data``.
    ``<root_dir>/cas.idx`` holds one fixed-size entry per object, laid out as
    ``digest | object_type (4 bytes) | offset (4 bytes) | size (4 bytes)``,
    and is loaded entirely into memory when the store is constructed.
    """

    _root_dir: Path
    _hash_factory: Callable[[], Hash]
    _digest_size: int
    _index: dict[bytes, IndexItem]
    _is_inside_context: bool = False

    def __init__(self, root_dir: Path, hash_factory: Callable[[], Hash]):
        """Open the store located in *root_dir* and load its index, if any.

        Args:
            root_dir: Directory holding ``cas.dat`` and ``cas.idx``.
            hash_factory: Zero-argument callable returning a fresh hash object
                (for instance ``hashlib.sha256``).

        Raises:
            RuntimeError: If ``cas.idx`` ends with an incomplete entry.
        """
        self._root_dir = root_dir
        self._hash_factory = hash_factory
        self._digest_size = self._hash_factory().digest_size
        self._index = {}

        index_path = self._root_dir / "cas.idx"
        if index_path.exists():
            # Every entry has the same size: digest + type + offset + size.
            entry_size = self._digest_size + 12
            type_end = self._digest_size + 4
            offset_end = type_end + 4
            with index_path.open("rb") as stream:
                while entry := stream.read(entry_size):
                    if len(entry) != entry_size:
                        raise RuntimeError(
                            f"truncated index entry in {index_path}"
                        )
                    digest = entry[:self._digest_size]
                    self._index[digest] = IndexItem(
                        entry[self._digest_size:type_end],
                        int.from_bytes(entry[type_end:offset_end], "big"),
                        int.from_bytes(entry[offset_end:], "big"),
                    )

    @classmethod
    def from_obj(cls, root_dir: Path, obj: dict[str, Any]) -> SimpleCas:
        """Build a store from a configuration mapping.

        Args:
            root_dir: Directory holding the store files.
            obj: Mapping whose ``"hash"`` entry names a ``hashlib`` algorithm.

        Returns:
            A store hashing with the requested algorithm.
        """
        algorithm = obj["hash"]
        # The constructor needs a *factory*: it calls it once per hashed
        # object. Passing the result of hashlib.new() directly would hand it
        # a single, non-callable hash instance.
        return cls(
            root_dir=root_dir,
            hash_factory=lambda: hashlib.new(algorithm),  # type: ignore
        )

    def __enter__(self):
        """Mark the store as in use and return it.

        Raises:
            RuntimeError: If the store is already used as a context manager.
        """
        if self._is_inside_context:
            raise RuntimeError("SimpleCas context is not reentrant")
        self._is_inside_context = True
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Leave the context; exceptions raised in the body propagate."""
        if not self._is_inside_context:
            raise RuntimeError("SimpleCas context was not entered")
        self._is_inside_context = False

    def __len__(self) -> int:
        """Return the number of objects known to the index."""
        return len(self._index)

    def __contains__(self, digest: bytes) -> bool:
        """Return whether an object with this *digest* is indexed.

        Raises:
            ValueError: If *digest* does not have the hash's digest size.
        """
        if len(digest) != self._digest_size:
            raise ValueError(
                f"digest must be {self._digest_size} bytes, got {len(digest)}"
            )
        return digest in self._index

    def read(self, digest: bytes) -> Optional[Object]:
        """Fetch the object stored under *digest*.

        Returns:
            The stored object, or ``None`` when *digest* is not indexed.

        Raises:
            RuntimeError: If the record found in ``cas.dat`` does not match
                the index entry or is cut short.
        """
        item = self._index.get(digest)
        if item is None:
            return None

        with (self._root_dir / "cas.dat").open("rb") as stream:
            stream.seek(item.offset)
            # Read the header unconditionally. Doing these reads inside an
            # ``assert`` would skip them under ``python -O`` and misalign
            # everything read afterwards.
            stored_digest = stream.read(self._digest_size)
            object_type = stream.read(4)
            size = int.from_bytes(stream.read(4), "big")
            if (
                stored_digest != digest
                or object_type != item.object_type
                or size != item.size
            ):
                raise RuntimeError(
                    f"record at offset {item.offset} does not match the index"
                )
            data = stream.read(size)

        if len(data) != size:
            raise RuntimeError(
                f"record at offset {item.offset} is truncated"
            )
        return Object(object_type, data)

    def write(self, object_type: bytes, data: bytes) -> bytes:
        """Store *data* under its digest unless it is already present.

        The digest covers ``object_type``, a NUL byte, the 4-byte big-endian
        length of *data*, another NUL byte and finally *data* itself.

        Args:
            object_type: Four-byte type tag.
            data: Payload, strictly smaller than 2**32 bytes.

        Returns:
            The digest identifying the object.

        Raises:
            ValueError: If *object_type* is not 4 bytes long or *data* is too
                large for the 4-byte size field.
        """
        if len(object_type) != 4:
            raise ValueError(
                f"object_type must be 4 bytes, got {len(object_type)}"
            )
        if len(data) >= 2**32:
            raise ValueError("data must be smaller than 2**32 bytes")

        size_field = len(data).to_bytes(4, "big")

        hasher = self._hash_factory()
        hasher.update(object_type)
        hasher.update(b"\0")
        hasher.update(size_field)
        hasher.update(b"\0")
        hasher.update(data)
        digest = hasher.digest()

        if digest not in self:
            with self._open_writer(digest, object_type, len(data)) as out:
                out.write(digest)
                out.write(object_type)
                out.write(size_field)
                out.write(data)

        return digest

    def _open_writer(self, digest: bytes, object_type: bytes, size: int) -> BinaryIO:
        """Register a new object in the index and return the data stream.

        The returned stream is ``cas.dat`` opened for appending; the caller
        is responsible for writing the record and closing the stream.

        NOTE: the index entry is persisted before the record itself is
        written by the caller.

        Raises:
            OverflowError: If the record offset or *size* does not fit in
                4 bytes.
        """
        dat_file = (self._root_dir / "cas.dat").open("ab")
        try:
            # In append mode the stream starts at the end of the file, which
            # is where the new record will begin.
            offset = dat_file.tell()
            # Encode the whole entry first so an overflow is detected before
            # anything is written or registered.
            entry = b"".join((
                digest,
                object_type,
                offset.to_bytes(4, "big"),
                size.to_bytes(4, "big"),
            ))
            with (self._root_dir / "cas.idx").open("ab") as idx_file:
                idx_file.write(entry)
            self._index[digest] = IndexItem(object_type, offset, size)
        except BaseException:
            # Do not leak the data file handle when registration fails.
            dat_file.close()
            raise
        return dat_file
@dataclass
class Object:
    """An object as returned by ``SimpleCas.read``: a type tag and its payload."""

    # Type tag stored alongside the payload (SimpleCas.write requires 4 bytes).
    object_type: bytes
    # Raw payload bytes.
    data: bytes
@dataclass
class IndexItem:
object_type: bytes
offset: int
size: int

31
src/bsv/simple_cas/util.py

@ -0,0 +1,31 @@
# bsv - Backup, Synchronization, Versioning
# Copyright (C) 2023 Simon Boyé
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
from abc import ABC, abstractmethod
class Hash(ABC):
name: str
digest_size: int
@abstractmethod
def update(self, *data: bytes | bytearray | memoryview):
...
@abstractmethod
def digest(self) -> bytes:
...

76
tests/test_simple_cas.py

@ -0,0 +1,76 @@
# bsv - Backup, Synchronization, Versioning
# Copyright (C) 2023 Simon Boyé
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
from hashlib import sha256
from pathlib import Path
from tempfile import TemporaryDirectory
import pytest
from bsv.simple_cas.cas import SimpleCas
@pytest.fixture
def tmp_dir():
    """Yield a fresh temporary directory as a ``Path``; it is removed afterwards."""
    scratch = TemporaryDirectory(prefix="simple_cas_")
    with scratch as dirname:
        yield Path(dirname)
@pytest.fixture
def cas(tmp_dir):
    """Yield a SHA-256 ``SimpleCas`` rooted in ``tmp_dir``, inside its context."""
    store = SimpleCas(tmp_dir, sha256)  # type: ignore
    with store:
        yield store
def test_simple_cas(tmp_dir: Path):
    """Store one blob, then reopen the store and check the blob persisted."""
    payload = b"Hello World!"

    # First session: the store starts empty and accepts a new object.
    store = SimpleCas(tmp_dir, sha256)  # type: ignore
    with store:
        assert len(store) == 0

        digest = store.write(b"blob", payload)
        assert len(store) == 1
        assert digest in store

        stored = store.read(digest)
        assert stored is not None
        assert stored.object_type == b"blob"
        assert stored.data == payload

    # Second session: a new instance over the same directory sees the object.
    reopened = SimpleCas(tmp_dir, sha256)  # type: ignore
    with reopened:
        assert len(reopened) == 1
        assert digest in reopened

        reloaded = reopened.read(digest)
        assert reloaded is not None
        assert reloaded.object_type == b"blob"
        assert reloaded.data == payload

        # Writing identical content again must not create a second entry.
        digest2 = reopened.write(b"blob", payload)
        assert digest2 == digest
        assert len(reopened) == 1
Loading…
Cancel
Save