SimpleCas basic implementation.

This commit is contained in:
2023-11-10 18:22:26 +01:00
parent bdbd65ae28
commit 8937b51a45
6 changed files with 258 additions and 3 deletions

View File

@@ -87,11 +87,17 @@ def init(
bsv_table.add(tomlkit.comment("Mapping between bsv tree and the actual filesystem."))
bsv_table.add("path_map", tomlkit.array())
cas_table = tomlkit.table()
cas_table.add("type", "simple")
cas_table.add("hash", "sha256")
doc = tomlkit.document()
doc.add(tomlkit.comment("bsv repository configuration"))
doc.add(tomlkit.comment(f"Created by {getlogin()} on {DateTime.now().isoformat()}."))
doc.add(tomlkit.nl())
doc.add("bsv", bsv_table)
doc.add(tomlkit.nl())
doc.add("cas", cas_table)
config_path = destination / "bsv_config.toml"
try:

View File

@@ -14,3 +14,6 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
from bsv.simple_cas.cas import SimpleCas as Cas

134
src/bsv/simple_cas/cas.py Normal file
View File

@@ -0,0 +1,134 @@
# bsv - Backup, Synchronization, Versioning
# Copyright (C) 2023 Simon Boyé
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
from dataclasses import dataclass
import hashlib
from pathlib import Path
from typing import Any, BinaryIO, Callable, Optional
from bsv.simple_cas.util import Hash
class SimpleCas:
    """Content-addressed store backed by two append-only files in ``root_dir``.

    ``cas.dat`` holds the objects back to back; each one is stored as
    ``digest | object_type (4 bytes) | size (4 bytes) | data``.

    ``cas.idx`` holds one fixed-size record per object:
    ``digest | object_type (4 bytes) | offset (4 bytes) | size (4 bytes)``,
    where ``offset`` is the position of the object's record in ``cas.dat``.

    All integers are stored as 4-byte big-endian values.
    """

    _root_dir: Path
    _hash_factory: Callable[[], Hash]
    _digest_size: int
    _index: dict[bytes, IndexItem]
    _is_inside_context: bool = False

    def __init__(self, root_dir: Path, hash_factory: Callable[[], Hash]):
        """Open the store located in ``root_dir``.

        Args:
            root_dir: Directory containing (or that will contain) ``cas.idx``
                and ``cas.dat``.
            hash_factory: Zero-argument callable returning a fresh hash object.

        Raises:
            ValueError: If ``cas.idx`` ends with an incomplete record.
        """
        self._root_dir = root_dir
        self._hash_factory = hash_factory
        self._digest_size = self._hash_factory().digest_size
        self._index = {}
        self._load_index()

    def _load_index(self) -> None:
        """Populate the in-memory index from ``cas.idx`` if that file exists."""
        index_path = self._root_dir / "cas.idx"
        if not index_path.exists():
            return

        digest_end = self._digest_size
        type_end = digest_end + 4
        offset_end = type_end + 4
        record_size = offset_end + 4

        with index_path.open("rb") as stream:
            while True:
                record = stream.read(record_size)
                if not record:
                    break
                if len(record) != record_size:
                    # A short read only happens at end of file: the last
                    # record is incomplete. Fail loudly instead of storing a
                    # garbage entry.
                    raise ValueError(
                        f"truncated record at end of index file {index_path}"
                    )
                digest = record[:digest_end]
                object_type = record[digest_end:type_end]
                # Explicit byte order: same value as the Python 3.11+ default,
                # but also accepted by older interpreters.
                offset = int.from_bytes(record[type_end:offset_end], "big")
                size = int.from_bytes(record[offset_end:], "big")
                self._index[digest] = IndexItem(object_type, offset, size)

    @classmethod
    def from_obj(cls, root_dir: Path, obj: dict[str, Any]) -> SimpleCas:
        """Build a store from a configuration mapping.

        Args:
            root_dir: Directory holding the store files.
            obj: Mapping whose ``"hash"`` entry is a ``hashlib`` algorithm name.
        """
        hash_name = obj["hash"]
        return cls(
            root_dir=root_dir,
            # The constructor expects a *factory*: it calls it every time it
            # needs a fresh hash object, so wrap hashlib.new instead of
            # passing an already-built hash object.
            hash_factory=lambda: hashlib.new(hash_name),  # type: ignore
        )

    def __enter__(self) -> SimpleCas:
        """Mark the store as being inside a ``with`` block and return it."""
        assert not self._is_inside_context
        self._is_inside_context = True
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Mark the store as being outside of the ``with`` block."""
        assert self._is_inside_context
        self._is_inside_context = False

    def __len__(self) -> int:
        """Return the number of objects known to the index."""
        return len(self._index)

    def __contains__(self, digest: bytes) -> bool:
        """Return whether an object with this digest is in the index."""
        assert len(digest) == self._digest_size
        return digest in self._index

    def read(self, digest: bytes) -> Optional[Object]:
        """Return the object stored under ``digest``, or ``None`` if unknown."""
        item = self._index.get(digest)
        if item is None:
            return None

        with (self._root_dir / "cas.dat").open("rb") as stream:
            stream.seek(item.offset)
            # Each read happens outside of the assert statements: asserts are
            # removed under ``python -O`` and a read placed inside one would
            # be skipped, shifting every following field.
            stored_digest = stream.read(self._digest_size)
            assert stored_digest == digest
            object_type = stream.read(4)
            assert object_type == item.object_type
            size = int.from_bytes(stream.read(4), "big")
            assert size == item.size
            data = stream.read(size)
        return Object(object_type, data)

    def write(self, object_type: bytes, data: bytes) -> bytes:
        """Store ``data`` if it is not already present and return its digest.

        The digest covers ``object_type``, a NUL byte, the 4-byte big-endian
        length of ``data``, another NUL byte, then ``data`` itself.

        Args:
            object_type: Exactly 4 bytes identifying the kind of object.
            data: Payload, shorter than 2**32 bytes.
        """
        assert len(object_type) == 4
        assert len(data) < 2**32

        encoded_size = len(data).to_bytes(4, "big")

        hasher = self._hash_factory()
        hasher.update(object_type)
        hasher.update(b"\0")
        hasher.update(encoded_size)
        hasher.update(b"\0")
        hasher.update(data)
        digest = hasher.digest()

        if digest not in self:
            with self._open_writer(digest, object_type, len(data)) as out:
                out.write(digest)
                out.write(object_type)
                out.write(encoded_size)
                out.write(data)
        return digest

    def _open_writer(self, digest: bytes, object_type: bytes, size: int) -> BinaryIO:
        """Register a new object in the index and return ``cas.dat`` for writing.

        The returned stream is opened in append mode; the caller is expected to
        write the object's record to it and close it.
        """
        dat_file = (self._root_dir / "cas.dat").open("ab")
        try:
            offset = dat_file.tell()
            # Build the whole record first: if the offset does not fit in
            # 4 bytes, this raises before anything reaches the index file.
            record = b"".join((
                digest,
                object_type,
                offset.to_bytes(4, "big"),
                size.to_bytes(4, "big"),
            ))
            with (self._root_dir / "cas.idx").open("ab") as idx_file:
                idx_file.write(record)
        except BaseException:
            # Do not leak the data file handle when the index update fails.
            dat_file.close()
            raise
        # Only advertise the object in memory once it is recorded on disk.
        self._index[digest] = IndexItem(object_type, offset, size)
        return dat_file
@dataclass
class Object:
    """An object as returned by ``SimpleCas.read``: a type tag and a payload."""

    # Type tag of the object (``SimpleCas.write`` requires exactly 4 bytes).
    object_type: bytes
    # Raw content of the object.
    data: bytes
@dataclass
class IndexItem:
    """One index entry: where an object's record sits in the data file."""

    # Type tag of the object, as stored in the index record.
    object_type: bytes
    # Position of the object's record in the data file; used as a seek target.
    offset: int
    # Length in bytes of the object's data.
    size: int

View File

@@ -0,0 +1,31 @@
# bsv - Backup, Synchronization, Versioning
# Copyright (C) 2023 Simon Boyé
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
from abc import ABC, abstractmethod
class Hash(ABC):
    """Abstract interface of a hash object.

    The attribute and method names match those of ``hashlib`` hash objects.
    """

    # Name of the hash algorithm.
    name: str
    # Length in bytes of the value returned by ``digest``.
    digest_size: int

    @abstractmethod
    def update(self, *data: bytes | bytearray | memoryview):
        """Feed ``data`` into the hash."""

    @abstractmethod
    def digest(self) -> bytes:
        """Return the digest of everything fed to the hash so far."""