diff --git a/pyproject.toml b/pyproject.toml
index bf0236d..fe48ab2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,10 +10,15 @@ requires-python = ">=3.11"
classifiers = [
# TODO
]
+dynamic = ["version"]
dependencies = [
- "tomlkit"
+ "tomlkit",
+]
+
+[project.optional-dependencies]
+test = [
+ "pytest",
]
-dynamic = ["version"]
[project.urls]
# TODO
diff --git a/src/bsv/command/init.py b/src/bsv/command/init.py
index 1d14d45..765f1cc 100644
--- a/src/bsv/command/init.py
+++ b/src/bsv/command/init.py
@@ -87,11 +87,17 @@ def init(
bsv_table.add(tomlkit.comment("Mapping between bsv tree and the actual filesystem."))
bsv_table.add("path_map", tomlkit.array())
+ cas_table = tomlkit.table()
+ cas_table.add("type", "simple")
+ cas_table.add("hash", "sha256")
+
doc = tomlkit.document()
doc.add(tomlkit.comment("bsv repository configuration"))
doc.add(tomlkit.comment(f"Created by {getlogin()} on {DateTime.now().isoformat()}."))
doc.add(tomlkit.nl())
doc.add("bsv", bsv_table)
+ doc.add(tomlkit.nl())
+ doc.add("cas", cas_table)
config_path = destination / "bsv_config.toml"
try:
diff --git a/src/bsv/simple_cas/__init__.py b/src/bsv/simple_cas/__init__.py
index 3e9d59f..a3e483a 100644
--- a/src/bsv/simple_cas/__init__.py
+++ b/src/bsv/simple_cas/__init__.py
@@ -14,3 +14,6 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see .
from __future__ import annotations
+
+
+from bsv.simple_cas.cas import SimpleCas as Cas
diff --git a/src/bsv/simple_cas/cas.py b/src/bsv/simple_cas/cas.py
new file mode 100644
index 0000000..504fe43
--- /dev/null
+++ b/src/bsv/simple_cas/cas.py
@@ -0,0 +1,134 @@
+# bsv - Backup, Synchronization, Versioning
+# Copyright (C) 2023 Simon Boyé
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+from __future__ import annotations
+
+from dataclasses import dataclass
+import hashlib
+from pathlib import Path
+from typing import Any, BinaryIO, Callable, Optional
+
+from bsv.simple_cas.util import Hash
+
+
+class SimpleCas:
+    """Minimal content-addressable store kept in two flat files under ``root_dir``.
+
+    ``cas.dat`` is an append-only sequence of records, each laid out as
+    ``digest | object_type (4 bytes) | size (4 bytes, big-endian) | data``.
+    ``cas.idx`` is an append-only sequence of fixed-size entries, each laid
+    out as ``digest | object_type (4 bytes) | offset (4 bytes) | size (4 bytes)``
+    where ``offset`` is the position of the matching record in ``cas.dat``.
+    The whole index is loaded in memory when the store is constructed.
+    """
+
+    _root_dir: Path
+    _hash_factory: Callable[[], Hash]
+    _digest_size: int
+
+    _index: dict[bytes, IndexItem]
+
+    _is_inside_context: bool = False
+
+    def __init__(self, root_dir: Path, hash_factory: Callable[[], Hash]):
+        """Open the store located in ``root_dir``, loading ``cas.idx`` if present.
+
+        Args:
+            root_dir: Directory holding ``cas.dat`` and ``cas.idx``.
+            hash_factory: Zero-argument callable returning a fresh hash object.
+
+        Raises:
+            RuntimeError: If ``cas.idx`` is not a whole number of entries.
+        """
+        self._root_dir = root_dir
+        self._hash_factory = hash_factory
+        self._digest_size = self._hash_factory().digest_size
+
+        self._index = self._load_index()
+
+    @classmethod
+    def from_obj(cls, root_dir: Path, obj: dict[str, Any]) -> SimpleCas:
+        """Build a store from a configuration mapping.
+
+        Args:
+            root_dir: Directory holding the store files.
+            obj: Mapping whose ``"hash"`` entry names a ``hashlib`` algorithm.
+
+        Raises:
+            KeyError: If ``obj`` has no ``"hash"`` entry.
+            ValueError: If ``hashlib`` does not know the algorithm.
+        """
+        algorithm = obj["hash"]
+        # The constructor needs a *factory*: hashlib.new() returns one hash
+        # object, so it is wrapped to get a fresh one on every call.
+        return cls(
+            root_dir=root_dir,
+            hash_factory=lambda: hashlib.new(algorithm),  # type: ignore
+        )
+
+    def __enter__(self) -> SimpleCas:
+        """Enter the store context and return the store itself.
+
+        Raises:
+            RuntimeError: If the context is already entered.
+        """
+        if self._is_inside_context:
+            raise RuntimeError("SimpleCas context is already entered")
+        self._is_inside_context = True
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        """Leave the store context; exceptions from the body are not suppressed.
+
+        Raises:
+            RuntimeError: If the context was not entered.
+        """
+        if not self._is_inside_context:
+            raise RuntimeError("SimpleCas context was not entered")
+        self._is_inside_context = False
+
+    def __len__(self) -> int:
+        """Return the number of objects listed in the index."""
+        return len(self._index)
+
+    def __contains__(self, digest: bytes) -> bool:
+        """Tell whether ``digest`` is listed in the index.
+
+        Raises:
+            ValueError: If ``digest`` does not have the hash's digest size.
+        """
+        if len(digest) != self._digest_size:
+            raise ValueError(
+                f"digest must be {self._digest_size} bytes long, got {len(digest)}"
+            )
+        return digest in self._index
+
+    def read(self, digest: bytes) -> Optional[Object]:
+        """Return the object stored under ``digest``, or ``None`` if unknown.
+
+        Raises:
+            RuntimeError: If the record in ``cas.dat`` does not match the index
+                entry or is shorter than its declared size.
+        """
+        item = self._index.get(digest)
+        if item is None:
+            return None
+
+        with (self._root_dir / "cas.dat").open("rb") as stream:
+            stream.seek(item.offset)
+            # These reads advance the stream, so they must never live inside
+            # an ``assert``: ``python -O`` would drop them and shift the data.
+            stored_digest = stream.read(self._digest_size)
+            object_type = stream.read(4)
+            size = int.from_bytes(stream.read(4), "big")
+            # Check the header before trusting ``size`` for the payload read.
+            if (stored_digest, object_type, size) != (
+                digest,
+                item.object_type,
+                item.size,
+            ):
+                raise RuntimeError(
+                    f"cas.dat record at offset {item.offset} does not match"
+                    f" the index entry of {digest.hex()}"
+                )
+            data = stream.read(size)
+
+        if len(data) != size:
+            raise RuntimeError(
+                f"cas.dat record at offset {item.offset} is truncated"
+            )
+        return Object(object_type, data)
+
+    def write(self, object_type: bytes, data: bytes) -> bytes:
+        """Store ``data`` under its digest unless already present.
+
+        The digest covers ``object_type``, a NUL byte, the 4-byte big-endian
+        size, another NUL byte, then ``data``.
+
+        Args:
+            object_type: 4-byte type tag.
+            data: Payload, shorter than 2**32 bytes.
+
+        Returns:
+            The digest identifying the object.
+
+        Raises:
+            ValueError: If ``object_type`` is not 4 bytes long or ``data`` is
+                too large for a 4-byte size field.
+            OverflowError: If ``cas.dat`` has grown past what a 4-byte offset
+                can address.
+        """
+        if len(object_type) != 4:
+            raise ValueError(
+                f"object_type must be 4 bytes long, got {len(object_type)}"
+            )
+        if len(data) >= 2**32:
+            raise ValueError("data must be smaller than 2**32 bytes")
+
+        size_bytes = len(data).to_bytes(4, "big")
+
+        hasher = self._hash_factory()
+        for part in (object_type, b"\0", size_bytes, b"\0", data):
+            hasher.update(part)
+        digest = hasher.digest()
+
+        if digest not in self:
+            self._append(digest, object_type, size_bytes, data)
+
+        return digest
+
+    def _load_index(self) -> dict[bytes, IndexItem]:
+        """Parse ``cas.idx`` into a digest -> IndexItem mapping (empty if absent)."""
+        try:
+            raw = (self._root_dir / "cas.idx").read_bytes()
+        except FileNotFoundError:
+            return {}
+
+        type_end = self._digest_size + 4
+        offset_end = type_end + 4
+        entry_size = offset_end + 4
+        # A partial entry would be loaded as garbage and would misalign every
+        # entry appended afterwards, so refuse to continue.
+        if len(raw) % entry_size != 0:
+            raise RuntimeError(
+                f"cas.idx is truncated: {len(raw)} bytes is not a multiple"
+                f" of the {entry_size}-byte entry size"
+            )
+
+        index: dict[bytes, IndexItem] = {}
+        for start in range(0, len(raw), entry_size):
+            entry = raw[start:start + entry_size]
+            index[entry[:self._digest_size]] = IndexItem(
+                entry[self._digest_size:type_end],
+                int.from_bytes(entry[type_end:offset_end], "big"),
+                int.from_bytes(entry[offset_end:], "big"),
+            )
+        return index
+
+    def _append(
+        self,
+        digest: bytes,
+        object_type: bytes,
+        size_bytes: bytes,
+        data: bytes,
+    ) -> None:
+        """Append one record to ``cas.dat``, then register it in the index.
+
+        The record is written before the index entry so that a failure never
+        leaves an index entry pointing at data that was not stored.
+        """
+        with (self._root_dir / "cas.dat").open("ab") as dat_file:
+            offset = dat_file.tell()
+            # Checked before anything is written: the index stores the offset
+            # on 4 bytes only.
+            if offset >= 2**32:
+                raise OverflowError("cas.dat is too large for a 4-byte offset")
+            dat_file.write(digest)
+            dat_file.write(object_type)
+            dat_file.write(size_bytes)
+            dat_file.write(data)
+
+        with (self._root_dir / "cas.idx").open("ab") as idx_file:
+            # Single write so an entry is less likely to be split on failure.
+            idx_file.write(
+                digest + object_type + offset.to_bytes(4, "big") + size_bytes
+            )
+
+        self._index[digest] = IndexItem(object_type, offset, len(data))
+
+
+@dataclass
+class Object:
+    """An object as returned by ``SimpleCas.read``: a type tag and its payload."""
+
+    # Type tag of the object; ``SimpleCas.write`` requires it to be 4 bytes.
+    object_type: bytes
+    # Raw payload of the object.
+    data: bytes
+
+@dataclass
+class IndexItem:
+    """One entry of the in-memory index kept by ``SimpleCas``."""
+
+    # Type tag of the indexed object.
+    object_type: bytes
+    # Position of the object's record inside ``cas.dat``.
+    offset: int
+    # Payload length in bytes.
+    size: int
+ size: int
diff --git a/src/bsv/simple_cas/util.py b/src/bsv/simple_cas/util.py
new file mode 100644
index 0000000..fd31b33
--- /dev/null
+++ b/src/bsv/simple_cas/util.py
@@ -0,0 +1,31 @@
+# bsv - Backup, Synchronization, Versioning
+# Copyright (C) 2023 Simon Boyé
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+
+
+class Hash(ABC):
+    """Abstract interface of the hash objects consumed by ``SimpleCas``.
+
+    Concrete hashes expose a ``name``, a ``digest_size`` in bytes, and the
+    ``update`` / ``digest`` methods below.
+    """
+
+    name: str
+    digest_size: int
+
+    @abstractmethod
+    def update(self, *data: bytes | bytearray | memoryview):
+        """Feed ``data`` into the hash state."""
+
+    @abstractmethod
+    def digest(self) -> bytes:
+        """Return the digest of everything passed to ``update`` so far."""
diff --git a/tests/test_simple_cas.py b/tests/test_simple_cas.py
new file mode 100644
index 0000000..c219fe7
--- /dev/null
+++ b/tests/test_simple_cas.py
@@ -0,0 +1,76 @@
+# bsv - Backup, Synchronization, Versioning
+# Copyright (C) 2023 Simon Boyé
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+from __future__ import annotations
+from hashlib import sha256
+from pathlib import Path
+
+from tempfile import TemporaryDirectory
+import pytest
+
+from bsv.simple_cas.cas import SimpleCas
+
+
+@pytest.fixture
+def tmp_dir():
+    """Yield a fresh ``simple_cas_*`` temporary directory, removed on teardown."""
+    holder = TemporaryDirectory(prefix="simple_cas_")
+    try:
+        yield Path(holder.name)
+    finally:
+        # Same cleanup the ``with`` form performs when the fixture is torn down.
+        holder.cleanup()
+
+
+@pytest.fixture
+def cas(tmp_dir):
+    """Yield a sha256-backed ``SimpleCas`` rooted in ``tmp_dir``, inside its context."""
+    store = SimpleCas(tmp_dir, sha256)  # type: ignore
+    with store:
+        yield store
+
+
+def test_simple_cas(tmp_dir: Path):
+    """Round-trip one blob, then reopen the store and check that it persisted."""
+    payload = b"Hello World!"
+
+    # First session: the store starts empty and accepts a new object.
+    first_store = SimpleCas(tmp_dir, sha256)  # type: ignore
+    with first_store:
+        assert len(first_store) == 0
+
+        digest = first_store.write(b"blob", payload)
+
+        assert len(first_store) == 1
+        assert digest in first_store
+
+        stored = first_store.read(digest)
+        assert stored is not None
+        assert stored.object_type == b"blob"
+        assert stored.data == payload
+
+    # Second session: a new instance over the same directory sees the object.
+    reopened_store = SimpleCas(tmp_dir, sha256)  # type: ignore
+    with reopened_store:
+        assert len(reopened_store) == 1
+        assert digest in reopened_store
+
+        reloaded = reopened_store.read(digest)
+        assert reloaded is not None
+        assert reloaded.object_type == b"blob"
+        assert reloaded.data == payload
+
+        # Writing the same content again yields the same digest and adds nothing.
+        repeated_digest = reopened_store.write(b"blob", payload)
+        assert repeated_digest == digest
+        assert len(reopened_store) == 1