6 changed files with 257 additions and 2 deletions
@ -0,0 +1,134 @@ |
|||
# bsv - Backup, Synchronization, Versioning |
|||
# Copyright (C) 2023 Simon Boyé |
|||
# |
|||
# This program is free software: you can redistribute it and/or modify |
|||
# it under the terms of the GNU Affero General Public License as published by |
|||
# the Free Software Foundation, either version 3 of the License, or |
|||
# (at your option) any later version. |
|||
# |
|||
# This program is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|||
# GNU Affero General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Affero General Public License |
|||
# along with this program. If not, see <https://www.gnu.org/licenses/>. |
|||
from __future__ import annotations |
|||
|
|||
from dataclasses import dataclass |
|||
import hashlib |
|||
from pathlib import Path |
|||
from typing import Any, BinaryIO, Callable, Optional |
|||
|
|||
from bsv.simple_cas.util import Hash |
|||
|
|||
|
|||
class SimpleCas:
    """Minimal content-addressable store backed by two append-only files.

    Both files live in ``root_dir``:

    * ``cas.dat`` holds one record per object: the digest, the 4-byte object
      type, the payload size as a 4-byte big-endian integer, then the payload.
    * ``cas.idx`` holds one fixed-size entry per object: the digest, the
      4-byte object type, then the record offset in ``cas.dat`` and the
      payload size, both as 4-byte big-endian integers.

    The whole index is loaded in memory when the store is constructed.
    """

    _root_dir: Path
    _hash_factory: Callable[[], Hash]
    _digest_size: int

    _index: dict[bytes, IndexItem]

    _is_inside_context: bool = False

    def __init__(self, root_dir: Path, hash_factory: Callable[[], Hash]):
        """Open the store located in ``root_dir``.

        Args:
            root_dir: Directory containing (or that will contain) ``cas.dat``
                and ``cas.idx``.
            hash_factory: Zero-argument callable returning a fresh hash
                object; its ``digest_size`` fixes the key length.

        Raises:
            ValueError: If ``cas.idx`` ends with an incomplete entry.
        """
        self._root_dir = root_dir
        self._hash_factory = hash_factory
        self._digest_size = self._hash_factory().digest_size

        self._index = {}
        idx_path = self._root_dir / "cas.idx"
        if idx_path.exists():
            # digest + object type (4) + offset (4) + size (4)
            entry_size = self._digest_size + 12
            with idx_path.open("rb") as stream:
                while True:
                    entry = stream.read(entry_size)
                    if not entry:
                        break
                    if len(entry) != entry_size:
                        # A short read means the file was cut mid-entry;
                        # loading it would register a bogus key.
                        raise ValueError(
                            f"truncated entry at the end of {idx_path}"
                        )
                    digest = entry[:self._digest_size]
                    fields = entry[self._digest_size:]
                    self._index[digest] = IndexItem(
                        fields[0:4],
                        int.from_bytes(fields[4:8], "big"),
                        int.from_bytes(fields[8:12], "big"),
                    )

    @classmethod
    def from_obj(cls, root_dir: Path, obj: dict[str, Any]) -> SimpleCas:
        """Build a store from a configuration mapping.

        Args:
            root_dir: Directory of the store.
            obj: Mapping whose ``"hash"`` entry is an algorithm name accepted
                by ``hashlib.new``.
        """
        hash_name = obj["hash"]
        return cls(
            root_dir=root_dir,
            # Must be a factory: __init__ and write() call it to get a fresh
            # hash object each time.
            hash_factory=lambda: hashlib.new(hash_name),  # type: ignore
        )

    def __enter__(self) -> SimpleCas:
        """Mark the store as in use and return it (contexts do not nest)."""
        assert not self._is_inside_context
        self._is_inside_context = True
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Mark the store as no longer in use; exceptions propagate."""
        assert self._is_inside_context
        self._is_inside_context = False

    def __len__(self) -> int:
        """Return the number of objects known to the index."""
        return len(self._index)

    def __contains__(self, digest: bytes) -> bool:
        """Return whether an object with this digest is in the index."""
        assert len(digest) == self._digest_size
        return digest in self._index

    def read(self, digest: bytes) -> Optional[Object]:
        """Return the object stored under ``digest``, or ``None`` if unknown."""
        item = self._index.get(digest)
        if item is None:
            return None

        with (self._root_dir / "cas.dat").open("rb") as stream:
            stream.seek(item.offset)
            # Read the header unconditionally: the reads must not live inside
            # the asserts, otherwise ``python -O`` would skip them and the
            # payload would be read from the wrong position.
            stored_digest = stream.read(self._digest_size)
            object_type = stream.read(4)
            size = int.from_bytes(stream.read(4), "big")

            assert stored_digest == digest
            assert object_type == item.object_type
            assert size == item.size

            data = stream.read(size)

        return Object(object_type, data)

    def write(self, object_type: bytes, data: bytes) -> bytes:
        """Store ``data`` under its digest and return that digest.

        The digest covers the object type, a NUL byte, the 4-byte big-endian
        payload size, a NUL byte, then the payload. Nothing is written when
        the digest is already indexed.

        Args:
            object_type: Exactly 4 bytes identifying the kind of object.
            data: Payload, shorter than 2**32 bytes.
        """
        assert len(object_type) == 4
        assert len(data) < 2**32

        size_field = len(data).to_bytes(4, "big")

        hasher = self._hash_factory()
        hasher.update(object_type)
        hasher.update(b"\0")
        hasher.update(size_field)
        hasher.update(b"\0")
        hasher.update(data)
        digest = hasher.digest()

        if digest not in self:
            with self._open_writer(digest, object_type, len(data)) as out:
                out.write(digest)
                out.write(object_type)
                out.write(size_field)
                out.write(data)

        return digest

    def _open_writer(self, digest: bytes, object_type: bytes, size: int) -> BinaryIO:
        """Register a new object in the index and return ``cas.dat`` for append.

        The caller owns the returned stream and must close it after writing
        the record.
        """
        dat_file = (self._root_dir / "cas.dat").open("ab")
        try:
            # The original code relies on tell() of an append-mode stream
            # being the current end of file, i.e. where the record will start.
            offset = dat_file.tell()
            entry = (
                digest
                + object_type
                + offset.to_bytes(4, "big")
                + size.to_bytes(4, "big")
            )
            with (self._root_dir / "cas.idx").open("ab") as idx_file:
                idx_file.write(entry)
        except BaseException:
            # Do not leak the data file handle when indexing fails.
            dat_file.close()
            raise

        # Only expose the entry in memory once it is recorded on disk.
        self._index[digest] = IndexItem(object_type, offset, size)
        return dat_file
|||
|
|||
|
|||
@dataclass
class Object:
    """An object as returned by ``SimpleCas.read``: its type tag and payload."""

    # 4-byte tag identifying the kind of object (e.g. b"blob").
    object_type: bytes
    # Raw payload bytes.
    data: bytes
|||
|
|||
@dataclass
class IndexItem:
    """In-memory index entry locating one object inside ``cas.dat``."""

    # 4-byte tag identifying the kind of object.
    object_type: bytes
    # Position in cas.dat where the object's record starts.
    offset: int
    # Length of the object's payload in bytes.
    size: int
|||
@ -0,0 +1,31 @@ |
|||
# bsv - Backup, Synchronization, Versioning |
|||
# Copyright (C) 2023 Simon Boyé |
|||
# |
|||
# This program is free software: you can redistribute it and/or modify |
|||
# it under the terms of the GNU Affero General Public License as published by |
|||
# the Free Software Foundation, either version 3 of the License, or |
|||
# (at your option) any later version. |
|||
# |
|||
# This program is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|||
# GNU Affero General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Affero General Public License |
|||
# along with this program. If not, see <https://www.gnu.org/licenses/>. |
|||
from __future__ import annotations |
|||
|
|||
from abc import ABC, abstractmethod |
|||
|
|||
|
|||
class Hash(ABC):
    """Abstract interface of an incremental hash object.

    Implementations expose a ``name`` and a ``digest_size`` and provide
    ``update`` and ``digest``. NOTE(review): this appears to mirror the subset
    of the ``hashlib`` hash-object API the store relies on -- confirm.
    """

    name: str
    digest_size: int

    @abstractmethod
    def update(self, *data: bytes | bytearray | memoryview):
        """Feed one or more byte buffers into the hash."""

    @abstractmethod
    def digest(self) -> bytes:
        """Return the digest of the data fed so far, as bytes."""
|||
@ -0,0 +1,76 @@ |
|||
# bsv - Backup, Synchronization, Versioning |
|||
# Copyright (C) 2023 Simon Boyé |
|||
# |
|||
# This program is free software: you can redistribute it and/or modify |
|||
# it under the terms of the GNU Affero General Public License as published by |
|||
# the Free Software Foundation, either version 3 of the License, or |
|||
# (at your option) any later version. |
|||
# |
|||
# This program is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|||
# GNU Affero General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU Affero General Public License |
|||
# along with this program. If not, see <https://www.gnu.org/licenses/>. |
|||
from __future__ import annotations |
|||
from hashlib import sha256 |
|||
from pathlib import Path |
|||
|
|||
from tempfile import TemporaryDirectory |
|||
import pytest |
|||
|
|||
from bsv.simple_cas.cas import SimpleCas |
|||
|
|||
|
|||
@pytest.fixture
def tmp_dir():
    """Yield a fresh temporary directory as a ``Path``; it is removed on teardown."""
    scratch = TemporaryDirectory(prefix="simple_cas_")
    with scratch as dir_name:
        yield Path(dir_name)
|||
|
|||
|
|||
@pytest.fixture
def cas(tmp_dir):
    """Yield a SHA-256 ``SimpleCas`` rooted in ``tmp_dir``, entered as a context."""
    store = SimpleCas(tmp_dir, sha256)  # type: ignore
    with store:
        yield store
|||
|
|||
|
|||
def test_simple_cas(tmp_dir: Path):
    """Write one blob, read it back, then reopen the store and check it persisted."""
    payload = b"Hello World!"

    def open_store() -> SimpleCas:
        return SimpleCas(tmp_dir, sha256)  # type: ignore

    def check_stored(store: SimpleCas, key: bytes) -> None:
        # The store holds exactly the one blob, retrievable by its digest.
        assert len(store) == 1
        assert key in store

        found = store.read(key)
        assert found is not None
        assert found.object_type == b"blob"
        assert found.data == payload

    first = open_store()
    with first:
        assert len(first) == 0

        digest = first.write(b"blob", payload)
        check_stored(first, digest)

    # A new instance over the same directory must see the persisted object.
    second = open_store()
    with second:
        check_stored(second, digest)

        # Writing identical content again is a no-op returning the same digest.
        digest2 = second.write(b"blob", payload)
        assert digest2 == digest
        assert len(second) == 1
|||
Loading…
Reference in new issue