6 changed files with 257 additions and 2 deletions
@ -0,0 +1,134 @@ |
|||||
|
# bsv - Backup, Synchronization, Versioning |
||||
|
# Copyright (C) 2023 Simon Boyé |
||||
|
# |
||||
|
# This program is free software: you can redistribute it and/or modify |
||||
|
# it under the terms of the GNU Affero General Public License as published by |
||||
|
# the Free Software Foundation, either version 3 of the License, or |
||||
|
# (at your option) any later version. |
||||
|
# |
||||
|
# This program is distributed in the hope that it will be useful, |
||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
|
# GNU Affero General Public License for more details. |
||||
|
# |
||||
|
# You should have received a copy of the GNU Affero General Public License |
||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>. |
||||
|
from __future__ import annotations |
||||
|
|
||||
|
from dataclasses import dataclass |
||||
|
import hashlib |
||||
|
from pathlib import Path |
||||
|
from typing import Any, BinaryIO, Callable, Optional |
||||
|
|
||||
|
from bsv.simple_cas.util import Hash |
||||
|
|
||||
|
|
||||
|
class SimpleCas:
    """Content-addressable store backed by two append-only files.

    Objects are appended to ``<root_dir>/cas.dat``. ``<root_dir>/cas.idx``
    holds one fixed-size record per object so that the in-memory index can be
    rebuilt when the store is reopened. Every integer is stored as 4 bytes,
    big-endian.

    ``cas.idx`` record: digest | object type (4 bytes) | offset | size
    ``cas.dat`` record: digest | object type (4 bytes) | size | data
    """

    _root_dir: Path
    _hash_factory: Callable[[], Hash]
    _digest_size: int

    _index: dict[bytes, IndexItem]

    _is_inside_context: bool = False

    def __init__(self, root_dir: Path, hash_factory: Callable[[], Hash]):
        """Open the store located in ``root_dir``.

        Args:
            root_dir: Directory containing (or that will contain) ``cas.dat``
                and ``cas.idx``.
            hash_factory: Zero-argument callable returning a fresh hash
                object; it is called once here to learn the digest size.

        Raises:
            ValueError: If ``cas.idx`` ends with an incomplete record.
        """
        self._root_dir = root_dir
        self._hash_factory = hash_factory
        self._digest_size = self._hash_factory().digest_size

        self._index = {}
        self._load_index()

    def _load_index(self) -> None:
        """Populate ``self._index`` from ``cas.idx`` if that file exists."""
        try:
            stream = (self._root_dir / "cas.idx").open("rb")
        except FileNotFoundError:
            # A store that was never written to has no index file yet.
            return

        digest_size = self._digest_size
        # digest + 4-byte object type + 4-byte offset + 4-byte size
        record_size = digest_size + 12
        with stream:
            while record := stream.read(record_size):
                if len(record) != record_size:
                    raise ValueError(
                        f"truncated index record in {self._root_dir / 'cas.idx'}"
                    )
                digest = record[:digest_size]
                object_type = record[digest_size:digest_size + 4]
                offset = int.from_bytes(
                    record[digest_size + 4:digest_size + 8], "big"
                )
                size = int.from_bytes(record[digest_size + 8:], "big")
                self._index[digest] = IndexItem(object_type, offset, size)

    @classmethod
    def from_obj(cls, root_dir: Path, obj: dict[str, Any]) -> SimpleCas:
        """Build a store from a configuration mapping.

        Args:
            root_dir: Directory holding the store files.
            obj: Mapping whose ``"hash"`` entry is an algorithm name accepted
                by ``hashlib.new``.
        """
        hash_name = obj["hash"]
        return cls(
            root_dir=root_dir,
            # The constructor expects a factory, not a hash instance: wrap
            # hashlib.new so that every call yields a fresh hash object.
            hash_factory=lambda: hashlib.new(hash_name),  # type: ignore
        )

    def __enter__(self):
        """Mark the store as being inside a ``with`` block and return it."""
        assert not self._is_inside_context
        self._is_inside_context = True
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Leave the ``with`` block; exceptions are not suppressed."""
        assert self._is_inside_context
        self._is_inside_context = False

    def __len__(self) -> int:
        """Return the number of indexed objects."""
        return len(self._index)

    def __contains__(self, digest: bytes) -> bool:
        """Return whether an object with this digest is indexed."""
        assert len(digest) == self._digest_size
        return digest in self._index

    def read(self, digest: bytes) -> Optional[Object]:
        """Return the object stored under ``digest``, or ``None`` if unknown.

        Raises:
            ValueError: If the record found in ``cas.dat`` does not match the
                index entry or is shorter than its declared size.
        """
        item = self._index.get(digest)
        if item is None:
            return None

        with (self._root_dir / "cas.dat").open("rb") as stream:
            stream.seek(item.offset)
            # Read the header unconditionally: doing these reads inside
            # ``assert`` would skip them under ``python -O`` and shift every
            # following field.
            stored_digest = stream.read(self._digest_size)
            object_type = stream.read(4)
            size = int.from_bytes(stream.read(4), "big")
            if (
                stored_digest != digest
                or object_type != item.object_type
                or size != item.size
            ):
                raise ValueError(
                    f"data record for {digest.hex()} does not match the index"
                )
            data = stream.read(size)
            if len(data) != size:
                raise ValueError(f"data record for {digest.hex()} is truncated")

        return Object(object_type, data)

    def write(self, object_type: bytes, data: bytes) -> bytes:
        """Store ``data`` under its digest and return that digest.

        The digest covers ``object_type``, a NUL byte, the 4-byte big-endian
        length of ``data``, another NUL byte, then ``data`` itself. Nothing is
        written when an object with the same digest is already indexed.

        Args:
            object_type: Type tag; must be exactly 4 bytes long.
            data: Payload; must be shorter than 2**32 bytes.
        """
        assert len(object_type) == 4
        assert len(data) < 2**32

        size_bytes = len(data).to_bytes(4, "big")

        hasher = self._hash_factory()
        hasher.update(object_type)
        hasher.update(b"\0")
        hasher.update(size_bytes)
        hasher.update(b"\0")
        hasher.update(data)
        digest = hasher.digest()

        if digest not in self:
            with self._open_writer(digest, object_type, len(data)) as out:
                out.write(digest)
                out.write(object_type)
                out.write(size_bytes)
                out.write(data)

        return digest

    def _open_writer(self, digest: bytes, object_type: bytes, size: int) -> BinaryIO:
        """Register a new object in the index and return the data file.

        The returned stream is ``cas.dat`` opened for appending; the caller
        writes the data record to it and closes it.
        """
        dat_file = (self._root_dir / "cas.dat").open("ab")
        try:
            offset = dat_file.tell()
            # Build the whole record first so an out-of-range offset or size
            # fails before anything is persisted.
            record = b"".join((
                digest,
                object_type,
                offset.to_bytes(4, "big"),
                size.to_bytes(4, "big"),
            ))
            with (self._root_dir / "cas.idx").open("ab") as idx_file:
                idx_file.write(record)
            # Only expose the entry in memory once it is in the index file.
            self._index[digest] = IndexItem(object_type, offset, size)
        except BaseException:
            # Do not leak the data file handle when indexing fails.
            dat_file.close()
            raise

        return dat_file
||||
|
|
||||
|
|
||||
|
@dataclass
class Object:
    """An object as returned by ``SimpleCas.read``."""

    # Type tag of the object (``SimpleCas.write`` requires exactly 4 bytes).
    object_type: bytes
    # Raw payload of the object.
    data: bytes
||||
|
|
||||
|
@dataclass
class IndexItem:
    """In-memory index entry describing where one object is stored."""

    # Type tag recorded for the object.
    object_type: bytes
    # Byte position of the object's record in the data file.
    offset: int
    # Length of the object's payload in bytes.
    size: int
||||
@ -0,0 +1,31 @@ |
|||||
|
# bsv - Backup, Synchronization, Versioning |
||||
|
# Copyright (C) 2023 Simon Boyé |
||||
|
# |
||||
|
# This program is free software: you can redistribute it and/or modify |
||||
|
# it under the terms of the GNU Affero General Public License as published by |
||||
|
# the Free Software Foundation, either version 3 of the License, or |
||||
|
# (at your option) any later version. |
||||
|
# |
||||
|
# This program is distributed in the hope that it will be useful, |
||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
|
# GNU Affero General Public License for more details. |
||||
|
# |
||||
|
# You should have received a copy of the GNU Affero General Public License |
||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>. |
||||
|
from __future__ import annotations |
||||
|
|
||||
|
from abc import ABC, abstractmethod |
||||
|
|
||||
|
|
||||
|
class Hash(ABC):
    """Abstract interface of an incremental hash object."""

    # Presumably the algorithm name, as on hashlib objects (not used here).
    name: str
    # Length in bytes of the value returned by digest().
    digest_size: int

    @abstractmethod
    def update(self, *data: bytes | bytearray | memoryview):
        """Feed the given buffers into the hash state."""

    @abstractmethod
    def digest(self) -> bytes:
        """Return the digest of everything fed so far."""
||||
@ -0,0 +1,76 @@ |
|||||
|
# bsv - Backup, Synchronization, Versioning |
||||
|
# Copyright (C) 2023 Simon Boyé |
||||
|
# |
||||
|
# This program is free software: you can redistribute it and/or modify |
||||
|
# it under the terms of the GNU Affero General Public License as published by |
||||
|
# the Free Software Foundation, either version 3 of the License, or |
||||
|
# (at your option) any later version. |
||||
|
# |
||||
|
# This program is distributed in the hope that it will be useful, |
||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
|
# GNU Affero General Public License for more details. |
||||
|
# |
||||
|
# You should have received a copy of the GNU Affero General Public License |
||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>. |
||||
|
from __future__ import annotations |
||||
|
from hashlib import sha256 |
||||
|
from pathlib import Path |
||||
|
|
||||
|
from tempfile import TemporaryDirectory |
||||
|
import pytest |
||||
|
|
||||
|
from bsv.simple_cas.cas import SimpleCas |
||||
|
|
||||
|
|
||||
|
@pytest.fixture
def tmp_dir():
    """Yield a fresh temporary directory as a ``Path``; removed on teardown."""
    with TemporaryDirectory(prefix="simple_cas_") as dir_name:
        yield Path(dir_name)
||||
|
|
||||
|
|
||||
|
@pytest.fixture
def cas(tmp_dir):
    """Yield a SHA-256 ``SimpleCas`` rooted in ``tmp_dir``, entered as a context."""
    store = SimpleCas(tmp_dir, sha256)  # type: ignore
    with store:
        yield store
||||
|
|
||||
|
|
||||
|
def test_simple_cas(tmp_dir: Path):
    """Write one object, read it back, then reopen the store and check again."""
    payload = b"Hello World!"

    # First session: the store starts empty and accepts a new object.
    store = SimpleCas(tmp_dir, sha256)  # type: ignore
    with store:
        assert len(store) == 0

        digest = store.write(b"blob", payload)

        assert len(store) == 1
        assert digest in store

        stored = store.read(digest)
        assert stored is not None
        assert stored.object_type == b"blob"
        assert stored.data == payload

    # Second session: a new instance must find the object through the index.
    reopened = SimpleCas(tmp_dir, sha256)  # type: ignore
    with reopened:
        assert len(reopened) == 1
        assert digest in reopened

        stored = reopened.read(digest)
        assert stored is not None
        assert stored.object_type == b"blob"
        assert stored.data == payload

        # Writing identical content again must not create a second entry.
        second_digest = reopened.write(b"blob", payload)
        assert second_digest == digest
        assert len(reopened) == 1
||||
Loading…
Reference in new issue