diff --git a/src/bsv/exception.py b/src/bsv/exception.py
new file mode 100644
index 0000000..c38559c
--- /dev/null
+++ b/src/bsv/exception.py
@@ -0,0 +1,29 @@
+# bsv - Backup, Synchronization, Versioning
+# Copyright (C) 2023 Simon Boyé
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+from __future__ import annotations
+
+
+class BsvError(RuntimeError):
+    pass
+
+class NotFound(BsvError):
+    pass
+
+class UnexpectedObjectType(BsvError):
+    pass
+
+class ConfigError(BsvError):
+    pass
diff --git a/src/bsv/object.py b/src/bsv/object.py
new file mode 100644
index 0000000..4b817fd
--- /dev/null
+++ b/src/bsv/object.py
@@ -0,0 +1,41 @@
+# bsv - Backup, Synchronization, Versioning
+# Copyright (C) 2023 Simon Boyé
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True, order=True, slots=True)
+class Digest:
+    digest: bytes
+
+    def __repr__(self) -> str:
+        return self.digest.hex()
+
+
+@dataclass(frozen=True, order=True, slots=True)
+class ObjectInfo:
+    digest: Digest
+    object_type: bytes
+    size: int
+
+    def __repr__(self) -> str:
+        return f"<{self.__class__.__name__} {self.digest} {self.object_type.decode()} {self.size}B>"
+
+
+@dataclass(frozen=True, order=True, slots=True)
+class Object(ObjectInfo):
+    data: bytes
diff --git a/src/bsv/repository.py b/src/bsv/repository.py
index cb0dce5..ec52f25 100644
--- a/src/bsv/repository.py
+++ b/src/bsv/repository.py
@@ -16,7 +16,7 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from datetime import UTC, datetime as DateTime, timedelta as TimeDelta
+from datetime import datetime as DateTime
 import hashlib
 from io import BytesIO
 from pathlib import Path, PurePosixPath
@@ -28,9 +28,10 @@ from fastcdc import fastcdc
 import tomlkit
 
 from bsv import __version__
+from bsv.exception import ConfigError
 from bsv.simple_cas import SimpleCas
-from bsv.simple_cas.cas import ConfigError, Digest, SimpleCas
-from bsv.simple_cas.util import Hash, read_exact, read_exact_or_eof
+from bsv.simple_cas.cas import Digest, SimpleCas
+from bsv.util import Hash, read_exact, read_exact_or_eof, time_from_timestamp, timestamp_from_time
 
 
 DEFAULT_MIN_CHUNK_SIZE = 1 << 12
@@ -212,7 +213,9 @@ def make_cas(cas_name: str, path: Path, hash_factory: Callable[[], Hash]) -> Sim
     raise ConfigError(f"unknown cas name {cas_name}")
 
 
-@dataclass
+
+
+@dataclass(slots=True)
 class ChunkedObject:
     repo: Repository
     size: int
@@ -229,16 +232,41 @@ class ChunkedObject:
             self.chunks.append(chunk)
         return self
 
+    def reader(self) -> ChunkedObjectReader:
+        return ChunkedObjectReader(self)
 
-@dataclass
-class Blob(ChunkedObject):
+
+@dataclass(frozen=True, slots=True)
+class Chunk:
+    digest: Digest
+    size: int
+
+    @classmethod
+    def from_stream(cls, stream: BinaryIO, digest_size: int) -> Chunk | None:
+        digest_bytes = read_exact_or_eof(stream, digest_size)
+        if digest_bytes is None:
+            return None
+        digest = Digest(digest_bytes)
+
+        return cls(
+            digest = digest,
+            size = int.from_bytes(read_exact(stream, 4)),
+        )
+
+
+class ChunkedObjectReader:
+    _chunked_object: ChunkedObject
     _chunk_index: int = 0
     _chunk_data: bytes = b""
 
+    def __init__(self, chunked_object: ChunkedObject):
+        self._chunked_object = chunked_object
+
     def read(self, num_bytes: int = -1) -> bytes:
+        chunks = self._chunked_object.chunks
         parts = [self._chunk_data]
         size = len(parts[-1])
-        while (num_bytes < 0 or size < num_bytes) and self._chunk_index < len(self.chunks):
+        while (num_bytes < 0 or size < num_bytes) and self._chunk_index < len(chunks):
             parts.append(self.read1())
             size += len(parts[-1])
         if num_bytes >= 0:
@@ -248,13 +276,23 @@ class Blob(ChunkedObject):
         return b"".join(parts)
 
     def read1(self) -> bytes:
-        if self._chunk_index == len(self.chunks):
+        cas = self._chunked_object.repo._cas
+        chunks = self._chunked_object.chunks
+        if self._chunk_index == len(chunks):
             return b""
-        object = self.repo._cas.read(self.chunks[self._chunk_index].digest, object_type=b"chnk")
+        object = cas.read(chunks[self._chunk_index].digest, object_type=b"chnk")
         self._chunk_index += 1
         return object.data
 
+
+@dataclass(slots=True)
+class Blob(ChunkedObject):
+    pass
+
+
+
+
 @dataclass
 class Tree:
     repo: Repository
@@ -283,8 +321,6 @@ class Tree:
         return stream.getvalue()
 
 
-EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
-
 @dataclass
 class TreeItem:
     name: str
@@ -346,6 +382,8 @@ class TreeItem:
         stream.write(name_bytes)
 
 
+
+
 @dataclass
 class Snapshot:
     repo: Repository
@@ -387,22 +425,6 @@ class Snapshot:
         return stream.getvalue()
 
 
-@dataclass
-class Chunk:
-    digest: Digest
-    size: int
-
-    @classmethod
-    def from_stream(cls, stream: BinaryIO, digest_size: int) -> Chunk | None:
-        digest_bytes = read_exact_or_eof(stream, digest_size)
-        if digest_bytes is None:
-            return None
-        digest = Digest(digest_bytes)
-
-        return cls(
-            digest = digest,
-            size = int.from_bytes(read_exact(stream, 4)),
-        )
 
 
 class PathPair:
@@ -428,10 +450,3 @@ class PathPair:
 
     def __lt__(self, rhs: PathPair) -> bool:
         return self.bsv < rhs.bsv
-
-
-def time_from_timestamp(timestamp: int) -> DateTime:
-    return EPOCH + TimeDelta(microseconds=timestamp)
-
-def timestamp_from_time(time: DateTime) -> int:
-    return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)
\ No newline at end of file
diff --git a/src/bsv/simple_cas/cas.py b/src/bsv/simple_cas/cas.py
index 643f9bc..f0b3577 100644
--- a/src/bsv/simple_cas/cas.py
+++ b/src/bsv/simple_cas/cas.py
@@ -19,21 +19,10 @@ from dataclasses import dataclass
 import hashlib
 from pathlib import Path
 from typing import Any, BinaryIO, Callable, Iterator
+from bsv.exception import NotFound, UnexpectedObjectType
+from bsv.object import Digest, Object, ObjectInfo
 
-from bsv.simple_cas.util import Hash, read_exact_or_eof
-
-
-class BsvError(RuntimeError):
-    pass
-
-class NotFound(BsvError):
-    pass
-
-class UnexpectedObjectType(BsvError):
-    pass
-
-class ConfigError(BsvError):
-    pass
+from bsv.util import Hash, read_exact_or_eof
 
 
 class SimpleCas:
@@ -105,7 +94,7 @@ class SimpleCas:
             assert size == item.size
             data = stream.read(size)
 
-        return Object(object_type, data)
+        return Object(digest, object_type, size, data)
 
     def write(self, object_type: bytes, data: bytes) -> Digest:
         assert len(object_type) == 4
@@ -164,22 +153,6 @@ class SimpleCas:
         return self._root_dir / "refs" / key_path
 
 
-@dataclass(frozen=True, order=True, slots=True)
-class Digest:
-    digest: bytes
-
-    def __repr__(self) -> str:
-        return self.digest.hex()
-
-
-@dataclass
-class Object:
-    object_type: bytes
-    data: bytes
-
-    def __repr__(self) -> str:
-        return f""
-
 @dataclass
 class IndexItem:
     object_type: bytes
@@ -188,12 +161,3 @@
 
     def __repr__(self) -> str:
         return f""
-
-@dataclass
-class ObjectInfo:
-    digest: Digest
-    object_type: bytes
-    size: int
-
-    def __repr__(self) -> str:
-        return f""
diff --git a/src/bsv/simple_cas/util.py b/src/bsv/util.py
similarity index 81%
rename from src/bsv/simple_cas/util.py
rename to src/bsv/util.py
index 86ccddb..ed2dac2 100644
--- a/src/bsv/simple_cas/util.py
+++ b/src/bsv/util.py
@@ -16,9 +16,20 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
+from datetime import UTC, datetime as DateTime, timedelta as TimeDelta
 from typing import BinaryIO
 
 
+EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
+
+
+def time_from_timestamp(timestamp: int) -> DateTime:
+    return EPOCH + TimeDelta(microseconds=timestamp)
+
+def timestamp_from_time(time: DateTime) -> int:
+    return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)
+
+
 def read_exact(stream: BinaryIO, num_bytes: int) -> bytes:
     data = stream.read(num_bytes)
     if len(data) != num_bytes:
diff --git a/tests/test_repository.py b/tests/test_repository.py
index 34965ef..fe6bdea 100644
--- a/tests/test_repository.py
+++ b/tests/test_repository.py
@@ -49,7 +49,7 @@ def test_read_write_blob(tmp_dir: Path, repo: Repository):
         digest = repo.add_blob(stream)
     blob = repo.get_blob(digest)
 
-    data = blob.read()
+    data = blob.reader().read()
 
     with path.open("rb") as stream:
         assert data == stream.read()
diff --git a/tests/test_simple_cas.py b/tests/test_simple_cas.py
index 9e5e48a..8190d3f 100644
--- a/tests/test_simple_cas.py
+++ b/tests/test_simple_cas.py
@@ -55,7 +55,9 @@ def test_simple_cas(tmp_dir: Path):
 
     obj = cas.read(digest)
     assert obj is not None
+    assert obj.digest == digest
     assert obj.object_type == b"blob"
+    assert obj.size == len(data)
     assert obj.data == data
 
     cas = SimpleCas(
@@ -68,7 +70,9 @@
 
     obj = cas.read(digest)
    assert obj is not None
+    assert obj.digest == digest
     assert obj.object_type == b"blob"
+    assert obj.size == len(data)
     assert obj.data == data
 
     digest2 = cas.write(b"blob", data)
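
Note (illustrative sketch, not part of the patch): the snippet below shows how the helpers relocated to bsv/util.py and the new ChunkedObjectReader API are expected to be used after this change, following the updated tests. The repo and digest values are assumed to come from an existing Repository, as in tests/test_repository.py.

    from datetime import UTC, datetime as DateTime

    from bsv.util import time_from_timestamp, timestamp_from_time

    # Round-trip a timezone-aware datetime through the integer microsecond
    # timestamp representation used by the repository format.
    now = DateTime.now(UTC)
    assert time_from_timestamp(timestamp_from_time(now)) == now

    # Blobs no longer expose read() directly; data is pulled chunk by chunk
    # through a ChunkedObjectReader obtained from reader().
    blob = repo.get_blob(digest)   # repo and digest assumed to already exist
    data = blob.reader().read()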