diff --git a/src/bsv/exception.py b/src/bsv/exception.py
new file mode 100644
index 0000000..c38559c
--- /dev/null
+++ b/src/bsv/exception.py
@@ -0,0 +1,29 @@
+# bsv - Backup, Synchronization, Versioning
+# Copyright (C) 2023 Simon Boyé
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+from __future__ import annotations
+
+
+class BsvError(RuntimeError):
+    """Common base class of the bsv exceptions declared in this module."""
+
+class NotFound(BsvError):
+    """BsvError subclass; by its name, for lookups that find nothing (confirm at raise sites)."""
+
+class UnexpectedObjectType(BsvError):
+    """BsvError subclass; by its name, an object had another type than expected (confirm at raise sites)."""
+
+class ConfigError(BsvError):
+    """BsvError subclass for invalid configuration, e.g. an unknown cas name."""
diff --git a/src/bsv/object.py b/src/bsv/object.py
new file mode 100644
index 0000000..4b817fd
--- /dev/null
+++ b/src/bsv/object.py
@@ -0,0 +1,41 @@
+# bsv - Backup, Synchronization, Versioning
+# Copyright (C) 2023 Simon Boyé
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True, order=True, slots=True)
+class Digest:  # frozen, orderable wrapper around the bytes of a digest
+    digest: bytes
+
+    def __repr__(self) -> str:  # hex text is used in place of the generated dataclass repr
+        return self.digest.hex()
+
+
+@dataclass(frozen=True, order=True, slots=True)
+class ObjectInfo:  # frozen, orderable record: digest, type tag and size of an object
+    digest: Digest
+    object_type: bytes  # decoded with bytes.decode() when the repr is built
+    size: int  # shown with a "B" suffix in the repr — presumably a byte count
+
+    def __repr__(self) -> str:  # uses the runtime class name, so a subclass that inherits this shows its own name
+        return f"<{self.__class__.__name__} {self.digest} {self.object_type.decode()} {self.size}B>"
+
+
+@dataclass(frozen=True, order=True, slots=True, repr=False)  # repr=False: inherit ObjectInfo.__repr__
+class Object(ObjectInfo):  # ObjectInfo plus the payload; a generated repr would print all of `data`
+    data: bytes  # payload bytes
diff --git a/src/bsv/repository.py b/src/bsv/repository.py
index cb0dce5..ec52f25 100644
--- a/src/bsv/repository.py
+++ b/src/bsv/repository.py
@@ -16,7 +16,7 @@
from __future__ import annotations
from dataclasses import dataclass
-from datetime import UTC, datetime as DateTime, timedelta as TimeDelta
+from datetime import datetime as DateTime
import hashlib
from io import BytesIO
from pathlib import Path, PurePosixPath
@@ -28,9 +28,10 @@ from fastcdc import fastcdc
import tomlkit
from bsv import __version__
+from bsv.exception import ConfigError
from bsv.simple_cas import SimpleCas
-from bsv.simple_cas.cas import ConfigError, Digest, SimpleCas
-from bsv.simple_cas.util import Hash, read_exact, read_exact_or_eof
+from bsv.simple_cas.cas import Digest, SimpleCas
+from bsv.util import Hash, read_exact, read_exact_or_eof, time_from_timestamp, timestamp_from_time
DEFAULT_MIN_CHUNK_SIZE = 1 << 12
@@ -212,7 +213,9 @@ def make_cas(cas_name: str, path: Path, hash_factory: Callable[[], Hash]) -> Sim
raise ConfigError(f"unknown cas name {cas_name}")
-@dataclass
+
+
+@dataclass(slots=True)
class ChunkedObject:
repo: Repository
size: int
@@ -229,16 +232,41 @@ class ChunkedObject:
self.chunks.append(chunk)
return self
+    def reader(self) -> ChunkedObjectReader:  # each call builds a new ChunkedObjectReader wrapping this object
+        return ChunkedObjectReader(self)
-@dataclass
-class Blob(ChunkedObject):
+
+@dataclass(frozen=True, slots=True)
+class Chunk:
+    """Frozen (digest, size) record for one chunk."""
+    digest: Digest
+    size: int
+
+    @classmethod
+    def from_stream(cls, stream: BinaryIO, digest_size: int) -> Chunk | None:
+        """Read one Chunk from `stream`; None when read_exact_or_eof() returns None."""
+        raw_digest = read_exact_or_eof(stream, digest_size)
+        if raw_digest is None:
+            return None
+        return cls(
+            digest=Digest(raw_digest),
+            size=int.from_bytes(read_exact(stream, 4)),
+        )
+
+
+class ChunkedObjectReader:
+ _chunked_object: ChunkedObject
_chunk_index: int = 0
_chunk_data: bytes = b""
+ def __init__(self, chunked_object: ChunkedObject):
+ self._chunked_object = chunked_object
+
def read(self, num_bytes: int = -1) -> bytes:
+ chunks = self._chunked_object.chunks
parts = [self._chunk_data]
size = len(parts[-1])
- while (num_bytes < 0 or size < num_bytes) and self._chunk_index < len(self.chunks):
+ while (num_bytes < 0 or size < num_bytes) and self._chunk_index < len(chunks):
parts.append(self.read1())
size += len(parts[-1])
if num_bytes >= 0:
@@ -248,13 +276,23 @@ class Blob(ChunkedObject):
return b"".join(parts)
def read1(self) -> bytes:
- if self._chunk_index == len(self.chunks):
+ cas = self._chunked_object.repo._cas
+ chunks = self._chunked_object.chunks
+ if self._chunk_index == len(chunks):
return b""
- object = self.repo._cas.read(self.chunks[self._chunk_index].digest, object_type=b"chnk")
+ object = cas.read(chunks[self._chunk_index].digest, object_type=b"chnk")
self._chunk_index += 1
return object.data
+
+@dataclass(slots=True)
+class Blob(ChunkedObject):
+    """ChunkedObject subclass that declares no fields or methods of its own."""
+
+
+
+
@dataclass
class Tree:
repo: Repository
@@ -283,8 +321,6 @@ class Tree:
return stream.getvalue()
-EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
-
@dataclass
class TreeItem:
name: str
@@ -346,6 +382,8 @@ class TreeItem:
stream.write(name_bytes)
+
+
@dataclass
class Snapshot:
repo: Repository
@@ -387,22 +425,6 @@ class Snapshot:
return stream.getvalue()
-@dataclass
-class Chunk:
- digest: Digest
- size: int
-
- @classmethod
- def from_stream(cls, stream: BinaryIO, digest_size: int) -> Chunk | None:
- digest_bytes = read_exact_or_eof(stream, digest_size)
- if digest_bytes is None:
- return None
- digest = Digest(digest_bytes)
-
- return cls(
- digest = digest,
- size = int.from_bytes(read_exact(stream, 4)),
- )
class PathPair:
@@ -428,10 +450,3 @@ class PathPair:
def __lt__(self, rhs: PathPair) -> bool:
return self.bsv < rhs.bsv
-
-
-def time_from_timestamp(timestamp: int) -> DateTime:
- return EPOCH + TimeDelta(microseconds=timestamp)
-
-def timestamp_from_time(time: DateTime) -> int:
- return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)
\ No newline at end of file
diff --git a/src/bsv/simple_cas/cas.py b/src/bsv/simple_cas/cas.py
index 643f9bc..f0b3577 100644
--- a/src/bsv/simple_cas/cas.py
+++ b/src/bsv/simple_cas/cas.py
@@ -19,21 +19,10 @@ from dataclasses import dataclass
import hashlib
from pathlib import Path
from typing import Any, BinaryIO, Callable, Iterator
+from bsv.exception import NotFound, UnexpectedObjectType
+from bsv.object import Digest, Object, ObjectInfo
-from bsv.simple_cas.util import Hash, read_exact_or_eof
-
-
-class BsvError(RuntimeError):
- pass
-
-class NotFound(BsvError):
- pass
-
-class UnexpectedObjectType(BsvError):
- pass
-
-class ConfigError(BsvError):
- pass
+from bsv.util import Hash, read_exact_or_eof
class SimpleCas:
@@ -105,7 +94,7 @@ class SimpleCas:
assert size == item.size
data = stream.read(size)
- return Object(object_type, data)
+ return Object(digest, object_type, size, data)
def write(self, object_type: bytes, data: bytes) -> Digest:
assert len(object_type) == 4
@@ -164,22 +153,6 @@ class SimpleCas:
return self._root_dir / "refs" / key_path
-@dataclass(frozen=True, order=True, slots=True)
-class Digest:
- digest: bytes
-
- def __repr__(self) -> str:
- return self.digest.hex()
-
-
-@dataclass
-class Object:
- object_type: bytes
- data: bytes
-
- def __repr__(self) -> str:
- return f"