Small refactoring with minor improvements.
This commit is contained in:
29
src/bsv/exception.py
Normal file
29
src/bsv/exception.py
Normal file
@@ -0,0 +1,29 @@
|
||||
# bsv - Backup, Synchronization, Versioning
|
||||
# Copyright (C) 2023 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class BsvError(RuntimeError):
|
||||
pass
|
||||
|
||||
class NotFound(BsvError):
|
||||
pass
|
||||
|
||||
class UnexpectedObjectType(BsvError):
|
||||
pass
|
||||
|
||||
class ConfigError(BsvError):
|
||||
pass
|
||||
41
src/bsv/object.py
Normal file
41
src/bsv/object.py
Normal file
@@ -0,0 +1,41 @@
|
||||
# bsv - Backup, Synchronization, Versioning
|
||||
# Copyright (C) 2023 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(frozen=True, order=True, slots=True)
|
||||
class Digest:
|
||||
digest: bytes
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return self.digest.hex()
|
||||
|
||||
|
||||
@dataclass(frozen=True, order=True, slots=True)
|
||||
class ObjectInfo:
|
||||
digest: Digest
|
||||
object_type: bytes
|
||||
size: int
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<{self.__class__.__name__} {self.digest} {self.object_type.decode()} {self.size}B>"
|
||||
|
||||
|
||||
@dataclass(frozen=True, order=True, slots=True)
|
||||
class Object(ObjectInfo):
|
||||
data: bytes
|
||||
@@ -16,7 +16,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime as DateTime, timedelta as TimeDelta
|
||||
from datetime import datetime as DateTime
|
||||
import hashlib
|
||||
from io import BytesIO
|
||||
from pathlib import Path, PurePosixPath
|
||||
@@ -28,9 +28,10 @@ from fastcdc import fastcdc
|
||||
import tomlkit
|
||||
|
||||
from bsv import __version__
|
||||
from bsv.exception import ConfigError
|
||||
from bsv.simple_cas import SimpleCas
|
||||
from bsv.simple_cas.cas import ConfigError, Digest, SimpleCas
|
||||
from bsv.simple_cas.util import Hash, read_exact, read_exact_or_eof
|
||||
from bsv.simple_cas.cas import Digest, SimpleCas
|
||||
from bsv.util import Hash, read_exact, read_exact_or_eof, time_from_timestamp, timestamp_from_time
|
||||
|
||||
|
||||
DEFAULT_MIN_CHUNK_SIZE = 1 << 12
|
||||
@@ -212,7 +213,9 @@ def make_cas(cas_name: str, path: Path, hash_factory: Callable[[], Hash]) -> Sim
|
||||
raise ConfigError(f"unknown cas name {cas_name}")
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ChunkedObject:
|
||||
repo: Repository
|
||||
size: int
|
||||
@@ -229,16 +232,41 @@ class ChunkedObject:
|
||||
self.chunks.append(chunk)
|
||||
return self
|
||||
|
||||
def reader(self) -> ChunkedObjectReader:
|
||||
return ChunkedObjectReader(self)
|
||||
|
||||
@dataclass
|
||||
class Blob(ChunkedObject):
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class Chunk:
|
||||
digest: Digest
|
||||
size: int
|
||||
|
||||
@classmethod
|
||||
def from_stream(cls, stream: BinaryIO, digest_size: int) -> Chunk | None:
|
||||
digest_bytes = read_exact_or_eof(stream, digest_size)
|
||||
if digest_bytes is None:
|
||||
return None
|
||||
digest = Digest(digest_bytes)
|
||||
|
||||
return cls(
|
||||
digest = digest,
|
||||
size = int.from_bytes(read_exact(stream, 4)),
|
||||
)
|
||||
|
||||
|
||||
class ChunkedObjectReader:
|
||||
_chunked_object: ChunkedObject
|
||||
_chunk_index: int = 0
|
||||
_chunk_data: bytes = b""
|
||||
|
||||
def __init__(self, chunked_object: ChunkedObject):
|
||||
self._chunked_object = chunked_object
|
||||
|
||||
def read(self, num_bytes: int = -1) -> bytes:
|
||||
chunks = self._chunked_object.chunks
|
||||
parts = [self._chunk_data]
|
||||
size = len(parts[-1])
|
||||
while (num_bytes < 0 or size < num_bytes) and self._chunk_index < len(self.chunks):
|
||||
while (num_bytes < 0 or size < num_bytes) and self._chunk_index < len(chunks):
|
||||
parts.append(self.read1())
|
||||
size += len(parts[-1])
|
||||
if num_bytes >= 0:
|
||||
@@ -248,13 +276,23 @@ class Blob(ChunkedObject):
|
||||
return b"".join(parts)
|
||||
|
||||
def read1(self) -> bytes:
|
||||
if self._chunk_index == len(self.chunks):
|
||||
cas = self._chunked_object.repo._cas
|
||||
chunks = self._chunked_object.chunks
|
||||
if self._chunk_index == len(chunks):
|
||||
return b""
|
||||
object = self.repo._cas.read(self.chunks[self._chunk_index].digest, object_type=b"chnk")
|
||||
object = cas.read(chunks[self._chunk_index].digest, object_type=b"chnk")
|
||||
self._chunk_index += 1
|
||||
return object.data
|
||||
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Blob(ChunkedObject):
|
||||
pass
|
||||
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class Tree:
|
||||
repo: Repository
|
||||
@@ -283,8 +321,6 @@ class Tree:
|
||||
return stream.getvalue()
|
||||
|
||||
|
||||
EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
|
||||
|
||||
@dataclass
|
||||
class TreeItem:
|
||||
name: str
|
||||
@@ -346,6 +382,8 @@ class TreeItem:
|
||||
stream.write(name_bytes)
|
||||
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class Snapshot:
|
||||
repo: Repository
|
||||
@@ -387,22 +425,6 @@ class Snapshot:
|
||||
return stream.getvalue()
|
||||
|
||||
|
||||
@dataclass
|
||||
class Chunk:
|
||||
digest: Digest
|
||||
size: int
|
||||
|
||||
@classmethod
|
||||
def from_stream(cls, stream: BinaryIO, digest_size: int) -> Chunk | None:
|
||||
digest_bytes = read_exact_or_eof(stream, digest_size)
|
||||
if digest_bytes is None:
|
||||
return None
|
||||
digest = Digest(digest_bytes)
|
||||
|
||||
return cls(
|
||||
digest = digest,
|
||||
size = int.from_bytes(read_exact(stream, 4)),
|
||||
)
|
||||
|
||||
|
||||
class PathPair:
|
||||
@@ -428,10 +450,3 @@ class PathPair:
|
||||
|
||||
def __lt__(self, rhs: PathPair) -> bool:
|
||||
return self.bsv < rhs.bsv
|
||||
|
||||
|
||||
def time_from_timestamp(timestamp: int) -> DateTime:
|
||||
return EPOCH + TimeDelta(microseconds=timestamp)
|
||||
|
||||
def timestamp_from_time(time: DateTime) -> int:
|
||||
return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)
|
||||
@@ -19,21 +19,10 @@ from dataclasses import dataclass
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from typing import Any, BinaryIO, Callable, Iterator
|
||||
from bsv.exception import NotFound, UnexpectedObjectType
|
||||
from bsv.object import Digest, Object, ObjectInfo
|
||||
|
||||
from bsv.simple_cas.util import Hash, read_exact_or_eof
|
||||
|
||||
|
||||
class BsvError(RuntimeError):
|
||||
pass
|
||||
|
||||
class NotFound(BsvError):
|
||||
pass
|
||||
|
||||
class UnexpectedObjectType(BsvError):
|
||||
pass
|
||||
|
||||
class ConfigError(BsvError):
|
||||
pass
|
||||
from bsv.util import Hash, read_exact_or_eof
|
||||
|
||||
|
||||
class SimpleCas:
|
||||
@@ -105,7 +94,7 @@ class SimpleCas:
|
||||
assert size == item.size
|
||||
data = stream.read(size)
|
||||
|
||||
return Object(object_type, data)
|
||||
return Object(digest, object_type, size, data)
|
||||
|
||||
def write(self, object_type: bytes, data: bytes) -> Digest:
|
||||
assert len(object_type) == 4
|
||||
@@ -164,22 +153,6 @@ class SimpleCas:
|
||||
return self._root_dir / "refs" / key_path
|
||||
|
||||
|
||||
@dataclass(frozen=True, order=True, slots=True)
|
||||
class Digest:
|
||||
digest: bytes
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return self.digest.hex()
|
||||
|
||||
|
||||
@dataclass
|
||||
class Object:
|
||||
object_type: bytes
|
||||
data: bytes
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<Object {self.object_type.decode()}: {len(self.data)}B>"
|
||||
|
||||
@dataclass
|
||||
class IndexItem:
|
||||
object_type: bytes
|
||||
@@ -188,12 +161,3 @@ class IndexItem:
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<IndexItem {self.object_type.decode()}: {self.offset}B +{self.size}B>"
|
||||
|
||||
@dataclass
|
||||
class ObjectInfo:
|
||||
digest: Digest
|
||||
object_type: bytes
|
||||
size: int
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<ObjectInfo {self.digest} {self.object_type.decode()} {self.size}B>"
|
||||
|
||||
@@ -16,9 +16,20 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from datetime import UTC, datetime as DateTime, timedelta as TimeDelta
|
||||
from typing import BinaryIO
|
||||
|
||||
|
||||
EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
|
||||
|
||||
|
||||
def time_from_timestamp(timestamp: int) -> DateTime:
|
||||
return EPOCH + TimeDelta(microseconds=timestamp)
|
||||
|
||||
def timestamp_from_time(time: DateTime) -> int:
|
||||
return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)
|
||||
|
||||
|
||||
def read_exact(stream: BinaryIO, num_bytes: int) -> bytes:
|
||||
data = stream.read(num_bytes)
|
||||
if len(data) != num_bytes:
|
||||
@@ -49,7 +49,7 @@ def test_read_write_blob(tmp_dir: Path, repo: Repository):
|
||||
digest = repo.add_blob(stream)
|
||||
|
||||
blob = repo.get_blob(digest)
|
||||
data = blob.read()
|
||||
data = blob.reader().read()
|
||||
|
||||
with path.open("rb") as stream:
|
||||
assert data == stream.read()
|
||||
|
||||
@@ -55,7 +55,9 @@ def test_simple_cas(tmp_dir: Path):
|
||||
|
||||
obj = cas.read(digest)
|
||||
assert obj is not None
|
||||
assert obj.digest == digest
|
||||
assert obj.object_type == b"blob"
|
||||
assert obj.size == len(data)
|
||||
assert obj.data == data
|
||||
|
||||
cas = SimpleCas(
|
||||
@@ -68,7 +70,9 @@ def test_simple_cas(tmp_dir: Path):
|
||||
|
||||
obj = cas.read(digest)
|
||||
assert obj is not None
|
||||
assert obj.digest == digest
|
||||
assert obj.object_type == b"blob"
|
||||
assert obj.size == len(data)
|
||||
assert obj.data == data
|
||||
|
||||
digest2 = cas.write(b"blob", data)
|
||||
|
||||
Reference in New Issue
Block a user