Browse Source

Small refactoring with minor improvements.

master
Draklaw 2 years ago
parent
commit
7420d891d4
  1. 29
      src/bsv/exception.py
  2. 41
      src/bsv/object.py
  3. 83
      src/bsv/repository.py
  4. 44
      src/bsv/simple_cas/cas.py
  5. 11
      src/bsv/util.py
  6. 2
      tests/test_repository.py
  7. 4
      tests/test_simple_cas.py

29
src/bsv/exception.py

@ -0,0 +1,29 @@
# bsv - Backup, Synchronization, Versioning
# Copyright (C) 2023 Simon Boyé
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
class BsvError(RuntimeError):
pass
class NotFound(BsvError):
pass
class UnexpectedObjectType(BsvError):
pass
class ConfigError(BsvError):
pass

41
src/bsv/object.py

@ -0,0 +1,41 @@
# bsv - Backup, Synchronization, Versioning
# Copyright (C) 2023 Simon Boyé
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
from dataclasses import dataclass
@dataclass(frozen=True, order=True, slots=True)
class Digest:
digest: bytes
def __repr__(self) -> str:
return self.digest.hex()
@dataclass(frozen=True, order=True, slots=True)
class ObjectInfo:
digest: Digest
object_type: bytes
size: int
def __repr__(self) -> str:
return f"<{self.__class__.__name__} {self.digest} {self.object_type.decode()} {self.size}B>"
@dataclass(frozen=True, order=True, slots=True)
class Object(ObjectInfo):
data: bytes

83
src/bsv/repository.py

@ -16,7 +16,7 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import UTC, datetime as DateTime, timedelta as TimeDelta
from datetime import datetime as DateTime
import hashlib
from io import BytesIO
from pathlib import Path, PurePosixPath
@ -28,9 +28,10 @@ from fastcdc import fastcdc
import tomlkit
from bsv import __version__
from bsv.exception import ConfigError
from bsv.simple_cas import SimpleCas
from bsv.simple_cas.cas import ConfigError, Digest, SimpleCas
from bsv.simple_cas.util import Hash, read_exact, read_exact_or_eof
from bsv.simple_cas.cas import Digest, SimpleCas
from bsv.util import Hash, read_exact, read_exact_or_eof, time_from_timestamp, timestamp_from_time
DEFAULT_MIN_CHUNK_SIZE = 1 << 12
@ -212,7 +213,9 @@ def make_cas(cas_name: str, path: Path, hash_factory: Callable[[], Hash]) -> Sim
raise ConfigError(f"unknown cas name {cas_name}")
@dataclass
@dataclass(slots=True)
class ChunkedObject:
repo: Repository
size: int
@ -229,16 +232,41 @@ class ChunkedObject:
self.chunks.append(chunk)
return self
def reader(self) -> ChunkedObjectReader:
return ChunkedObjectReader(self)
@dataclass
class Blob(ChunkedObject):
@dataclass(frozen=True, slots=True)
class Chunk:
digest: Digest
size: int
@classmethod
def from_stream(cls, stream: BinaryIO, digest_size: int) -> Chunk | None:
digest_bytes = read_exact_or_eof(stream, digest_size)
if digest_bytes is None:
return None
digest = Digest(digest_bytes)
return cls(
digest = digest,
size = int.from_bytes(read_exact(stream, 4)),
)
class ChunkedObjectReader:
_chunked_object: ChunkedObject
_chunk_index: int = 0
_chunk_data: bytes = b""
def __init__(self, chunked_object: ChunkedObject):
self._chunked_object = chunked_object
def read(self, num_bytes: int = -1) -> bytes:
chunks = self._chunked_object.chunks
parts = [self._chunk_data]
size = len(parts[-1])
while (num_bytes < 0 or size < num_bytes) and self._chunk_index < len(self.chunks):
while (num_bytes < 0 or size < num_bytes) and self._chunk_index < len(chunks):
parts.append(self.read1())
size += len(parts[-1])
if num_bytes >= 0:
@ -248,13 +276,23 @@ class Blob(ChunkedObject):
return b"".join(parts)
def read1(self) -> bytes:
if self._chunk_index == len(self.chunks):
cas = self._chunked_object.repo._cas
chunks = self._chunked_object.chunks
if self._chunk_index == len(chunks):
return b""
object = self.repo._cas.read(self.chunks[self._chunk_index].digest, object_type=b"chnk")
object = cas.read(chunks[self._chunk_index].digest, object_type=b"chnk")
self._chunk_index += 1
return object.data
@dataclass(slots=True)
class Blob(ChunkedObject):
pass
@dataclass
class Tree:
repo: Repository
@ -283,8 +321,6 @@ class Tree:
return stream.getvalue()
EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
@dataclass
class TreeItem:
name: str
@ -346,6 +382,8 @@ class TreeItem:
stream.write(name_bytes)
@dataclass
class Snapshot:
repo: Repository
@ -387,22 +425,6 @@ class Snapshot:
return stream.getvalue()
@dataclass
class Chunk:
digest: Digest
size: int
@classmethod
def from_stream(cls, stream: BinaryIO, digest_size: int) -> Chunk | None:
digest_bytes = read_exact_or_eof(stream, digest_size)
if digest_bytes is None:
return None
digest = Digest(digest_bytes)
return cls(
digest = digest,
size = int.from_bytes(read_exact(stream, 4)),
)
class PathPair:
@ -428,10 +450,3 @@ class PathPair:
def __lt__(self, rhs: PathPair) -> bool:
return self.bsv < rhs.bsv
def time_from_timestamp(timestamp: int) -> DateTime:
return EPOCH + TimeDelta(microseconds=timestamp)
def timestamp_from_time(time: DateTime) -> int:
return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)

44
src/bsv/simple_cas/cas.py

@ -19,21 +19,10 @@ from dataclasses import dataclass
import hashlib
from pathlib import Path
from typing import Any, BinaryIO, Callable, Iterator
from bsv.exception import NotFound, UnexpectedObjectType
from bsv.object import Digest, Object, ObjectInfo
from bsv.simple_cas.util import Hash, read_exact_or_eof
class BsvError(RuntimeError):
pass
class NotFound(BsvError):
pass
class UnexpectedObjectType(BsvError):
pass
class ConfigError(BsvError):
pass
from bsv.util import Hash, read_exact_or_eof
class SimpleCas:
@ -105,7 +94,7 @@ class SimpleCas:
assert size == item.size
data = stream.read(size)
return Object(object_type, data)
return Object(digest, object_type, size, data)
def write(self, object_type: bytes, data: bytes) -> Digest:
assert len(object_type) == 4
@ -164,22 +153,6 @@ class SimpleCas:
return self._root_dir / "refs" / key_path
@dataclass(frozen=True, order=True, slots=True)
class Digest:
digest: bytes
def __repr__(self) -> str:
return self.digest.hex()
@dataclass
class Object:
object_type: bytes
data: bytes
def __repr__(self) -> str:
return f"<Object {self.object_type.decode()}: {len(self.data)}B>"
@dataclass
class IndexItem:
object_type: bytes
@ -188,12 +161,3 @@ class IndexItem:
def __repr__(self) -> str:
return f"<IndexItem {self.object_type.decode()}: {self.offset}B +{self.size}B>"
@dataclass
class ObjectInfo:
digest: Digest
object_type: bytes
size: int
def __repr__(self) -> str:
return f"<ObjectInfo {self.digest} {self.object_type.decode()} {self.size}B>"

11
src/bsv/simple_cas/util.py → src/bsv/util.py

@ -16,9 +16,20 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from datetime import UTC, datetime as DateTime, timedelta as TimeDelta
from typing import BinaryIO
EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
def time_from_timestamp(timestamp: int) -> DateTime:
return EPOCH + TimeDelta(microseconds=timestamp)
def timestamp_from_time(time: DateTime) -> int:
return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)
def read_exact(stream: BinaryIO, num_bytes: int) -> bytes:
data = stream.read(num_bytes)
if len(data) != num_bytes:

2
tests/test_repository.py

@ -49,7 +49,7 @@ def test_read_write_blob(tmp_dir: Path, repo: Repository):
digest = repo.add_blob(stream)
blob = repo.get_blob(digest)
data = blob.read()
data = blob.reader().read()
with path.open("rb") as stream:
assert data == stream.read()

4
tests/test_simple_cas.py

@ -55,7 +55,9 @@ def test_simple_cas(tmp_dir: Path):
obj = cas.read(digest)
assert obj is not None
assert obj.digest == digest
assert obj.object_type == b"blob"
assert obj.size == len(data)
assert obj.data == data
cas = SimpleCas(
@ -68,7 +70,9 @@ def test_simple_cas(tmp_dir: Path):
obj = cas.read(digest)
assert obj is not None
assert obj.digest == digest
assert obj.object_type == b"blob"
assert obj.size == len(data)
assert obj.data == data
digest2 = cas.write(b"blob", data)

Loading…
Cancel
Save