
Small refactoring with minor improvements: move the exception hierarchy and the object dataclasses into dedicated modules (bsv.exception, bsv.object), promote simple_cas/util.py to bsv/util.py together with the timestamp helpers, and split blob reading out of Blob into a ChunkedObjectReader.

master · Draklaw · 2 years ago
commit 7420d891d4
  1. src/bsv/exception.py (29 lines changed)
  2. src/bsv/object.py (41 lines changed)
  3. src/bsv/repository.py (83 lines changed)
  4. src/bsv/simple_cas/cas.py (44 lines changed)
  5. src/bsv/util.py (11 lines changed)
  6. tests/test_repository.py (2 lines changed)
  7. tests/test_simple_cas.py (4 lines changed)

src/bsv/exception.py (new file, 29 lines)

@@ -0,0 +1,29 @@
+# bsv - Backup, Synchronization, Versioning
+# Copyright (C) 2023 Simon Boyé
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+class BsvError(RuntimeError):
+    pass
+
+class NotFound(BsvError):
+    pass
+
+class UnexpectedObjectType(BsvError):
+    pass
+
+class ConfigError(BsvError):
+    pass
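For illustration only (not part of the commit): a minimal sketch of how a caller might rely on the shared BsvError base, assuming that Repository.get_blob() raises NotFound for an unknown digest — that behaviour is not shown in this diff.

    from bsv.exception import BsvError, NotFound

    def get_blob_or_none(repo, digest):
        try:
            return repo.get_blob(digest)
        except NotFound:
            return None                       # hypothetical: object simply absent
        except BsvError as err:               # any other bsv-specific failure
            raise RuntimeError(f"repository error: {err}") from err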

src/bsv/object.py (new file, 41 lines)

@@ -0,0 +1,41 @@
+# bsv - Backup, Synchronization, Versioning
+# Copyright (C) 2023 Simon Boyé
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True, order=True, slots=True)
+class Digest:
+    digest: bytes
+
+    def __repr__(self) -> str:
+        return self.digest.hex()
+
+
+@dataclass(frozen=True, order=True, slots=True)
+class ObjectInfo:
+    digest: Digest
+    object_type: bytes
+    size: int
+
+    def __repr__(self) -> str:
+        return f"<{self.__class__.__name__} {self.digest} {self.object_type.decode()} {self.size}B>"
+
+
+@dataclass(frozen=True, order=True, slots=True)
+class Object(ObjectInfo):
+    data: bytes
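For illustration only (not part of the commit): how the relocated dataclasses compose. The digest bytes below are toy values; a real Digest would come from the configured hash.

    from bsv.object import Digest, Object, ObjectInfo

    data = b"hello"
    info = ObjectInfo(digest=Digest(b"\x12\x34"), object_type=b"blob", size=len(data))
    obj = Object(digest=info.digest, object_type=info.object_type, size=info.size, data=data)

    assert isinstance(obj, ObjectInfo)   # Object extends ObjectInfo with the payload bytes
    print(info)                          # <ObjectInfo 1234 blob 5B>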

src/bsv/repository.py (83 lines changed)

@@ -16,7 +16,7 @@
 from __future__ import annotations
 from dataclasses import dataclass
-from datetime import UTC, datetime as DateTime, timedelta as TimeDelta
+from datetime import datetime as DateTime
 import hashlib
 from io import BytesIO
 from pathlib import Path, PurePosixPath
@@ -28,9 +28,10 @@ from fastcdc import fastcdc
 import tomlkit
 
 from bsv import __version__
+from bsv.exception import ConfigError
 from bsv.simple_cas import SimpleCas
-from bsv.simple_cas.cas import ConfigError, Digest, SimpleCas
-from bsv.simple_cas.util import Hash, read_exact, read_exact_or_eof
+from bsv.simple_cas.cas import Digest, SimpleCas
+from bsv.util import Hash, read_exact, read_exact_or_eof, time_from_timestamp, timestamp_from_time
 
 DEFAULT_MIN_CHUNK_SIZE = 1 << 12
@@ -212,7 +213,9 @@ def make_cas(cas_name: str, path: Path, hash_factory: Callable[[], Hash]) -> SimpleCas:
     raise ConfigError(f"unknown cas name {cas_name}")
 
 
-@dataclass
+@dataclass(slots=True)
 class ChunkedObject:
     repo: Repository
     size: int
@@ -229,16 +232,41 @@ class ChunkedObject:
             self.chunks.append(chunk)
         return self
 
+    def reader(self) -> ChunkedObjectReader:
+        return ChunkedObjectReader(self)
+
 
-@dataclass
-class Blob(ChunkedObject):
+@dataclass(frozen=True, slots=True)
+class Chunk:
+    digest: Digest
+    size: int
+
+    @classmethod
+    def from_stream(cls, stream: BinaryIO, digest_size: int) -> Chunk | None:
+        digest_bytes = read_exact_or_eof(stream, digest_size)
+        if digest_bytes is None:
+            return None
+        digest = Digest(digest_bytes)
+        return cls(
+            digest = digest,
+            size = int.from_bytes(read_exact(stream, 4)),
+        )
+
+
+class ChunkedObjectReader:
+    _chunked_object: ChunkedObject
     _chunk_index: int = 0
     _chunk_data: bytes = b""
 
+    def __init__(self, chunked_object: ChunkedObject):
+        self._chunked_object = chunked_object
+
     def read(self, num_bytes: int = -1) -> bytes:
+        chunks = self._chunked_object.chunks
         parts = [self._chunk_data]
         size = len(parts[-1])
-        while (num_bytes < 0 or size < num_bytes) and self._chunk_index < len(self.chunks):
+        while (num_bytes < 0 or size < num_bytes) and self._chunk_index < len(chunks):
             parts.append(self.read1())
             size += len(parts[-1])
             if num_bytes >= 0:
@@ -248,13 +276,23 @@ class Blob(ChunkedObject):
         return b"".join(parts)
 
     def read1(self) -> bytes:
-        if self._chunk_index == len(self.chunks):
+        cas = self._chunked_object.repo._cas
+        chunks = self._chunked_object.chunks
+        if self._chunk_index == len(chunks):
             return b""
-        object = self.repo._cas.read(self.chunks[self._chunk_index].digest, object_type=b"chnk")
+        object = cas.read(chunks[self._chunk_index].digest, object_type=b"chnk")
         self._chunk_index += 1
         return object.data
 
 
+@dataclass(slots=True)
+class Blob(ChunkedObject):
+    pass
+
+
 @dataclass
 class Tree:
     repo: Repository
@@ -283,8 +321,6 @@ class Tree:
         return stream.getvalue()
 
-
-EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
 
 @dataclass
 class TreeItem:
     name: str
@@ -346,6 +382,8 @@ class TreeItem:
         stream.write(name_bytes)
 
 
 @dataclass
 class Snapshot:
     repo: Repository
@@ -387,22 +425,6 @@ class Snapshot:
         return stream.getvalue()
 
 
-@dataclass
-class Chunk:
-    digest: Digest
-    size: int
-
-    @classmethod
-    def from_stream(cls, stream: BinaryIO, digest_size: int) -> Chunk | None:
-        digest_bytes = read_exact_or_eof(stream, digest_size)
-        if digest_bytes is None:
-            return None
-        digest = Digest(digest_bytes)
-        return cls(
-            digest = digest,
-            size = int.from_bytes(read_exact(stream, 4)),
-        )
-
-
 class PathPair:
@@ -428,10 +450,3 @@ class PathPair:
     def __lt__(self, rhs: PathPair) -> bool:
         return self.bsv < rhs.bsv
-
-
-def time_from_timestamp(timestamp: int) -> DateTime:
-    return EPOCH + TimeDelta(microseconds=timestamp)
-
-
-def timestamp_from_time(time: DateTime) -> int:
-    return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)
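For illustration only (not part of the commit): a sketch of the reader API that replaces Blob.read(), mirroring the test change further down. It assumes read() returns b"" once all chunks are consumed, as the loops in read()/read1() suggest; the 64 KiB slice size is an arbitrary example.

    blob = repo.get_blob(digest)             # repo and digest as in tests/test_repository.py
    reader = blob.reader()

    parts = []
    while piece := reader.read(1 << 16):     # stream the blob in bounded slices
        parts.append(piece)
    data = b"".join(parts)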

src/bsv/simple_cas/cas.py (44 lines changed)

@@ -19,21 +19,10 @@ from dataclasses import dataclass
 import hashlib
 from pathlib import Path
 from typing import Any, BinaryIO, Callable, Iterator
 
-from bsv.simple_cas.util import Hash, read_exact_or_eof
-
-
-class BsvError(RuntimeError):
-    pass
-
-class NotFound(BsvError):
-    pass
-
-class UnexpectedObjectType(BsvError):
-    pass
-
-class ConfigError(BsvError):
-    pass
+from bsv.exception import NotFound, UnexpectedObjectType
+from bsv.object import Digest, Object, ObjectInfo
+from bsv.util import Hash, read_exact_or_eof
 
 
 class SimpleCas:
@@ -105,7 +94,7 @@ class SimpleCas:
             assert size == item.size
             data = stream.read(size)
 
-        return Object(object_type, data)
+        return Object(digest, object_type, size, data)
 
     def write(self, object_type: bytes, data: bytes) -> Digest:
         assert len(object_type) == 4
@@ -164,22 +153,6 @@ class SimpleCas:
         return self._root_dir / "refs" / key_path
 
-
-@dataclass(frozen=True, order=True, slots=True)
-class Digest:
-    digest: bytes
-
-    def __repr__(self) -> str:
-        return self.digest.hex()
-
-
-@dataclass
-class Object:
-    object_type: bytes
-    data: bytes
-
-    def __repr__(self) -> str:
-        return f"<Object {self.object_type.decode()}: {len(self.data)}B>"
 
 @dataclass
 class IndexItem:
     object_type: bytes
@@ -188,12 +161,3 @@ class IndexItem:
     def __repr__(self) -> str:
         return f"<IndexItem {self.object_type.decode()}: {self.offset}B +{self.size}B>"
-
-
-@dataclass
-class ObjectInfo:
-    digest: Digest
-    object_type: bytes
-    size: int
-
-    def __repr__(self) -> str:
-        return f"<ObjectInfo {self.digest} {self.object_type.decode()} {self.size}B>"
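For illustration only (not part of the commit): the round trip the updated tests exercise, showing that SimpleCas.read() now returns an Object carrying digest, object_type and size alongside the data.

    data = b"some payload"
    digest = cas.write(b"blob", data)        # cas constructed as in tests/test_simple_cas.py

    obj = cas.read(digest)
    assert obj.digest == digest
    assert obj.object_type == b"blob"
    assert obj.size == len(data)
    assert obj.data == data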

src/bsv/simple_cas/util.py → src/bsv/util.py (renamed, 11 lines changed)

@@ -16,9 +16,20 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
+from datetime import UTC, datetime as DateTime, timedelta as TimeDelta
 from typing import BinaryIO
 
 
+EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
+
+
+def time_from_timestamp(timestamp: int) -> DateTime:
+    return EPOCH + TimeDelta(microseconds=timestamp)
+
+
+def timestamp_from_time(time: DateTime) -> int:
+    return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)
+
+
 def read_exact(stream: BinaryIO, num_bytes: int) -> bytes:
     data = stream.read(num_bytes)
     if len(data) != num_bytes:
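For illustration only (not part of the commit): the relocated helpers round-trip an aware datetime through an integer microsecond timestamp.

    from datetime import UTC, datetime as DateTime

    from bsv.util import time_from_timestamp, timestamp_from_time

    now = DateTime.now(tz=UTC)
    ts = timestamp_from_time(now)            # microseconds since the Unix epoch
    assert time_from_timestamp(ts) == now    # exact: datetime already has microsecond precision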

tests/test_repository.py (2 lines changed)

@@ -49,7 +49,7 @@ def test_read_write_blob(tmp_dir: Path, repo: Repository):
         digest = repo.add_blob(stream)
 
     blob = repo.get_blob(digest)
-    data = blob.read()
+    data = blob.reader().read()
 
     with path.open("rb") as stream:
         assert data == stream.read()

tests/test_simple_cas.py (4 lines changed)

@@ -55,7 +55,9 @@ def test_simple_cas(tmp_dir: Path):
 
     obj = cas.read(digest)
     assert obj is not None
+    assert obj.digest == digest
     assert obj.object_type == b"blob"
+    assert obj.size == len(data)
     assert obj.data == data
 
     cas = SimpleCas(
@@ -68,7 +70,9 @@ def test_simple_cas(tmp_dir: Path):
 
     obj = cas.read(digest)
     assert obj is not None
+    assert obj.digest == digest
     assert obj.object_type == b"blob"
+    assert obj.size == len(data)
     assert obj.data == data
 
     digest2 = cas.write(b"blob", data)
