Small refactoring with minor improvements.
This commit is contained in:
29
src/bsv/exception.py
Normal file
29
src/bsv/exception.py
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
|
class BsvError(RuntimeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class NotFound(BsvError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class UnexpectedObjectType(BsvError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class ConfigError(BsvError):
|
||||||
|
pass
|
||||||
41
src/bsv/object.py
Normal file
41
src/bsv/object.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, order=True, slots=True)
|
||||||
|
class Digest:
|
||||||
|
digest: bytes
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
return self.digest.hex()
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, order=True, slots=True)
|
||||||
|
class ObjectInfo:
|
||||||
|
digest: Digest
|
||||||
|
object_type: bytes
|
||||||
|
size: int
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
return f"<{self.__class__.__name__} {self.digest} {self.object_type.decode()} {self.size}B>"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, order=True, slots=True)
|
||||||
|
class Object(ObjectInfo):
|
||||||
|
data: bytes
|
||||||
@@ -16,7 +16,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import UTC, datetime as DateTime, timedelta as TimeDelta
|
from datetime import datetime as DateTime
|
||||||
import hashlib
|
import hashlib
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pathlib import Path, PurePosixPath
|
from pathlib import Path, PurePosixPath
|
||||||
@@ -28,9 +28,10 @@ from fastcdc import fastcdc
|
|||||||
import tomlkit
|
import tomlkit
|
||||||
|
|
||||||
from bsv import __version__
|
from bsv import __version__
|
||||||
|
from bsv.exception import ConfigError
|
||||||
from bsv.simple_cas import SimpleCas
|
from bsv.simple_cas import SimpleCas
|
||||||
from bsv.simple_cas.cas import ConfigError, Digest, SimpleCas
|
from bsv.simple_cas.cas import Digest, SimpleCas
|
||||||
from bsv.simple_cas.util import Hash, read_exact, read_exact_or_eof
|
from bsv.util import Hash, read_exact, read_exact_or_eof, time_from_timestamp, timestamp_from_time
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_MIN_CHUNK_SIZE = 1 << 12
|
DEFAULT_MIN_CHUNK_SIZE = 1 << 12
|
||||||
@@ -212,7 +213,9 @@ def make_cas(cas_name: str, path: Path, hash_factory: Callable[[], Hash]) -> Sim
|
|||||||
raise ConfigError(f"unknown cas name {cas_name}")
|
raise ConfigError(f"unknown cas name {cas_name}")
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
class ChunkedObject:
|
class ChunkedObject:
|
||||||
repo: Repository
|
repo: Repository
|
||||||
size: int
|
size: int
|
||||||
@@ -229,16 +232,41 @@ class ChunkedObject:
|
|||||||
self.chunks.append(chunk)
|
self.chunks.append(chunk)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
def reader(self) -> ChunkedObjectReader:
|
||||||
|
return ChunkedObjectReader(self)
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Blob(ChunkedObject):
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class Chunk:
|
||||||
|
digest: Digest
|
||||||
|
size: int
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_stream(cls, stream: BinaryIO, digest_size: int) -> Chunk | None:
|
||||||
|
digest_bytes = read_exact_or_eof(stream, digest_size)
|
||||||
|
if digest_bytes is None:
|
||||||
|
return None
|
||||||
|
digest = Digest(digest_bytes)
|
||||||
|
|
||||||
|
return cls(
|
||||||
|
digest = digest,
|
||||||
|
size = int.from_bytes(read_exact(stream, 4)),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ChunkedObjectReader:
|
||||||
|
_chunked_object: ChunkedObject
|
||||||
_chunk_index: int = 0
|
_chunk_index: int = 0
|
||||||
_chunk_data: bytes = b""
|
_chunk_data: bytes = b""
|
||||||
|
|
||||||
|
def __init__(self, chunked_object: ChunkedObject):
|
||||||
|
self._chunked_object = chunked_object
|
||||||
|
|
||||||
def read(self, num_bytes: int = -1) -> bytes:
|
def read(self, num_bytes: int = -1) -> bytes:
|
||||||
|
chunks = self._chunked_object.chunks
|
||||||
parts = [self._chunk_data]
|
parts = [self._chunk_data]
|
||||||
size = len(parts[-1])
|
size = len(parts[-1])
|
||||||
while (num_bytes < 0 or size < num_bytes) and self._chunk_index < len(self.chunks):
|
while (num_bytes < 0 or size < num_bytes) and self._chunk_index < len(chunks):
|
||||||
parts.append(self.read1())
|
parts.append(self.read1())
|
||||||
size += len(parts[-1])
|
size += len(parts[-1])
|
||||||
if num_bytes >= 0:
|
if num_bytes >= 0:
|
||||||
@@ -248,13 +276,23 @@ class Blob(ChunkedObject):
|
|||||||
return b"".join(parts)
|
return b"".join(parts)
|
||||||
|
|
||||||
def read1(self) -> bytes:
|
def read1(self) -> bytes:
|
||||||
if self._chunk_index == len(self.chunks):
|
cas = self._chunked_object.repo._cas
|
||||||
|
chunks = self._chunked_object.chunks
|
||||||
|
if self._chunk_index == len(chunks):
|
||||||
return b""
|
return b""
|
||||||
object = self.repo._cas.read(self.chunks[self._chunk_index].digest, object_type=b"chnk")
|
object = cas.read(chunks[self._chunk_index].digest, object_type=b"chnk")
|
||||||
self._chunk_index += 1
|
self._chunk_index += 1
|
||||||
return object.data
|
return object.data
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class Blob(ChunkedObject):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Tree:
|
class Tree:
|
||||||
repo: Repository
|
repo: Repository
|
||||||
@@ -283,8 +321,6 @@ class Tree:
|
|||||||
return stream.getvalue()
|
return stream.getvalue()
|
||||||
|
|
||||||
|
|
||||||
EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class TreeItem:
|
class TreeItem:
|
||||||
name: str
|
name: str
|
||||||
@@ -346,6 +382,8 @@ class TreeItem:
|
|||||||
stream.write(name_bytes)
|
stream.write(name_bytes)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Snapshot:
|
class Snapshot:
|
||||||
repo: Repository
|
repo: Repository
|
||||||
@@ -387,22 +425,6 @@ class Snapshot:
|
|||||||
return stream.getvalue()
|
return stream.getvalue()
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Chunk:
|
|
||||||
digest: Digest
|
|
||||||
size: int
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def from_stream(cls, stream: BinaryIO, digest_size: int) -> Chunk | None:
|
|
||||||
digest_bytes = read_exact_or_eof(stream, digest_size)
|
|
||||||
if digest_bytes is None:
|
|
||||||
return None
|
|
||||||
digest = Digest(digest_bytes)
|
|
||||||
|
|
||||||
return cls(
|
|
||||||
digest = digest,
|
|
||||||
size = int.from_bytes(read_exact(stream, 4)),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class PathPair:
|
class PathPair:
|
||||||
@@ -428,10 +450,3 @@ class PathPair:
|
|||||||
|
|
||||||
def __lt__(self, rhs: PathPair) -> bool:
|
def __lt__(self, rhs: PathPair) -> bool:
|
||||||
return self.bsv < rhs.bsv
|
return self.bsv < rhs.bsv
|
||||||
|
|
||||||
|
|
||||||
def time_from_timestamp(timestamp: int) -> DateTime:
|
|
||||||
return EPOCH + TimeDelta(microseconds=timestamp)
|
|
||||||
|
|
||||||
def timestamp_from_time(time: DateTime) -> int:
|
|
||||||
return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)
|
|
||||||
@@ -19,21 +19,10 @@ from dataclasses import dataclass
|
|||||||
import hashlib
|
import hashlib
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, BinaryIO, Callable, Iterator
|
from typing import Any, BinaryIO, Callable, Iterator
|
||||||
|
from bsv.exception import NotFound, UnexpectedObjectType
|
||||||
|
from bsv.object import Digest, Object, ObjectInfo
|
||||||
|
|
||||||
from bsv.simple_cas.util import Hash, read_exact_or_eof
|
from bsv.util import Hash, read_exact_or_eof
|
||||||
|
|
||||||
|
|
||||||
class BsvError(RuntimeError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class NotFound(BsvError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class UnexpectedObjectType(BsvError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class ConfigError(BsvError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class SimpleCas:
|
class SimpleCas:
|
||||||
@@ -105,7 +94,7 @@ class SimpleCas:
|
|||||||
assert size == item.size
|
assert size == item.size
|
||||||
data = stream.read(size)
|
data = stream.read(size)
|
||||||
|
|
||||||
return Object(object_type, data)
|
return Object(digest, object_type, size, data)
|
||||||
|
|
||||||
def write(self, object_type: bytes, data: bytes) -> Digest:
|
def write(self, object_type: bytes, data: bytes) -> Digest:
|
||||||
assert len(object_type) == 4
|
assert len(object_type) == 4
|
||||||
@@ -164,22 +153,6 @@ class SimpleCas:
|
|||||||
return self._root_dir / "refs" / key_path
|
return self._root_dir / "refs" / key_path
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True, order=True, slots=True)
|
|
||||||
class Digest:
|
|
||||||
digest: bytes
|
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
|
||||||
return self.digest.hex()
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Object:
|
|
||||||
object_type: bytes
|
|
||||||
data: bytes
|
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
|
||||||
return f"<Object {self.object_type.decode()}: {len(self.data)}B>"
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class IndexItem:
|
class IndexItem:
|
||||||
object_type: bytes
|
object_type: bytes
|
||||||
@@ -188,12 +161,3 @@ class IndexItem:
|
|||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return f"<IndexItem {self.object_type.decode()}: {self.offset}B +{self.size}B>"
|
return f"<IndexItem {self.object_type.decode()}: {self.offset}B +{self.size}B>"
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class ObjectInfo:
|
|
||||||
digest: Digest
|
|
||||||
object_type: bytes
|
|
||||||
size: int
|
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
|
||||||
return f"<ObjectInfo {self.digest} {self.object_type.decode()} {self.size}B>"
|
|
||||||
|
|||||||
@@ -16,9 +16,20 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
from datetime import UTC, datetime as DateTime, timedelta as TimeDelta
|
||||||
from typing import BinaryIO
|
from typing import BinaryIO
|
||||||
|
|
||||||
|
|
||||||
|
EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
|
||||||
|
|
||||||
|
|
||||||
|
def time_from_timestamp(timestamp: int) -> DateTime:
|
||||||
|
return EPOCH + TimeDelta(microseconds=timestamp)
|
||||||
|
|
||||||
|
def timestamp_from_time(time: DateTime) -> int:
|
||||||
|
return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)
|
||||||
|
|
||||||
|
|
||||||
def read_exact(stream: BinaryIO, num_bytes: int) -> bytes:
|
def read_exact(stream: BinaryIO, num_bytes: int) -> bytes:
|
||||||
data = stream.read(num_bytes)
|
data = stream.read(num_bytes)
|
||||||
if len(data) != num_bytes:
|
if len(data) != num_bytes:
|
||||||
@@ -49,7 +49,7 @@ def test_read_write_blob(tmp_dir: Path, repo: Repository):
|
|||||||
digest = repo.add_blob(stream)
|
digest = repo.add_blob(stream)
|
||||||
|
|
||||||
blob = repo.get_blob(digest)
|
blob = repo.get_blob(digest)
|
||||||
data = blob.read()
|
data = blob.reader().read()
|
||||||
|
|
||||||
with path.open("rb") as stream:
|
with path.open("rb") as stream:
|
||||||
assert data == stream.read()
|
assert data == stream.read()
|
||||||
|
|||||||
@@ -55,7 +55,9 @@ def test_simple_cas(tmp_dir: Path):
|
|||||||
|
|
||||||
obj = cas.read(digest)
|
obj = cas.read(digest)
|
||||||
assert obj is not None
|
assert obj is not None
|
||||||
|
assert obj.digest == digest
|
||||||
assert obj.object_type == b"blob"
|
assert obj.object_type == b"blob"
|
||||||
|
assert obj.size == len(data)
|
||||||
assert obj.data == data
|
assert obj.data == data
|
||||||
|
|
||||||
cas = SimpleCas(
|
cas = SimpleCas(
|
||||||
@@ -68,7 +70,9 @@ def test_simple_cas(tmp_dir: Path):
|
|||||||
|
|
||||||
obj = cas.read(digest)
|
obj = cas.read(digest)
|
||||||
assert obj is not None
|
assert obj is not None
|
||||||
|
assert obj.digest == digest
|
||||||
assert obj.object_type == b"blob"
|
assert obj.object_type == b"blob"
|
||||||
|
assert obj.size == len(data)
|
||||||
assert obj.data == data
|
assert obj.data == data
|
||||||
|
|
||||||
digest2 = cas.write(b"blob", data)
|
digest2 = cas.write(b"blob", data)
|
||||||
|
|||||||
Reference in New Issue
Block a user