Compare commits
7 Commits
67d15f989a
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| e74eaf0408 | |||
| d058cd0631 | |||
| 52a553d72b | |||
| eb6ae85698 | |||
| 073fd5e567 | |||
| 17bef2e63a | |||
| 7420d891d4 |
82
README.md
82
README.md
@@ -1,3 +1,83 @@
|
|||||||
# BSV - Backup, Synchronization, Versioning
|
# BSV - Backup, Synchronization, Versioning
|
||||||
|
|
||||||
Readme to be done.
|
Bsv is a tool to perform backups, file/directory synchronization between devices and light versioning. It's architecture is inspired from git, but focus on backup first, synchronization second and as a byproduct also allow versioning.
|
||||||
|
|
||||||
|
Bsv is in a very early stage of development and is not production ready yet.
|
||||||
|
|
||||||
|
|
||||||
|
## Development setup
|
||||||
|
|
||||||
|
Bsv currently only supports Python >= 3.11. Once you have a compatible Python version, just fetch the code and install bsv in editable mode in a virtual env:
|
||||||
|
|
||||||
|
```
|
||||||
|
git clone https://git.draklia.net/draklaw/pybsv.git
|
||||||
|
cd pybsv
|
||||||
|
python -m venv venv
|
||||||
|
. venv/bin/activate
|
||||||
|
pip install -e .[test]
|
||||||
|
```
|
||||||
|
|
||||||
|
This will automatically install the dependencies (including `pytest`). Happy hacking !
|
||||||
|
|
||||||
|
|
||||||
|
## Roadmap
|
||||||
|
|
||||||
|
### v0.0.1 - Simple CAS + insert/remove files manually
|
||||||
|
|
||||||
|
Basic features. Naïve CAS implementation that store everything in a single big file with no option for removing objects. Supports a single "local" repository.
|
||||||
|
|
||||||
|
- [x] Simple CAS implementation (it's OK if it's naïve).
|
||||||
|
- [x] Content-based chunking to store files.
|
||||||
|
- [x] `bsv init` command to initialize bsv.
|
||||||
|
- [x] `bsv info` print useful information bsv configuration.
|
||||||
|
- [ ] `bsv log` show the history of snapshots.
|
||||||
|
- [ ] `bsv show <digest>` show the object `digest`.
|
||||||
|
- [ ] `bsv ls <bsv-path>` list files in a bsv directory.
|
||||||
|
- [ ] `bsv mkdir <bsv-path>` create a directory in bsv directly.
|
||||||
|
- [ ] `bsv add [-r] <fs-path> <bsv-path>` copy files from the filesystem to bsv.
|
||||||
|
- [ ] `bsv get [-r] <bsv-path> <fs-path>` copy files from bsv to the filesystem.
|
||||||
|
- [ ] `bsv rm [-r] <bsv-path>` create a directory in bsv directly.
|
||||||
|
|
||||||
|
### v0.0.2 - File map + snapshots
|
||||||
|
|
||||||
|
Add support for mapping files from BSV virtual file system to the actual file system. Add snapshot and restore commands.
|
||||||
|
|
||||||
|
- [ ] `bsv map` list mappings between bsv paths and filesystem paths.
|
||||||
|
- [ ] `bsv map add <bsv-path> <fs-path>` add a mapping.
|
||||||
|
- [ ] `bsv map remove <bsv-path> <fs-path>` remove a mapping.
|
||||||
|
- [ ] `bsv snapshot` capture a snapshot, i.e. ensure that mapped files in the repositories match what is on the filesystem.
|
||||||
|
- [ ] `bsv restore <snapshot> <fs-path>` update files on the filesystem to the version captured by `snapshot`.
|
||||||
|
|
||||||
|
### v0.0.3 - Multiple repository
|
||||||
|
|
||||||
|
Support multiple repository. Repository can be configured to store only metadata (typically for the local repository) or everything.
|
||||||
|
|
||||||
|
- [ ] Support repositories that store only metadata.
|
||||||
|
- [ ] `bsv repo [-v]` list all known repositories.
|
||||||
|
- [ ] `bsv repo create <name> <destination>` create a new repository.
|
||||||
|
- [ ] `bsv repo add [<name>] <destination>` add an already existing repository.
|
||||||
|
- [ ] `bsv repo remove <name>|<destination>` remove a repository.
|
||||||
|
- [ ] `bsv fetch [<name>]` fetch latest metadata from known repositories.
|
||||||
|
- [ ] `bsv sync` similar to `snapshot` + `fetch` + `restore`: Fetch latest changes from the repositories and update the filesystem to match. In case of conflict (file changed both in the repositories and locally), performs a snapshot first to ensure all conflicting versions are backed'up, then use some conflict-resolution strategy and warn the user.
|
||||||
|
|
||||||
|
### v0.0.4 - Proper CAS
|
||||||
|
|
||||||
|
- [ ] Safe concurrent access (e.g. when several devices use a shared repository).
|
||||||
|
- [ ] Support removing objects.
|
||||||
|
- [ ] Garbage collection (remove unreferenced objects).
|
||||||
|
- [ ] Use garbage collection to keep metadata-only repository clean.
|
||||||
|
|
||||||
|
### v0.0.5 - Some extra features
|
||||||
|
|
||||||
|
- [ ] `bsv tag <name> [<snapshot>] [-m <message>]` set/update a tag (an alias to a specific snapshot).
|
||||||
|
- [ ] Support for symlinks.
|
||||||
|
|
||||||
|
### Later
|
||||||
|
|
||||||
|
- [ ] `bsv watch` starts a daemon that watch changes in mapped directories and automatically create snapshots.
|
||||||
|
- [ ] `bsv http` starts an http server that expose an API + an interface to manipulate BSV. Allow to list files, explore history, download and upload files...
|
||||||
|
- [ ] Bsv protocol + client/server
|
||||||
|
- [ ] Custom rules for repository to select what must be stored or not.
|
||||||
|
- [ ] Create sensible rules for backup (keep a lot of recent versions, less for older versions).
|
||||||
|
- [ ] Add object set support (a kind of object that simply store a collection of objects). Can be used as tag.
|
||||||
|
- [ ] Add mail object ?
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ classifiers = [
|
|||||||
dynamic = ["version"]
|
dynamic = ["version"]
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"fastcdc",
|
"fastcdc",
|
||||||
|
"rich",
|
||||||
"tomlkit",
|
"tomlkit",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -22,9 +23,8 @@ test = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
# TODO
|
"Homepage" = "https://git.draklia.net/draklaw/pybsv"
|
||||||
# "Homepage" = "https://github.com/pypa/sampleproject"
|
"Bug Tracker" = "https://git.draklia.net/draklaw/pybsv/issues"
|
||||||
# "Bug Tracker" = "https://github.com/pypa/sampleproject/issues"
|
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
bsv = "bsv.main:main"
|
bsv = "bsv.main:main"
|
||||||
|
|||||||
126
src/bsv/cli.py
Normal file
126
src/bsv/cli.py
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Callable, TypeVar
|
||||||
|
|
||||||
|
from rich.console import Console
|
||||||
|
from rich.text import Text
|
||||||
|
|
||||||
|
|
||||||
|
_console: Console | None = None
|
||||||
|
def get_console() -> Console:
|
||||||
|
assert _console is not None
|
||||||
|
return _console
|
||||||
|
|
||||||
|
_error_console: Console | None = None
|
||||||
|
def get_error_console() -> Console:
|
||||||
|
assert _error_console is not None
|
||||||
|
return _error_console
|
||||||
|
|
||||||
|
|
||||||
|
def init_consoles(color: str="auto"):
|
||||||
|
global _console
|
||||||
|
global _error_console
|
||||||
|
|
||||||
|
assert _console is None
|
||||||
|
assert _error_console is None
|
||||||
|
|
||||||
|
kwargs: dict[str, Any] = {
|
||||||
|
"tab_size": 4,
|
||||||
|
}
|
||||||
|
match color:
|
||||||
|
case "always":
|
||||||
|
kwargs["force_terminal"] = True
|
||||||
|
case "auto":
|
||||||
|
pass
|
||||||
|
case "never":
|
||||||
|
kwargs["no_color"] = True
|
||||||
|
|
||||||
|
_console = Console(
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
|
_error_console = Console(
|
||||||
|
stderr = True,
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
PromptType = TypeVar("PromptType")
|
||||||
|
|
||||||
|
class NoDefaultType:
|
||||||
|
def __repr__(self):
|
||||||
|
return "NoDefault"
|
||||||
|
NoDefault = NoDefaultType()
|
||||||
|
|
||||||
|
def prompt(
|
||||||
|
prompt: str,
|
||||||
|
factory: Callable[[str], PromptType],
|
||||||
|
*,
|
||||||
|
console: Console | None = None,
|
||||||
|
default: PromptType | NoDefaultType = NoDefault,
|
||||||
|
show_default: bool = True,
|
||||||
|
) -> PromptType:
|
||||||
|
if console is None:
|
||||||
|
console = get_console()
|
||||||
|
|
||||||
|
prompt_text = Text(prompt, style="prompt")
|
||||||
|
prompt_text.end = ""
|
||||||
|
if show_default and default is not NoDefault:
|
||||||
|
prompt_text.append(" ")
|
||||||
|
prompt_text.append(f"({default})", style="prompt.default")
|
||||||
|
prompt_text.append(": ")
|
||||||
|
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
value = console.input(prompt_text)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
console.print("")
|
||||||
|
raise
|
||||||
|
|
||||||
|
if not value and not isinstance(default, NoDefaultType):
|
||||||
|
return default
|
||||||
|
try:
|
||||||
|
return factory(value)
|
||||||
|
except ValueError as err:
|
||||||
|
console.print(err)
|
||||||
|
|
||||||
|
def prompt_confirmation(prompt: str, *, console: Console | None=None, default: bool=True) -> bool:
|
||||||
|
if console is None:
|
||||||
|
console = get_console()
|
||||||
|
|
||||||
|
prompt_text = Text(prompt, style="prompt")
|
||||||
|
prompt_text.end = ""
|
||||||
|
prompt_text.append(" ")
|
||||||
|
if default:
|
||||||
|
prompt_text.append("(Y/n)", style="prompt.default")
|
||||||
|
else:
|
||||||
|
prompt_text.append("(y/N)", style="prompt.default")
|
||||||
|
prompt_text.append(": ")
|
||||||
|
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
value = console.input(prompt_text).strip().lower()
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
console.print("")
|
||||||
|
raise
|
||||||
|
|
||||||
|
if not value and not isinstance(default, NoDefaultType):
|
||||||
|
return default
|
||||||
|
if value not in "yn":
|
||||||
|
console.print("Please answer 'y' or 'n'.")
|
||||||
|
else:
|
||||||
|
return value == "y"
|
||||||
@@ -19,6 +19,7 @@ from argparse import ArgumentParser
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from bsv import __version__
|
from bsv import __version__
|
||||||
|
from bsv.cli import get_console
|
||||||
from bsv.command import command
|
from bsv.command import command
|
||||||
from bsv.repository import Repository
|
from bsv.repository import Repository
|
||||||
|
|
||||||
@@ -33,27 +34,29 @@ def init_parser(parser: ArgumentParser):
|
|||||||
)
|
)
|
||||||
|
|
||||||
@command(init_parser)
|
@command(init_parser)
|
||||||
def info(repository_path: Path | None, verbosity: int=0) -> int:
|
def info(config_path: Path, verbosity: int=0) -> int:
|
||||||
"""Print informations about bsv: config file used, known repository, file mapping...
|
"""Print informations about bsv: config file used, known repository, file mapping...
|
||||||
"""
|
"""
|
||||||
|
|
||||||
print(f"bsv v{__version__}")
|
print = get_console().print
|
||||||
|
|
||||||
if repository_path is None:
|
print(f"bsv [green]v{__version__}")
|
||||||
print("Repository path not found. Bsv is likely not setup on this device.")
|
|
||||||
|
if not config_path.exists():
|
||||||
|
print("bsv configuration not found. Bsv is likely not setup on this device.", style="red")
|
||||||
return 0
|
return 0
|
||||||
else:
|
|
||||||
print(f"Repository path: {repository_path}")
|
|
||||||
|
|
||||||
repo = Repository(repository_path)
|
repo = Repository(config_path)
|
||||||
|
|
||||||
print(f"Repository name: {repo.name}")
|
print(f"[blue]Config path: [bold yellow]{repo.config_path}")
|
||||||
|
print(f"[blue]Device name: [bold yellow]{repo.device_name}")
|
||||||
|
print(f"[blue]Local repository: [bold yellow]{repo._local_repository_path}")
|
||||||
|
|
||||||
if repo.path_map:
|
print("[blue]Path map:[/blue] (bsv path <-> filesystem path)")
|
||||||
print("Path map: (bsv path <-> filesystem path)")
|
if repo.path_map.pairs:
|
||||||
for pair in sorted(repo.path_map):
|
for pair in sorted(repo.path_map.pairs):
|
||||||
print(f" {pair.bsv} <-> {pair.fs}")
|
print(f" {pair.bsv} <-> {pair.fs}")
|
||||||
else:
|
else:
|
||||||
print("Path map is empty.")
|
print(" [bold yellow]No path mapped.")
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|||||||
@@ -16,35 +16,40 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
from os import getlogin
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import platform
|
import platform
|
||||||
|
|
||||||
from bsv.command import command
|
from bsv.command import command
|
||||||
|
from bsv.repository import check_config_path, check_device_name, check_local_repository_path, create_repository
|
||||||
|
from bsv.util import default_local_repository_path
|
||||||
|
|
||||||
|
|
||||||
def init_parser(parser: ArgumentParser):
|
def init_parser(parser: ArgumentParser):
|
||||||
parser.add_argument(
|
|
||||||
"--name", "-d",
|
|
||||||
help = "Name of the repository. Default to system hostname.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--interactive", "-i",
|
"--interactive", "-i",
|
||||||
|
default = False,
|
||||||
action = "store_true",
|
action = "store_true",
|
||||||
help = "Prompt the user for configuration choices.",
|
help = "Prompt the user for configuration choices.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"destination",
|
"--local-repository", "-l",
|
||||||
type = Path,
|
type = Path,
|
||||||
|
default = default_local_repository_path(),
|
||||||
nargs = "?",
|
nargs = "?",
|
||||||
|
dest = "local_repository_path",
|
||||||
help = "Path to a non-existing or empty folder where bsv data will be stored.",
|
help = "Path to a non-existing or empty folder where bsv data will be stored.",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--device-name", "-n",
|
||||||
|
default = platform.node(),
|
||||||
|
help = "Name of the device. Default to system hostname.",
|
||||||
|
)
|
||||||
|
|
||||||
@command(init_parser)
|
@command(init_parser)
|
||||||
def init(
|
def init(
|
||||||
repository_path: Path | None,
|
config_path: Path,
|
||||||
destination: Path | None = None,
|
device_name: str,
|
||||||
name: str | None = None,
|
local_repository_path: Path,
|
||||||
interactive: bool = False,
|
interactive: bool = False,
|
||||||
) -> int:
|
) -> int:
|
||||||
"""Initialize a new bsv repository.
|
"""Initialize a new bsv repository.
|
||||||
@@ -52,60 +57,62 @@ def init(
|
|||||||
from datetime import datetime as DateTime
|
from datetime import datetime as DateTime
|
||||||
import tomlkit
|
import tomlkit
|
||||||
|
|
||||||
if name is None:
|
from bsv.cli import get_console, get_error_console, prompt, prompt_confirmation
|
||||||
name = platform.node()
|
|
||||||
|
|
||||||
if destination is None:
|
print = get_console().print
|
||||||
# TODO: Choose a sensible system-dependent path.
|
|
||||||
destination = Path.cwd()
|
def make_config_path(value: str) -> Path:
|
||||||
|
path = Path(value.strip())
|
||||||
|
if not path.is_absolute():
|
||||||
|
path = path.resolve()
|
||||||
|
check_config_path(path)
|
||||||
|
return path
|
||||||
|
|
||||||
|
def make_device_name(value: str) -> str:
|
||||||
|
device_name = value.strip()
|
||||||
|
check_device_name(device_name)
|
||||||
|
return device_name
|
||||||
|
|
||||||
|
def make_local_repository_path(value: str) -> Path:
|
||||||
|
path = Path(value)
|
||||||
|
if not path.is_absolute():
|
||||||
|
path = path.resolve()
|
||||||
|
check_local_repository_path(path)
|
||||||
|
return path
|
||||||
|
|
||||||
if interactive:
|
if interactive:
|
||||||
name = input(f"Repository name: (default to {name})\n").strip() or name
|
config_path = prompt("Bsv configuration file", make_config_path, default=config_path)
|
||||||
destination = Path(input(f"Destination: (default to {destination})\n").strip()) or destination
|
device_name = prompt("Device name", make_device_name, default=device_name)
|
||||||
if not destination.is_absolute():
|
local_repository_path = prompt("Destination", make_local_repository_path, default=local_repository_path)
|
||||||
destination = Path.cwd() / destination
|
|
||||||
|
|
||||||
if not name:
|
if not config_path.is_absolute():
|
||||||
raise RuntimeError("repository name cannot be empty")
|
config_path = config_path.resolve()
|
||||||
if not destination.parent.exists():
|
if not local_repository_path.is_absolute():
|
||||||
raise RuntimeError(f"destination directory {destination.parent} does not exists")
|
local_repository_path = local_repository_path.resolve()
|
||||||
if destination.exists() and not destination.is_dir():
|
|
||||||
raise RuntimeError(f"destination {destination} exists but is not a directory")
|
|
||||||
if destination.exists() and len(list(destination.iterdir())):
|
|
||||||
raise RuntimeError(f"destination directory {destination} is not empty")
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
destination.mkdir(exist_ok=True)
|
check_config_path(config_path)
|
||||||
except:
|
check_device_name(device_name)
|
||||||
raise RuntimeError(f"failed to create destination directory {destination}")
|
check_local_repository_path(local_repository_path)
|
||||||
|
except ValueError as err:
|
||||||
|
get_error_console().print(err, style="bold red")
|
||||||
|
return 1
|
||||||
|
|
||||||
bsv_table = tomlkit.table()
|
print("Bsv repository will be created with the following settings:", style="green")
|
||||||
bsv_table.add(tomlkit.comment("Name of the repository."))
|
print("")
|
||||||
bsv_table.add(tomlkit.comment("Ideally, this should be unique among all connected repositories."))
|
print(f"\t[blue]:page_facing_up: Config path[/blue]: [bold yellow]{config_path}")
|
||||||
bsv_table.add("name", name)
|
print(f"\t[blue]:computer: Device name[/blue]: [bold yellow]{device_name}")
|
||||||
bsv_table.add(tomlkit.nl())
|
print(f"\t[blue]:floppy_disk: Local repository[/blue]: [bold yellow]{local_repository_path}")
|
||||||
bsv_table.add(tomlkit.comment("Mapping between bsv tree and the actual filesystem."))
|
print("")
|
||||||
bsv_table.add("path_map", tomlkit.array())
|
|
||||||
|
|
||||||
cas_table = tomlkit.table()
|
if interactive:
|
||||||
cas_table.add("type", "simple")
|
if not prompt_confirmation("Create repository ?"):
|
||||||
cas_table.add("hash", "sha256")
|
return 1
|
||||||
|
|
||||||
doc = tomlkit.document()
|
create_repository(
|
||||||
doc.add(tomlkit.comment("bsv repository configuration"))
|
config_path = config_path,
|
||||||
doc.add(tomlkit.comment(f"Created by {getlogin()} on {DateTime.now().isoformat()}."))
|
device_name = device_name,
|
||||||
doc.add(tomlkit.nl())
|
local_repository_path = local_repository_path,
|
||||||
doc.add("bsv", bsv_table)
|
)
|
||||||
doc.add(tomlkit.nl())
|
|
||||||
doc.add("cas", cas_table)
|
|
||||||
|
|
||||||
config_path = destination / "bsv_config.toml"
|
|
||||||
try:
|
|
||||||
stream = config_path.open("w", encoding="utf-8")
|
|
||||||
except:
|
|
||||||
raise RuntimeError("failed to open configuration file {config_path}")
|
|
||||||
|
|
||||||
with stream:
|
|
||||||
tomlkit.dump(doc, stream)
|
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|||||||
32
src/bsv/exception.py
Normal file
32
src/bsv/exception.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
|
class BsvError(RuntimeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class NotFound(BsvError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class UnmappedPath(BsvError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class UnexpectedObjectType(BsvError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class ConfigError(BsvError):
|
||||||
|
pass
|
||||||
@@ -22,7 +22,9 @@ import sys
|
|||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
|
|
||||||
from bsv import __version__
|
from bsv import __version__
|
||||||
|
from bsv.cli import get_error_console, init_consoles
|
||||||
from bsv.command import init_commands
|
from bsv.command import init_commands
|
||||||
|
from bsv.util import default_bsv_config_path
|
||||||
|
|
||||||
|
|
||||||
def make_parser(
|
def make_parser(
|
||||||
@@ -31,10 +33,20 @@ def make_parser(
|
|||||||
) -> ArgumentParser:
|
) -> ArgumentParser:
|
||||||
parent_parser = ArgumentParser(add_help=False)
|
parent_parser = ArgumentParser(add_help=False)
|
||||||
parent_parser.add_argument(
|
parent_parser.add_argument(
|
||||||
"--repository",
|
"--color",
|
||||||
type = Path,
|
default = "auto",
|
||||||
|
choices = ("always", "auto", "never"),
|
||||||
help = dedent("""
|
help = dedent("""
|
||||||
Bsv repository path. Overides default paths and BSV_REPOSITORY environment variable.
|
Force or disable colors, or auto-detect terminal support.
|
||||||
|
""").strip(),
|
||||||
|
)
|
||||||
|
parent_parser.add_argument(
|
||||||
|
"--config",
|
||||||
|
default = default_bsv_config_path(),
|
||||||
|
type = Path,
|
||||||
|
dest = "config_path",
|
||||||
|
help = dedent("""
|
||||||
|
Bsv config path. Overrides default paths and BSV_CONFIG environment variable.
|
||||||
""").strip(),
|
""").strip(),
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -68,16 +80,16 @@ def main(
|
|||||||
)
|
)
|
||||||
arg_dict = vars(parser.parse_args(args or sys.argv[1:]))
|
arg_dict = vars(parser.parse_args(args or sys.argv[1:]))
|
||||||
|
|
||||||
repository_path: Path | None = arg_dict.pop("repository")
|
color = arg_dict.pop("color")
|
||||||
if repository_path is None and "BSV_REPOSITORY" in os.environ:
|
init_consoles(color=color)
|
||||||
repository_path = Path(os.environ["BSV_REPOSITORY"])
|
|
||||||
# else:
|
|
||||||
# for path in get_config_dirs():
|
|
||||||
# maybe_config_path = path / "config.toml"
|
|
||||||
# if maybe_config_path.is_file():
|
|
||||||
# config_path = maybe_config_path
|
|
||||||
# break
|
|
||||||
|
|
||||||
command = arg_dict.pop("command")
|
command = arg_dict.pop("command")
|
||||||
|
|
||||||
return command(repository_path=repository_path, **arg_dict)
|
try:
|
||||||
|
return command(**arg_dict)
|
||||||
|
except Exception as err:
|
||||||
|
get_error_console().print_exception()
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
return 130
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|||||||
@@ -15,33 +15,30 @@
|
|||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from dataclasses import dataclass
|
||||||
from typing import BinaryIO
|
|
||||||
|
|
||||||
|
|
||||||
def read_exact(stream: BinaryIO, num_bytes: int) -> bytes:
|
@dataclass(frozen=True, order=True, slots=True)
|
||||||
data = stream.read(num_bytes)
|
class Digest:
|
||||||
if len(data) != num_bytes:
|
digest: bytes = b""
|
||||||
raise IOError(f"expected {num_bytes} bytes, got {len(data)}")
|
|
||||||
return data
|
|
||||||
|
|
||||||
def read_exact_or_eof(stream: BinaryIO, num_bytes: int) -> bytes | None:
|
def __bool__(self) -> bool:
|
||||||
data = stream.read(num_bytes)
|
return bool(self.digest)
|
||||||
if not data:
|
|
||||||
return None
|
def __repr__(self) -> str:
|
||||||
if len(data) != num_bytes:
|
return self.digest.hex()
|
||||||
raise IOError(f"expected {num_bytes} bytes, got {len(data)}")
|
|
||||||
return data
|
|
||||||
|
|
||||||
|
|
||||||
class Hash(ABC):
|
@dataclass(frozen=True, order=True, slots=True)
|
||||||
name: str
|
class ObjectInfo:
|
||||||
digest_size: int
|
digest: Digest
|
||||||
|
object_type: bytes
|
||||||
|
size: int
|
||||||
|
|
||||||
@abstractmethod
|
def __repr__(self) -> str:
|
||||||
def update(self, *data: bytes | bytearray | memoryview):
|
return f"<{self.__class__.__name__} {self.digest} {self.object_type.decode()} {self.size}B>"
|
||||||
...
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def digest(self) -> bytes:
|
@dataclass(frozen=True, order=True, slots=True)
|
||||||
...
|
class Object(ObjectInfo):
|
||||||
|
data: bytes
|
||||||
89
src/bsv/path_map.py
Normal file
89
src/bsv/path_map.py
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from itertools import pairwise
|
||||||
|
|
||||||
|
from pathlib import Path, PurePosixPath
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from bsv.exception import UnmappedPath
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(order=True, frozen=True, slots=True)
|
||||||
|
class PathPair:
|
||||||
|
bsv: PurePosixPath
|
||||||
|
fs: Path
|
||||||
|
|
||||||
|
def __post_init__(self):
|
||||||
|
if not self.bsv.is_absolute() or not self.fs.is_absolute():
|
||||||
|
raise ValueError("paths in path_map must be absolute")
|
||||||
|
super().__setattr__("fs", self.fs.resolve())
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_obj(cls, obj: dict[str, str]) -> PathPair:
|
||||||
|
return cls(
|
||||||
|
bsv = PurePosixPath(obj["bsv"]),
|
||||||
|
fs = Path(obj["fs"]),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PathMap:
|
||||||
|
pairs: list[PathPair]
|
||||||
|
|
||||||
|
def __init__(self, pairs: list[PathPair]=[]):
|
||||||
|
self.pairs = sorted(pairs)
|
||||||
|
for path0, path1 in pairwise(self.pairs):
|
||||||
|
if path0 == path1 or path1.bsv.relative_to(path0.bsv):
|
||||||
|
raise ValueError("bsv paths must be unique and independent")
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_obj(cls, obj: list[dict[str, str]]) -> PathMap:
|
||||||
|
return cls([
|
||||||
|
PathPair.from_obj(item)
|
||||||
|
for item in obj
|
||||||
|
])
|
||||||
|
|
||||||
|
def mount_point(self, fs_path: Path) -> PathPair:
|
||||||
|
fs_path = fs_path.resolve()
|
||||||
|
best_pair = None
|
||||||
|
best_relative = None
|
||||||
|
for pair in self.pairs:
|
||||||
|
try:
|
||||||
|
relative = fs_path.relative_to(pair.fs)
|
||||||
|
if not best_pair or not best_relative or len(relative.parts) < len(best_relative.parts):
|
||||||
|
best_pair = pair
|
||||||
|
best_relative = relative
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
if best_pair is None:
|
||||||
|
raise UnmappedPath(f"unmapped fs path {fs_path}")
|
||||||
|
return best_pair
|
||||||
|
|
||||||
|
def relative_bsv_path(self, fs_target: Path, relative_to: Path) -> PurePosixPath:
|
||||||
|
if not relative_to.is_absolute():
|
||||||
|
relative_to = fs_target / relative_to
|
||||||
|
|
||||||
|
fs_target = fs_target.resolve()
|
||||||
|
relative_to = relative_to.resolve()
|
||||||
|
|
||||||
|
target_mount_point = self.mount_point(fs_target)
|
||||||
|
base_mount_point = self.mount_point(relative_to)
|
||||||
|
|
||||||
|
raise NotImplemented("not implemented yet")
|
||||||
|
|
||||||
|
def clone(self) -> PathMap:
|
||||||
|
return PathMap(self.pairs)
|
||||||
@@ -16,21 +16,25 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import UTC, datetime as DateTime, timedelta as TimeDelta
|
from datetime import datetime as DateTime
|
||||||
import hashlib
|
import hashlib
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pathlib import Path, PurePosixPath
|
from pathlib import Path, PurePosixPath
|
||||||
import platform
|
|
||||||
import tomllib
|
import tomllib
|
||||||
from typing import Any, BinaryIO, Callable, Type
|
from typing import TYPE_CHECKING, Any, BinaryIO, Self
|
||||||
|
|
||||||
from fastcdc import fastcdc
|
from fastcdc import fastcdc
|
||||||
import tomlkit
|
|
||||||
|
|
||||||
from bsv import __version__
|
from bsv import __version__
|
||||||
|
from bsv.exception import ConfigError
|
||||||
|
from bsv.object import ObjectInfo
|
||||||
|
from bsv.path_map import PathMap
|
||||||
from bsv.simple_cas import SimpleCas
|
from bsv.simple_cas import SimpleCas
|
||||||
from bsv.simple_cas.cas import ConfigError, Digest, SimpleCas
|
from bsv.simple_cas.cas import Digest, SimpleCas
|
||||||
from bsv.simple_cas.util import Hash, read_exact, read_exact_or_eof
|
from bsv.util import default_bsv_config_path, default_local_repository_path, read_exact, read_exact_or_eof, time_from_timestamp_us, timestamp_us_from_time
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from bsv.tree_walker import TreeWalker
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_MIN_CHUNK_SIZE = 1 << 12
|
DEFAULT_MIN_CHUNK_SIZE = 1 << 12
|
||||||
@@ -39,91 +43,191 @@ DEFAULT_MAX_CHUNK_SIZE = 1 << 20
|
|||||||
|
|
||||||
|
|
||||||
class Repository:
|
class Repository:
|
||||||
_path: Path
|
_config_path: Path
|
||||||
_name: str
|
_device_name: str
|
||||||
|
_local_repository_path: Path
|
||||||
|
|
||||||
_cas: SimpleCas
|
_cas: SimpleCas
|
||||||
_min_chunk_size: int = DEFAULT_MIN_CHUNK_SIZE
|
_min_chunk_size: int = DEFAULT_MIN_CHUNK_SIZE
|
||||||
_avg_chunk_size: int = DEFAULT_AVG_CHUNK_SIZE
|
_avg_chunk_size: int = DEFAULT_AVG_CHUNK_SIZE
|
||||||
_max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE
|
_max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE
|
||||||
|
|
||||||
_path_map: list[PathPair]
|
_path_map: PathMap
|
||||||
# _remotes: list[object]
|
# _remotes: list[object]
|
||||||
|
|
||||||
_context_depth: int = 0
|
_context_depth: int = 0
|
||||||
|
|
||||||
def __init__(self, path: Path):
|
def __init__(self, config_path: Path):
|
||||||
self._path = path
|
self._config_path = config_path.resolve()
|
||||||
|
|
||||||
with self.config_file.open("rb") as stream:
|
with self._config_path.open("rb") as stream:
|
||||||
config = tomllib.load(stream)
|
config = tomllib.load(stream)
|
||||||
|
|
||||||
bsv = config.get("bsv", {})
|
bsv = config.get("bsv", {})
|
||||||
|
def get(key: str) -> Any:
|
||||||
|
value = bsv.get(key)
|
||||||
|
if value is None:
|
||||||
|
raise ConfigError(f"invalid bsv configuration: missing bsv.{key} item")
|
||||||
|
return value
|
||||||
|
|
||||||
self._name = bsv.get("name") or platform.node()
|
self._device_name = get("device_name")
|
||||||
|
self._local_repository_path = Path(get("local_repository"))
|
||||||
|
self._min_chunk_size = get("min_chunk_size")
|
||||||
|
self._avg_chunk_size = get("avg_chunk_size")
|
||||||
|
self._max_chunk_size = get("max_chunk_size")
|
||||||
|
self._path_map = PathMap.from_obj(get("path_map"))
|
||||||
|
|
||||||
self._cas = make_cas(
|
self._cas = make_cas(self._local_repository_path)
|
||||||
bsv.get("cas"),
|
|
||||||
self._path,
|
|
||||||
lambda: hashlib.new(bsv.get("hash")), # type: ignore
|
@property
|
||||||
|
def config_path(self) -> Path:
|
||||||
|
return self._config_path
|
||||||
|
|
||||||
|
@property
|
||||||
|
def device_name(self) -> str:
|
||||||
|
return self._device_name
|
||||||
|
|
||||||
|
@property
|
||||||
|
def path_map(self) -> PathMap:
|
||||||
|
return self._path_map.clone()
|
||||||
|
|
||||||
|
def get_blob(self, digest: Digest) -> BlobObject:
|
||||||
|
with self:
|
||||||
|
obj, blob = self._read(digest, object_type=b"blob")
|
||||||
|
return BlobObject(
|
||||||
|
digest = obj.digest,
|
||||||
|
object_type = obj.object_type,
|
||||||
|
size = obj.size,
|
||||||
|
blob = blob,
|
||||||
|
)
|
||||||
|
|
||||||
|
def add_blob(self, stream: BinaryIO, *, dry_run: bool=False) -> BlobObject:
|
||||||
|
with self:
|
||||||
|
return self._write(b"blob", stream, dry_run=dry_run)
|
||||||
|
|
||||||
|
def get_symlink(self, digest: Digest) -> SymlinkObject:
|
||||||
|
with self:
|
||||||
|
obj = self._cas.read(digest, object_type=b"slnk")
|
||||||
|
return SymlinkObject(
|
||||||
|
digest = obj.digest,
|
||||||
|
object_type = obj.object_type,
|
||||||
|
size = obj.size,
|
||||||
|
symlink = Symlink.from_bytes(self, obj.data),
|
||||||
|
)
|
||||||
|
|
||||||
|
def add_symlink(self, symlink: Symlink, *, dry_run: bool=False) -> SymlinkObject:
|
||||||
|
with self:
|
||||||
|
data = symlink.to_bytes()
|
||||||
|
return SymlinkObject(
|
||||||
|
digest = self._cas.write(b"slnk", data, dry_run=dry_run),
|
||||||
|
object_type = b"slnk",
|
||||||
|
size = len(data),
|
||||||
|
symlink = symlink,
|
||||||
|
)
|
||||||
|
|
||||||
|
def add_symlink_from_fs_target(self, fs_symlink: Path, fs_target: Path, *, dry_run: bool=False) -> SymlinkObject:
|
||||||
|
assert fs_symlink.is_absolute()
|
||||||
|
return self.add_symlink(
|
||||||
|
Symlink(
|
||||||
|
repo = self,
|
||||||
|
is_absolute = fs_target.is_absolute(),
|
||||||
|
target = self._path_map.relative_bsv_path(fs_target, relative_to=fs_symlink),
|
||||||
|
),
|
||||||
|
dry_run = dry_run,
|
||||||
)
|
)
|
||||||
self._min_chunk_size = bsv.get("min_chunk_size")
|
|
||||||
self._avg_chunk_size = bsv.get("avg_chunk_size")
|
|
||||||
self._max_chunk_size = bsv.get("max_chunk_size")
|
|
||||||
|
|
||||||
self._path_map = [
|
def get_tree(self, digest: Digest) -> TreeObject:
|
||||||
PathPair.from_obj(pair)
|
|
||||||
for pair in bsv.get("path_map", [])
|
|
||||||
]
|
|
||||||
|
|
||||||
@property
|
|
||||||
def path(self) -> Path:
|
|
||||||
return self._path
|
|
||||||
|
|
||||||
@property
|
|
||||||
def config_file(self) -> Path:
|
|
||||||
return self.path / "bsv_config.toml"
|
|
||||||
|
|
||||||
@property
|
|
||||||
def name(self) -> str:
|
|
||||||
return self._name
|
|
||||||
|
|
||||||
@property
|
|
||||||
def path_map(self) -> list[PathPair]:
|
|
||||||
return list(self._path_map)
|
|
||||||
|
|
||||||
def get_blob(self, digest: Digest) -> Blob:
|
|
||||||
with self:
|
with self:
|
||||||
return self._read(digest, object_type=b"blob", cls=Blob) # type: ignore
|
obj = self._cas.read(digest, object_type=b"tree")
|
||||||
|
return TreeObject(
|
||||||
|
digest = obj.digest,
|
||||||
|
object_type = obj.object_type,
|
||||||
|
size = obj.size,
|
||||||
|
tree = Tree.from_bytes(self, obj.data),
|
||||||
|
)
|
||||||
|
|
||||||
def add_blob(self, stream: BinaryIO) -> Digest:
|
def add_tree(self, tree: Tree, *, dry_run: bool=False) -> TreeObject:
|
||||||
with self:
|
with self:
|
||||||
return self._write(b"blob", stream)
|
data = tree.to_bytes()
|
||||||
|
return TreeObject(
|
||||||
|
digest = self._cas.write(b"tree", data, dry_run=dry_run),
|
||||||
|
object_type = b"tree",
|
||||||
|
size = len(data),
|
||||||
|
tree = tree,
|
||||||
|
)
|
||||||
|
|
||||||
def get_tree(self, digest: Digest) -> Tree:
|
def add_tree_from_path(self, path: Path, *, dry_run: bool=False) -> TreeObject:
|
||||||
|
from bsv.tree_walker import TreeWalker
|
||||||
|
walker = TreeWalker(self, dry_run=dry_run)
|
||||||
|
return walker.add_tree(path)
|
||||||
|
|
||||||
|
def get_snapshot(self, digest: Digest) -> SnapshotObject:
|
||||||
with self:
|
with self:
|
||||||
return Tree.from_bytes(self, self._cas.read(digest, object_type=b"tree").data)
|
obj = self._cas.read(digest, object_type=b"snap")
|
||||||
|
return SnapshotObject(
|
||||||
|
digest = obj.digest,
|
||||||
|
object_type = obj.object_type,
|
||||||
|
size = obj.size,
|
||||||
|
snapshot = Snapshot.from_bytes(self, obj.data),
|
||||||
|
)
|
||||||
|
|
||||||
def add_tree(self, tree: Tree) -> Digest:
|
def add_snapshot(self, snapshot: Snapshot, *, dry_run: bool=False) -> SnapshotObject:
|
||||||
with self:
|
with self:
|
||||||
return self._cas.write(b"tree", tree.to_bytes())
|
data = snapshot.to_bytes()
|
||||||
|
return SnapshotObject(
|
||||||
|
digest = self._cas.write(b"snap", data, dry_run=dry_run),
|
||||||
|
object_type = b"snap",
|
||||||
|
size = len(data),
|
||||||
|
snapshot = snapshot,
|
||||||
|
)
|
||||||
|
|
||||||
def get_snapshot(self, digest: Digest) -> Snapshot:
|
# def take_snapshot(
|
||||||
with self:
|
# self,
|
||||||
return Snapshot.from_bytes(self, self._cas.read(digest, object_type=b"snap").data)
|
# parent_digests: list[Digest] = [],
|
||||||
|
# *,
|
||||||
|
# walker_type: Type[TreeWalker] | None = None,
|
||||||
|
# dry_run: bool = False,
|
||||||
|
# ):
|
||||||
|
# from bsv.tree_walker import TreeWalker
|
||||||
|
|
||||||
def add_snapshot(self, snapshot: Snapshot) -> Digest:
|
# walker = (walker_type or TreeWalker)(self, dry_run=dry_run)
|
||||||
with self:
|
|
||||||
return self._cas.write(b"snap", snapshot.to_bytes())
|
|
||||||
|
|
||||||
def _read(self, digest: Digest, object_type: bytes, cls: Type[ChunkedObject]) -> ChunkedObject:
|
# # parents = [
|
||||||
|
# # self.get_snapshot(digest)
|
||||||
|
# # for digest in parent_digests
|
||||||
|
# # ]
|
||||||
|
# parent = self.get_snapshot(parent_digests[0]) if parent_digests else None
|
||||||
|
|
||||||
|
# snapshot = Snapshot(
|
||||||
|
# repo = self,
|
||||||
|
# tree_digest = walker.add_virtual_tree(self._path_map, parent=),
|
||||||
|
# parents = parent_digests,
|
||||||
|
# repo_name = self._name,
|
||||||
|
# timestamp = timestamp_us_from_time(DateTime.now()),
|
||||||
|
# )
|
||||||
|
# return self.add_snapshot(snapshot, dry_run=dry_run)
|
||||||
|
|
||||||
|
|
||||||
|
def get_ref(self, key: str) -> Digest | None:
|
||||||
|
return self._cas.get_ref(key)
|
||||||
|
|
||||||
|
def set_ref(self, key: str, digest: Digest):
|
||||||
|
self._cas.set_ref(key, digest)
|
||||||
|
|
||||||
|
def get_head_snapshot(self) -> Digest | None:
|
||||||
|
return self.get_ref("HEAD")
|
||||||
|
|
||||||
|
|
||||||
|
def _read(self, digest: Digest, object_type: bytes) -> tuple[ObjectInfo, Blob]:
|
||||||
obj = self._cas.read(digest, object_type=object_type)
|
obj = self._cas.read(digest, object_type=object_type)
|
||||||
stream = BytesIO(obj.data)
|
stream = BytesIO(obj.data)
|
||||||
return cls.from_stream(self, stream, digest_size=self._cas._digest_size)
|
return obj, Blob.from_stream(self, stream, digest_size=self._cas._digest_size)
|
||||||
|
|
||||||
def _write(self, object_type: bytes, stream: BinaryIO) -> Digest:
|
def _write(self, object_type: bytes, stream: BinaryIO, *, dry_run: bool=False) -> BlobObject:
|
||||||
out = BytesIO()
|
out = BytesIO()
|
||||||
size = 0
|
size = 0
|
||||||
|
chunks = []
|
||||||
for chunk in fastcdc(
|
for chunk in fastcdc(
|
||||||
stream,
|
stream,
|
||||||
min_size = self._min_chunk_size,
|
min_size = self._min_chunk_size,
|
||||||
@@ -132,10 +236,20 @@ class Repository:
|
|||||||
fat = True,
|
fat = True,
|
||||||
):
|
):
|
||||||
size += chunk.length
|
size += chunk.length
|
||||||
digest = self._cas.write(b"chnk", chunk.data)
|
digest = self._cas.write(b"chnk", chunk.data, dry_run=dry_run)
|
||||||
|
chunks.append(Chunk(digest, chunk.length))
|
||||||
out.write(digest.digest)
|
out.write(digest.digest)
|
||||||
out.write(chunk.length.to_bytes(4))
|
out.write(chunk.length.to_bytes(4))
|
||||||
return self._cas.write(object_type, size.to_bytes(8) + out.getvalue())
|
return BlobObject(
|
||||||
|
digest = self._cas.write(object_type, size.to_bytes(8) + out.getvalue()),
|
||||||
|
object_type = object_type,
|
||||||
|
size = 8 + len(out.getvalue()),
|
||||||
|
blob = Blob(
|
||||||
|
repo = self,
|
||||||
|
size = size,
|
||||||
|
chunks = chunks,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
if self._context_depth == 0:
|
if self._context_depth == 0:
|
||||||
@@ -149,9 +263,30 @@ class Repository:
|
|||||||
return self._cas.__exit__(exc_type, exc_value, traceback)
|
return self._cas.__exit__(exc_type, exc_value, traceback)
|
||||||
|
|
||||||
|
|
||||||
|
def check_config_path(path: Path):
|
||||||
|
if path.exists():
|
||||||
|
raise ValueError(f"{path} already exists.")
|
||||||
|
if path != default_bsv_config_path() and not path.parent.is_dir():
|
||||||
|
raise ValueError(f"{path.parent} does not exist or is not a directory.")
|
||||||
|
|
||||||
|
def check_device_name(device_name: str):
|
||||||
|
if not device_name:
|
||||||
|
raise ValueError("Device name cannot be empty.")
|
||||||
|
if not device_name.isidentifier():
|
||||||
|
raise ValueError(f"{device_name} is not a valid device name.")
|
||||||
|
|
||||||
|
def check_local_repository_path(path: Path):
|
||||||
|
if path != default_local_repository_path() and not path.parent.exists():
|
||||||
|
raise ValueError(f"Directory {path.parent} does not exists.")
|
||||||
|
if path.exists() and not path.is_dir():
|
||||||
|
raise ValueError(f"{path} exists but is not a directory.")
|
||||||
|
if path.exists() and len(list(path.iterdir())):
|
||||||
|
raise ValueError(f"Local repository directory {path} is not empty.")
|
||||||
|
|
||||||
def create_repository(
|
def create_repository(
|
||||||
destination: Path,
|
config_path: Path,
|
||||||
name: str,
|
device_name: str,
|
||||||
|
local_repository_path: Path,
|
||||||
cas: str = "simple",
|
cas: str = "simple",
|
||||||
hash: str = "sha256",
|
hash: str = "sha256",
|
||||||
min_chunk_size: int = DEFAULT_MIN_CHUNK_SIZE,
|
min_chunk_size: int = DEFAULT_MIN_CHUNK_SIZE,
|
||||||
@@ -161,65 +296,108 @@ def create_repository(
|
|||||||
from datetime import datetime as DateTime
|
from datetime import datetime as DateTime
|
||||||
from os import getlogin
|
from os import getlogin
|
||||||
|
|
||||||
if not name:
|
import tomlkit
|
||||||
raise RuntimeError("repository name cannot be empty")
|
|
||||||
if not destination.parent.exists():
|
check_config_path(config_path)
|
||||||
raise RuntimeError(f"destination directory {destination.parent} does not exists")
|
check_device_name(device_name)
|
||||||
if destination.exists() and not destination.is_dir():
|
check_local_repository_path(local_repository_path)
|
||||||
raise RuntimeError(f"destination {destination} exists but is not a directory")
|
|
||||||
if destination.exists() and len(list(destination.iterdir())):
|
if config_path == default_bsv_config_path():
|
||||||
raise RuntimeError(f"destination directory {destination} is not empty")
|
try:
|
||||||
|
config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
except:
|
||||||
|
raise RuntimeError(f"failed to create bsv config destination directory {config_path.parent}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
destination.mkdir(exist_ok=True)
|
local_repository_path.mkdir(exist_ok=True)
|
||||||
except:
|
except:
|
||||||
raise RuntimeError(f"failed to create destination directory {destination}")
|
raise RuntimeError(f"failed to create local repository directory {local_repository_path}")
|
||||||
|
|
||||||
bsv_table = tomlkit.table()
|
bsv_table = tomlkit.table()
|
||||||
bsv_table.add(tomlkit.comment("Name of the repository."))
|
|
||||||
bsv_table.add(tomlkit.comment("Ideally, this should be unique among all connected repositories."))
|
bsv_table.add(tomlkit.comment("Name of the instance."))
|
||||||
bsv_table.add("name", name)
|
bsv_table.add(tomlkit.comment("Ideally, this should be unique among all connected devices."))
|
||||||
|
bsv_table.add("device_name", device_name)
|
||||||
|
|
||||||
bsv_table.add(tomlkit.nl())
|
bsv_table.add(tomlkit.nl())
|
||||||
bsv_table.add(tomlkit.comment("Mapping between bsv tree and the actual filesystem."))
|
bsv_table.add(tomlkit.comment("Path to the local repository."))
|
||||||
bsv_table.add("path_map", tomlkit.array())
|
bsv_table.add("local_repository", str(local_repository_path))
|
||||||
bsv_table.add("cas", cas)
|
|
||||||
bsv_table.add("hash", hash)
|
bsv_table.add(tomlkit.nl())
|
||||||
|
bsv_table.add(tomlkit.comment("Properties of the content-based chunking algorithm."))
|
||||||
bsv_table.add("min_chunk_size", min_chunk_size)
|
bsv_table.add("min_chunk_size", min_chunk_size)
|
||||||
bsv_table.add("avg_chunk_size", avg_chunk_size)
|
bsv_table.add("avg_chunk_size", avg_chunk_size)
|
||||||
bsv_table.add("max_chunk_size", max_chunk_size)
|
bsv_table.add("max_chunk_size", max_chunk_size)
|
||||||
|
|
||||||
doc = tomlkit.document()
|
bsv_table.add(tomlkit.nl())
|
||||||
doc.add(tomlkit.comment("bsv repository configuration"))
|
bsv_table.add(tomlkit.comment("Mapping between bsv tree and the local filesystem."))
|
||||||
doc.add(tomlkit.comment(f"Created by {getlogin()} on {DateTime.now().isoformat()}."))
|
bsv_table.add("path_map", tomlkit.array())
|
||||||
doc.add(tomlkit.nl())
|
|
||||||
doc.add("bsv", bsv_table)
|
bsv_config = tomlkit.document()
|
||||||
|
bsv_config.add(tomlkit.comment("bsv device configuration"))
|
||||||
|
bsv_config.add(tomlkit.comment(f"Created by {getlogin()} on {DateTime.now().isoformat()}."))
|
||||||
|
bsv_config.add(tomlkit.nl())
|
||||||
|
bsv_config.add("bsv", bsv_table)
|
||||||
|
|
||||||
|
|
||||||
|
cas_table = tomlkit.table()
|
||||||
|
cas_table.add("type", cas)
|
||||||
|
cas_table.add("hash", hash)
|
||||||
|
|
||||||
|
cas_config = tomlkit.document()
|
||||||
|
cas_config.add(tomlkit.comment(f"bsv local repository configuration for instance {config_path}."))
|
||||||
|
cas_config.add(tomlkit.comment(f"Created by {getlogin()} on {DateTime.now().isoformat()}."))
|
||||||
|
cas_config.add(tomlkit.nl())
|
||||||
|
cas_config.add("cas", cas_table)
|
||||||
|
|
||||||
config_path = destination / "bsv_config.toml"
|
|
||||||
try:
|
try:
|
||||||
stream = config_path.open("w", encoding="utf-8")
|
bsv_stream = config_path.open("w", encoding="utf-8")
|
||||||
except:
|
except:
|
||||||
raise RuntimeError("failed to open configuration file {config_path}")
|
raise RuntimeError(f"failed to open bsv configuration file {config_path}")
|
||||||
|
try:
|
||||||
|
cas_stream = (local_repository_path / "bsv_repository.config").open("w", encoding="utf-8")
|
||||||
|
except:
|
||||||
|
raise RuntimeError(f"failed to open local repository configuration file {config_path}")
|
||||||
|
|
||||||
with stream:
|
with bsv_stream:
|
||||||
tomlkit.dump(doc, stream)
|
tomlkit.dump(bsv_config, bsv_stream)
|
||||||
|
|
||||||
return Repository(destination)
|
with cas_stream:
|
||||||
|
tomlkit.dump(cas_config, cas_stream)
|
||||||
|
|
||||||
|
repo = Repository(config_path)
|
||||||
|
|
||||||
|
return repo
|
||||||
|
|
||||||
|
|
||||||
def make_cas(cas_name: str, path: Path, hash_factory: Callable[[], Hash]) -> SimpleCas:
|
def make_cas(cas_config_path: Path) -> SimpleCas:
|
||||||
if cas_name == "simple":
|
with (cas_config_path / "bsv_repository.config").open("rb") as stream:
|
||||||
return SimpleCas(path, hash_factory)
|
config = tomllib.load(stream)
|
||||||
raise ConfigError(f"unknown cas name {cas_name}")
|
|
||||||
|
cas = config.get("cas", {})
|
||||||
|
def get(key: str) -> Any:
|
||||||
|
value = cas.get(key)
|
||||||
|
if value is None:
|
||||||
|
raise ConfigError(f"invalid repository configuration: missing {key} item")
|
||||||
|
return value
|
||||||
|
|
||||||
|
type = get("type")
|
||||||
|
hash_factory = lambda: hashlib.new(get("hash"))
|
||||||
|
if type == "simple":
|
||||||
|
return SimpleCas(cas_config_path, hash_factory) # type: ignore
|
||||||
|
raise ConfigError(f"unknown cas type {type}")
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
class ChunkedObject:
|
class ChunkedObject:
|
||||||
repo: Repository
|
repo: Repository
|
||||||
size: int
|
size: int
|
||||||
chunks: list[Chunk]
|
chunks: list[Chunk]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_stream(cls, repo: Repository, stream: BinaryIO, digest_size: int) -> ChunkedObject:
|
def from_stream(cls, repo: Repository, stream: BinaryIO, digest_size: int) -> Self:
|
||||||
self = cls(
|
self = cls(
|
||||||
repo = repo,
|
repo = repo,
|
||||||
size = int.from_bytes(read_exact(stream, 8)),
|
size = int.from_bytes(read_exact(stream, 8)),
|
||||||
@@ -229,16 +407,41 @@ class ChunkedObject:
|
|||||||
self.chunks.append(chunk)
|
self.chunks.append(chunk)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
def reader(self) -> ChunkedObjectReader:
|
||||||
|
return ChunkedObjectReader(self)
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Blob(ChunkedObject):
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class Chunk:
|
||||||
|
digest: Digest
|
||||||
|
size: int
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_stream(cls, stream: BinaryIO, digest_size: int) -> Self | None:
|
||||||
|
digest_bytes = read_exact_or_eof(stream, digest_size)
|
||||||
|
if digest_bytes is None:
|
||||||
|
return None
|
||||||
|
digest = Digest(digest_bytes)
|
||||||
|
|
||||||
|
return cls(
|
||||||
|
digest = digest,
|
||||||
|
size = int.from_bytes(read_exact(stream, 4)),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ChunkedObjectReader:
|
||||||
|
_chunked_object: ChunkedObject
|
||||||
_chunk_index: int = 0
|
_chunk_index: int = 0
|
||||||
_chunk_data: bytes = b""
|
_chunk_data: bytes = b""
|
||||||
|
|
||||||
|
def __init__(self, chunked_object: ChunkedObject):
|
||||||
|
self._chunked_object = chunked_object
|
||||||
|
|
||||||
def read(self, num_bytes: int = -1) -> bytes:
|
def read(self, num_bytes: int = -1) -> bytes:
|
||||||
|
chunks = self._chunked_object.chunks
|
||||||
parts = [self._chunk_data]
|
parts = [self._chunk_data]
|
||||||
size = len(parts[-1])
|
size = len(parts[-1])
|
||||||
while (num_bytes < 0 or size < num_bytes) and self._chunk_index < len(self.chunks):
|
while (num_bytes < 0 or size < num_bytes) and self._chunk_index < len(chunks):
|
||||||
parts.append(self.read1())
|
parts.append(self.read1())
|
||||||
size += len(parts[-1])
|
size += len(parts[-1])
|
||||||
if num_bytes >= 0:
|
if num_bytes >= 0:
|
||||||
@@ -248,27 +451,79 @@ class Blob(ChunkedObject):
|
|||||||
return b"".join(parts)
|
return b"".join(parts)
|
||||||
|
|
||||||
def read1(self) -> bytes:
|
def read1(self) -> bytes:
|
||||||
if self._chunk_index == len(self.chunks):
|
cas = self._chunked_object.repo._cas
|
||||||
|
chunks = self._chunked_object.chunks
|
||||||
|
if self._chunk_index == len(chunks):
|
||||||
return b""
|
return b""
|
||||||
object = self.repo._cas.read(self.chunks[self._chunk_index].digest, object_type=b"chnk")
|
object = cas.read(chunks[self._chunk_index].digest, object_type=b"chnk")
|
||||||
self._chunk_index += 1
|
self._chunk_index += 1
|
||||||
return object.data
|
return object.data
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class Blob(ChunkedObject):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@dataclass(frozen=True, order=True, slots=True)
|
||||||
|
class BlobObject(ObjectInfo):
|
||||||
|
blob: Blob
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class Symlink:
|
||||||
|
repo: Repository
|
||||||
|
is_absolute: bool
|
||||||
|
target: PurePosixPath
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_stream(cls, repo: Repository, stream: BinaryIO) -> Self:
|
||||||
|
return cls(
|
||||||
|
repo = repo,
|
||||||
|
is_absolute = bool(read_exact(stream, 1)),
|
||||||
|
target = PurePosixPath(stream.read().decode("utf-8")),
|
||||||
|
)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_bytes(cls, repo: Repository, bytes: bytes) -> Self:
|
||||||
|
stream = BytesIO(bytes)
|
||||||
|
return cls.from_stream(repo, stream)
|
||||||
|
|
||||||
|
def write(self, stream: BinaryIO):
|
||||||
|
stream.write(self.is_absolute.to_bytes(1))
|
||||||
|
stream.write(self.target.as_posix().encode("utf-8"))
|
||||||
|
|
||||||
|
def to_bytes(self) -> bytes:
|
||||||
|
stream = BytesIO()
|
||||||
|
self.write(stream)
|
||||||
|
return stream.getvalue()
|
||||||
|
|
||||||
|
@dataclass(frozen=True, order=True, slots=True)
|
||||||
|
class SymlinkObject(ObjectInfo):
|
||||||
|
symlink: Symlink
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Tree:
|
class Tree:
|
||||||
repo: Repository
|
repo: Repository
|
||||||
items: list[TreeItem]
|
items: list[TreeItem]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def total_size(self) -> int:
|
||||||
|
return sum(
|
||||||
|
item.size
|
||||||
|
for item in self.items
|
||||||
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_stream(cls, repo: Repository, stream: BinaryIO) -> Tree:
|
def from_stream(cls, repo: Repository, stream: BinaryIO) -> Self:
|
||||||
tree = Tree(repo, [])
|
tree = Tree(repo, [])
|
||||||
while (item := TreeItem.from_stream(stream, repo._cas._digest_size)) is not None:
|
while (item := TreeItem.from_stream(stream, repo._cas._digest_size)) is not None:
|
||||||
tree.items.append(item)
|
tree.items.append(item)
|
||||||
return tree
|
return tree
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_bytes(cls, repo: Repository, data: bytes) -> Tree:
|
def from_bytes(cls, repo: Repository, data: bytes) -> Self:
|
||||||
stream = BytesIO(data)
|
stream = BytesIO(data)
|
||||||
return cls.from_stream(repo, stream)
|
return cls.from_stream(repo, stream)
|
||||||
|
|
||||||
@@ -282,156 +537,143 @@ class Tree:
|
|||||||
self.write(stream)
|
self.write(stream)
|
||||||
return stream.getvalue()
|
return stream.getvalue()
|
||||||
|
|
||||||
|
def __len__(self) -> int:
|
||||||
|
return len(self.items)
|
||||||
|
|
||||||
|
def get(self, key: str) -> TreeItem | None:
|
||||||
|
for item in self.items:
|
||||||
|
if item.name == key:
|
||||||
|
return item
|
||||||
|
return None
|
||||||
|
|
||||||
|
def __getitem__(self, key: str) -> TreeItem:
|
||||||
|
item = self.get(key)
|
||||||
|
if item is None:
|
||||||
|
raise KeyError(f"{key} not found")
|
||||||
|
return item
|
||||||
|
|
||||||
|
@dataclass(frozen=True, order=True, slots=True)
|
||||||
|
class TreeObject(ObjectInfo):
|
||||||
|
tree: Tree
|
||||||
|
|
||||||
|
@property
|
||||||
|
def total_size(self) -> int:
|
||||||
|
return self.size + self.tree.total_size
|
||||||
|
|
||||||
EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class TreeItem:
|
class TreeItem:
|
||||||
name: str
|
|
||||||
digest: Digest
|
digest: Digest
|
||||||
|
object_type: bytes
|
||||||
|
size: int
|
||||||
permissions: int
|
permissions: int
|
||||||
creation_timestamp: int
|
modification_timestamp_us: int
|
||||||
modification_timestamp: int
|
name: str
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
name: str,
|
|
||||||
digest: Digest,
|
digest: Digest,
|
||||||
|
object_type: bytes,
|
||||||
|
size: int,
|
||||||
permissions: int,
|
permissions: int,
|
||||||
creation_timestamp: int,
|
modification_timestamp_us: int,
|
||||||
modification_timestamp: int,
|
name: str,
|
||||||
):
|
):
|
||||||
if "/\\" in name:
|
if "/\\" in name:
|
||||||
raise ValueError(f"invalid tree item name {name}")
|
raise ValueError(f"invalid tree item name {name}")
|
||||||
self.name = name
|
|
||||||
self.digest = digest
|
self.digest = digest
|
||||||
|
self.object_type = object_type
|
||||||
|
self.size = size
|
||||||
self.permissions = permissions
|
self.permissions = permissions
|
||||||
self.creation_timestamp = creation_timestamp
|
self.modification_timestamp_us = modification_timestamp_us
|
||||||
self.modification_timestamp = modification_timestamp
|
self.name = name
|
||||||
|
|
||||||
@property
|
|
||||||
def creation_time(self) -> DateTime:
|
|
||||||
return time_from_timestamp(self.creation_timestamp)
|
|
||||||
@creation_time.setter
|
|
||||||
def creation_time(self, time: DateTime):
|
|
||||||
self.creation_timestamp = timestamp_from_time(time)
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def modification_time(self) -> DateTime:
|
def modification_time(self) -> DateTime:
|
||||||
return time_from_timestamp(self.modification_timestamp)
|
return time_from_timestamp_us(self.modification_timestamp_us)
|
||||||
@modification_time.setter
|
@modification_time.setter
|
||||||
def modification_time(self, time: DateTime):
|
def modification_time(self, time: DateTime):
|
||||||
self.modification_timestamp = timestamp_from_time(time)
|
self.modification_timestamp_us = timestamp_us_from_time(time)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_stream(cls, stream: BinaryIO, digest_size: int) -> TreeItem | None:
|
def from_stream(cls, stream: BinaryIO, digest_size: int) -> Self | None:
|
||||||
digest_bytes = read_exact_or_eof(stream, digest_size)
|
digest_bytes = read_exact_or_eof(stream, digest_size)
|
||||||
if digest_bytes is None:
|
if digest_bytes is None:
|
||||||
return None
|
return None
|
||||||
return TreeItem(
|
return TreeItem(
|
||||||
digest = Digest(digest_bytes),
|
digest = Digest(digest_bytes),
|
||||||
|
object_type = read_exact(stream, 4),
|
||||||
|
size = int.from_bytes(read_exact(stream, 8)),
|
||||||
permissions = int.from_bytes(read_exact(stream, 2)),
|
permissions = int.from_bytes(read_exact(stream, 2)),
|
||||||
creation_timestamp = int.from_bytes(read_exact(stream, 8), signed=True),
|
modification_timestamp_us = int.from_bytes(read_exact(stream, 8), signed=True),
|
||||||
modification_timestamp = int.from_bytes(read_exact(stream, 8), signed=True),
|
|
||||||
name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
|
name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
|
||||||
)
|
)
|
||||||
|
|
||||||
def write(self, stream: BinaryIO):
|
def write(self, stream: BinaryIO):
|
||||||
stream.write(self.digest.digest)
|
stream.write(self.digest.digest)
|
||||||
|
stream.write(self.object_type)
|
||||||
|
stream.write(self.size.to_bytes(8))
|
||||||
stream.write(self.permissions.to_bytes(2))
|
stream.write(self.permissions.to_bytes(2))
|
||||||
stream.write(self.creation_timestamp.to_bytes(8, signed=True))
|
stream.write(self.modification_timestamp_us.to_bytes(8, signed=True))
|
||||||
stream.write(self.modification_timestamp.to_bytes(8, signed=True))
|
|
||||||
name_bytes = self.name.encode("utf-8")
|
name_bytes = self.name.encode("utf-8")
|
||||||
stream.write(len(name_bytes).to_bytes(2))
|
stream.write(len(name_bytes).to_bytes(2))
|
||||||
stream.write(name_bytes)
|
stream.write(name_bytes)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Snapshot:
|
class Snapshot:
|
||||||
repo: Repository
|
repo: Repository
|
||||||
tree_digest: Digest
|
tree_digest: Digest
|
||||||
|
parents: list[Digest]
|
||||||
repo_name: str
|
repo_name: str
|
||||||
timestamp: int
|
timestamp_us: int
|
||||||
|
|
||||||
|
def __post_init__(self):
|
||||||
|
assert len(self.parents) < 256
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def time(self) -> DateTime:
|
def time(self) -> DateTime:
|
||||||
return time_from_timestamp(self.timestamp)
|
return time_from_timestamp_us(self.timestamp_us)
|
||||||
@time.setter
|
@time.setter
|
||||||
def time(self, time: DateTime):
|
def time(self, time: DateTime):
|
||||||
self.timestamp = timestamp_from_time(time)
|
self.timestamp_us = timestamp_us_from_time(time)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_stream(cls, repo: Repository, stream: BinaryIO) -> Snapshot:
|
def from_stream(cls, repo: Repository, stream: BinaryIO) -> Self:
|
||||||
return Snapshot(
|
return Snapshot(
|
||||||
repo = repo,
|
repo = repo,
|
||||||
tree_digest = Digest(read_exact(stream, repo._cas._digest_size)),
|
tree_digest = Digest(read_exact(stream, repo._cas._digest_size)),
|
||||||
|
parents = [
|
||||||
|
Digest(read_exact(stream, repo._cas._digest_size))
|
||||||
|
for _ in range(int.from_bytes(read_exact(stream, 1)))
|
||||||
|
],
|
||||||
repo_name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
|
repo_name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
|
||||||
timestamp = int.from_bytes(read_exact(stream, 8), signed=True),
|
timestamp_us = int.from_bytes(read_exact(stream, 8), signed=True),
|
||||||
)
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_bytes(cls, repo: Repository, data: bytes) -> Snapshot:
|
def from_bytes(cls, repo: Repository, data: bytes) -> Self:
|
||||||
stream = BytesIO(data)
|
stream = BytesIO(data)
|
||||||
return cls.from_stream(repo, stream)
|
return cls.from_stream(repo, stream)
|
||||||
|
|
||||||
def write(self, stream: BinaryIO):
|
def write(self, stream: BinaryIO):
|
||||||
|
assert len(self.parents) < 256
|
||||||
stream.write(self.tree_digest.digest)
|
stream.write(self.tree_digest.digest)
|
||||||
|
stream.write(len(self.parents).to_bytes(1))
|
||||||
|
for parent in self.parents:
|
||||||
|
stream.write(parent.digest)
|
||||||
repo_name_bytes = self.repo_name.encode("utf-8")
|
repo_name_bytes = self.repo_name.encode("utf-8")
|
||||||
stream.write(len(repo_name_bytes).to_bytes(2))
|
stream.write(len(repo_name_bytes).to_bytes(2))
|
||||||
stream.write(repo_name_bytes)
|
stream.write(repo_name_bytes)
|
||||||
stream.write(self.timestamp.to_bytes(8, signed=True))
|
stream.write(self.timestamp_us.to_bytes(8, signed=True))
|
||||||
|
|
||||||
def to_bytes(self) -> bytes:
|
def to_bytes(self) -> bytes:
|
||||||
stream = BytesIO()
|
stream = BytesIO()
|
||||||
self.write(stream)
|
self.write(stream)
|
||||||
return stream.getvalue()
|
return stream.getvalue()
|
||||||
|
|
||||||
|
@dataclass(frozen=True, order=True, slots=True)
|
||||||
@dataclass
|
class SnapshotObject(ObjectInfo):
|
||||||
class Chunk:
|
snapshot: Snapshot
|
||||||
digest: Digest
|
|
||||||
size: int
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def from_stream(cls, stream: BinaryIO, digest_size: int) -> Chunk | None:
|
|
||||||
digest_bytes = read_exact_or_eof(stream, digest_size)
|
|
||||||
if digest_bytes is None:
|
|
||||||
return None
|
|
||||||
digest = Digest(digest_bytes)
|
|
||||||
|
|
||||||
return cls(
|
|
||||||
digest = digest,
|
|
||||||
size = int.from_bytes(read_exact(stream, 4)),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class PathPair:
|
|
||||||
bsv: PurePosixPath
|
|
||||||
fs: Path
|
|
||||||
|
|
||||||
def __init__(self, bsv: PurePosixPath, fs: Path):
|
|
||||||
self.bsv = bsv
|
|
||||||
self.fs = fs
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def from_obj(cls, obj: dict[str, Any]) -> PathPair:
|
|
||||||
bsv = PurePosixPath(obj["bsv"])
|
|
||||||
fs = Path(obj["fs"])
|
|
||||||
|
|
||||||
if not bsv.is_absolute() or not fs.is_absolute():
|
|
||||||
raise ValueError("paths in path_map must be absolute")
|
|
||||||
|
|
||||||
return cls(
|
|
||||||
bsv = obj["bsv"],
|
|
||||||
fs = obj["fs"],
|
|
||||||
)
|
|
||||||
|
|
||||||
def __lt__(self, rhs: PathPair) -> bool:
|
|
||||||
return self.bsv < rhs.bsv
|
|
||||||
|
|
||||||
|
|
||||||
def time_from_timestamp(timestamp: int) -> DateTime:
|
|
||||||
return EPOCH + TimeDelta(microseconds=timestamp)
|
|
||||||
|
|
||||||
def timestamp_from_time(time: DateTime) -> int:
|
|
||||||
return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)
|
|
||||||
|
|||||||
@@ -19,21 +19,10 @@ from dataclasses import dataclass
|
|||||||
import hashlib
|
import hashlib
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, BinaryIO, Callable, Iterator
|
from typing import Any, BinaryIO, Callable, Iterator
|
||||||
|
from bsv.exception import NotFound, UnexpectedObjectType
|
||||||
|
from bsv.object import Digest, Object, ObjectInfo
|
||||||
|
|
||||||
from bsv.simple_cas.util import Hash, read_exact_or_eof
|
from bsv.util import Hash, read_exact_or_eof
|
||||||
|
|
||||||
|
|
||||||
class BsvError(RuntimeError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class NotFound(BsvError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class UnexpectedObjectType(BsvError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class ConfigError(BsvError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class SimpleCas:
|
class SimpleCas:
|
||||||
@@ -105,21 +94,19 @@ class SimpleCas:
|
|||||||
assert size == item.size
|
assert size == item.size
|
||||||
data = stream.read(size)
|
data = stream.read(size)
|
||||||
|
|
||||||
return Object(object_type, data)
|
return Object(digest, object_type, size, data)
|
||||||
|
|
||||||
def write(self, object_type: bytes, data: bytes) -> Digest:
|
def write(self, object_type: bytes, data: bytes, dry_run: bool=False) -> Digest:
|
||||||
assert len(object_type) == 4
|
assert len(object_type) == 4
|
||||||
assert len(data) < 2**32
|
assert len(data) < 2**32
|
||||||
|
|
||||||
hash = self._hash_factory()
|
hash = self._hash_factory()
|
||||||
hash.update(object_type)
|
hash.update(object_type)
|
||||||
hash.update(b"\0")
|
|
||||||
hash.update(len(data).to_bytes(4))
|
hash.update(len(data).to_bytes(4))
|
||||||
hash.update(b"\0")
|
|
||||||
hash.update(data)
|
hash.update(data)
|
||||||
digest = Digest(hash.digest())
|
digest = Digest(hash.digest())
|
||||||
|
|
||||||
if digest not in self:
|
if not dry_run and digest not in self:
|
||||||
with self._open_writer(digest, object_type, len(data)) as out:
|
with self._open_writer(digest, object_type, len(data)) as out:
|
||||||
out.write(digest.digest)
|
out.write(digest.digest)
|
||||||
out.write(object_type)
|
out.write(object_type)
|
||||||
@@ -164,23 +151,7 @@ class SimpleCas:
|
|||||||
return self._root_dir / "refs" / key_path
|
return self._root_dir / "refs" / key_path
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True, order=True, slots=True)
|
@dataclass(frozen=True)
|
||||||
class Digest:
|
|
||||||
digest: bytes
|
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
|
||||||
return self.digest.hex()
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Object:
|
|
||||||
object_type: bytes
|
|
||||||
data: bytes
|
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
|
||||||
return f"<Object {self.object_type.decode()}: {len(self.data)}B>"
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class IndexItem:
|
class IndexItem:
|
||||||
object_type: bytes
|
object_type: bytes
|
||||||
offset: int
|
offset: int
|
||||||
@@ -188,12 +159,3 @@ class IndexItem:
|
|||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return f"<IndexItem {self.object_type.decode()}: {self.offset}B +{self.size}B>"
|
return f"<IndexItem {self.object_type.decode()}: {self.offset}B +{self.size}B>"
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class ObjectInfo:
|
|
||||||
digest: Digest
|
|
||||||
object_type: bytes
|
|
||||||
size: int
|
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
|
||||||
return f"<ObjectInfo {self.digest} {self.object_type.decode()} {self.size}B>"
|
|
||||||
|
|||||||
268
src/bsv/tree_walker.py
Normal file
268
src/bsv/tree_walker.py
Normal file
@@ -0,0 +1,268 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime as DateTime, timedelta as TimeDelta
|
||||||
|
from enum import Enum
|
||||||
|
from os import stat_result
|
||||||
|
from pathlib import Path
|
||||||
|
import stat
|
||||||
|
|
||||||
|
from bsv.object import Digest, ObjectInfo
|
||||||
|
from bsv.repository import BlobObject, Repository, SymlinkObject, Tree, TreeItem, TreeObject
|
||||||
|
from bsv.util import is_bsv_repository, object_type_from_mode
|
||||||
|
|
||||||
|
|
||||||
|
class Action(Enum):
|
||||||
|
ADD = "add"
|
||||||
|
UPDATE = "update"
|
||||||
|
REMOVE = "remove"
|
||||||
|
IGNORE = "ignore"
|
||||||
|
ERROR = "error"
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_digests(cls, digest: Digest, source_digest: Digest | None) -> tuple[Action, IgnoreCause | None]:
|
||||||
|
assert digest
|
||||||
|
if not source_digest:
|
||||||
|
return Action.ADD, None
|
||||||
|
elif source_digest == digest:
|
||||||
|
return Action.IGNORE, IgnoreCause.UNCHANGED
|
||||||
|
else:
|
||||||
|
return Action.UPDATE, None
|
||||||
|
|
||||||
|
class IgnoreCause(Enum):
|
||||||
|
IGNORE_RULE = "ignore_rule"
|
||||||
|
UNCHANGED = "unchanged"
|
||||||
|
UNSUPPORTED_TYPE = "unsupported_type"
|
||||||
|
|
||||||
|
|
||||||
|
class TreeWalker:
|
||||||
|
_repo: Repository
|
||||||
|
_time_rounding_us: int = 2000000
|
||||||
|
_force_hash: bool = False
|
||||||
|
_dry_run: bool = False
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
repo: Repository,
|
||||||
|
*,
|
||||||
|
time_rounding_us: int = 2000000,
|
||||||
|
force_hash: bool = False,
|
||||||
|
dry_run: bool = False,
|
||||||
|
):
|
||||||
|
self._repo = repo
|
||||||
|
self._time_rounding_us = time_rounding_us
|
||||||
|
self._force_hash = force_hash
|
||||||
|
self._dry_run = dry_run
|
||||||
|
|
||||||
|
# def add_virtual_tree(self, paths: PathMap) -> Digest:
|
||||||
|
# assert paths
|
||||||
|
|
||||||
|
# fs_paths = sorted([
|
||||||
|
# pair.fs
|
||||||
|
# for pair in paths.pairs
|
||||||
|
# ])
|
||||||
|
|
||||||
|
# tree_map = {
|
||||||
|
# fs_paths[0]: self.add_tree()
|
||||||
|
# }
|
||||||
|
# root = {}
|
||||||
|
# for pair in paths.paths:
|
||||||
|
# vdir = root
|
||||||
|
# for part in pair.bsv.parts[:-1]:
|
||||||
|
# vdir = vdir.setdefault(part, {})
|
||||||
|
# vdir[pair.bsv.parts[-1]] = pair.fs
|
||||||
|
|
||||||
|
# return self._add_virtual_tree(root)
|
||||||
|
|
||||||
|
# def _add_virtual_tree(self, vtree: dict[str, dict | Path]) -> Digest:
|
||||||
|
# tree = Tree(self._repo, [])
|
||||||
|
# for name, value in vtree.items():
|
||||||
|
# if isinstance(value, dict):
|
||||||
|
# digest = self._add_virtual_tree(value)
|
||||||
|
# elif isinstance(value, Path):
|
||||||
|
# digest = self.add_tree(value)
|
||||||
|
# else:
|
||||||
|
# raise TypeError(f"unexpected type {type(vtree).__name__} for vtree")
|
||||||
|
# tree.items.append(TreeItem(
|
||||||
|
# digest = digest,
|
||||||
|
# object_type = b"tree",
|
||||||
|
# size = 0,
|
||||||
|
# permissions = 0o766,
|
||||||
|
# modification_timestamp = timestamp_us_from_time(DateTime.now()),
|
||||||
|
# name = name,
|
||||||
|
# ))
|
||||||
|
# return self._repo.add_tree(tree, dry_run=self._dry_run)
|
||||||
|
|
||||||
|
def add_tree(self, path: Path, *, source_digest: Digest | None=None) -> TreeObject:
|
||||||
|
pstat = path.stat(follow_symlinks=False)
|
||||||
|
if self.ignore(path, pstat):
|
||||||
|
raise ValueError(f"path {path} is ignored")
|
||||||
|
# self.report(Action.IGNORE, path, pstat, IgnoreCause.IGNORE_RULE)
|
||||||
|
# return Digest()
|
||||||
|
return self._add_tree(path, pstat, source_digest=source_digest)
|
||||||
|
|
||||||
|
def _add_tree(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> TreeObject:
|
||||||
|
source = self._repo.get_tree(source_digest).tree if source_digest else None
|
||||||
|
|
||||||
|
tree = Tree(self._repo, [])
|
||||||
|
subpaths = sorted(path.iterdir())
|
||||||
|
|
||||||
|
subpath_index = 0
|
||||||
|
source_item_index = 0
|
||||||
|
|
||||||
|
while subpath_index < len(subpaths) or (source and source_item_index < len(source.items)):
|
||||||
|
subpath = subpaths[subpath_index] if subpath_index < len(subpaths) else None
|
||||||
|
source_item = source.items[source_item_index] if source and source_item_index < len(source.items) else None
|
||||||
|
|
||||||
|
if subpath and source_item:
|
||||||
|
if subpath.name < source_item.name:
|
||||||
|
source_item = None
|
||||||
|
elif subpath.name > source_item.name:
|
||||||
|
subpath = None
|
||||||
|
|
||||||
|
if subpath is not None:
|
||||||
|
subpath_index += 1
|
||||||
|
if source_item is not None:
|
||||||
|
source_item_index += 1
|
||||||
|
|
||||||
|
if subpath is not None:
|
||||||
|
obj: ObjectInfo | None = None
|
||||||
|
try:
|
||||||
|
istat = subpath.lstat()
|
||||||
|
|
||||||
|
if self.ignore(subpath, istat, source=source_item):
|
||||||
|
self.report(Action.IGNORE, subpath, istat, IgnoreCause.IGNORE_RULE)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if (source_item is not None and
|
||||||
|
not self._force_hash and
|
||||||
|
not stat.S_ISDIR(istat.st_mode) and
|
||||||
|
pstat.st_size == source_item.size and
|
||||||
|
pstat.st_mtime_ns // (1000 * self._time_rounding_us) == source_item.modification_timestamp_us // self._time_rounding_us
|
||||||
|
):
|
||||||
|
self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNCHANGED)
|
||||||
|
tree.items.append(source_item)
|
||||||
|
continue
|
||||||
|
|
||||||
|
sub_source_digest = source_item and source_item.digest
|
||||||
|
object_type = object_type_from_mode(istat.st_mode)
|
||||||
|
if object_type == b"slnk":
|
||||||
|
obj = self._add_symlink(subpath, istat, source_digest=sub_source_digest)
|
||||||
|
size = obj.size
|
||||||
|
elif object_type == b"tree":
|
||||||
|
obj = self._add_tree(subpath, istat, source_digest=sub_source_digest)
|
||||||
|
size = obj.total_size
|
||||||
|
elif object_type == b"blob":
|
||||||
|
obj = self._add_blob(subpath, istat, source_digest=sub_source_digest)
|
||||||
|
size = istat.st_size
|
||||||
|
else:
|
||||||
|
self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNSUPPORTED_TYPE)
|
||||||
|
continue
|
||||||
|
except Exception as err:
|
||||||
|
self.report(Action.ERROR, subpath, None, err)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if obj:
|
||||||
|
tree.items.append(TreeItem(
|
||||||
|
digest = obj.digest,
|
||||||
|
object_type = object_type,
|
||||||
|
size = size,
|
||||||
|
permissions = stat.S_IMODE(istat.st_mode),
|
||||||
|
modification_timestamp_us = istat.st_mtime_ns // 1000,
|
||||||
|
name = subpath.name,
|
||||||
|
))
|
||||||
|
elif source_item:
|
||||||
|
self.report(Action.REMOVE, path / source_item.name, None, source_item)
|
||||||
|
|
||||||
|
tree_object = self._repo.add_tree(tree, dry_run=self._dry_run)
|
||||||
|
|
||||||
|
action, info = Action.from_digests(tree_object.digest, source_digest)
|
||||||
|
self.report(action, path, pstat, info)
|
||||||
|
return tree_object
|
||||||
|
|
||||||
|
|
||||||
|
def _add_symlink(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> SymlinkObject:
|
||||||
|
# TODO: Store symlink relative to current dir ?
|
||||||
|
# * What about symlink that points outside of the backup dirs
|
||||||
|
# * Should symlinks that points inside the backup dirs but in another
|
||||||
|
# mount-point adjusted ?
|
||||||
|
# * Should absolute symlink be restored as absolute ?
|
||||||
|
obj = self._repo.add_symlink_from_fs_target(path, path.readlink())
|
||||||
|
|
||||||
|
action, info = Action.from_digests(obj.digest, source_digest)
|
||||||
|
self.report(action, path, pstat, info)
|
||||||
|
return obj
|
||||||
|
|
||||||
|
def _add_blob(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> BlobObject:
|
||||||
|
with path.open("rb") as stream:
|
||||||
|
obj = self._repo.add_blob(stream, dry_run=self._dry_run)
|
||||||
|
|
||||||
|
action, info = Action.from_digests(obj.digest, source_digest)
|
||||||
|
self.report(action, path, pstat, info)
|
||||||
|
return obj
|
||||||
|
|
||||||
|
|
||||||
|
def ignore(self, path: Path, pstat: stat_result, *, source: TreeItem | None=None) -> bool:
|
||||||
|
return is_bsv_repository(path)
|
||||||
|
|
||||||
|
def report(self, action: Action, path: Path, pstat: stat_result | None, info: IgnoreCause | Exception | TreeItem | None=None):
|
||||||
|
match action, info:
|
||||||
|
case (Action.ADD, None):
|
||||||
|
print(f"Add: {path}")
|
||||||
|
case (Action.UPDATE, None):
|
||||||
|
print(f"Add: {path}")
|
||||||
|
case (Action.REMOVE, item) if isinstance(item, TreeItem):
|
||||||
|
print(f"Remove: {path / item.name}")
|
||||||
|
case (Action.IGNORE, IgnoreCause.IGNORE_RULE):
|
||||||
|
print(f"Ignore (rule): {path}")
|
||||||
|
case (Action.IGNORE, IgnoreCause.UNCHANGED):
|
||||||
|
print(f"Ignore (unchanged): {path}")
|
||||||
|
case (Action.IGNORE, IgnoreCause.UNSUPPORTED_TYPE) if pstat is not None:
|
||||||
|
print(f"Ignore (unsupported type {path_type_name(pstat)}): {path}")
|
||||||
|
case (Action.ERROR, _) if isinstance(info, Exception):
|
||||||
|
print(f"Error {info}: {path}")
|
||||||
|
case _:
|
||||||
|
raise ValueError("TreeWalker.report(): unsupported parameter combination")
|
||||||
|
|
||||||
|
|
||||||
|
def path_type_name(pstat: stat_result) -> str:
|
||||||
|
parts = []
|
||||||
|
|
||||||
|
if stat.S_ISBLK(pstat.st_mode):
|
||||||
|
parts.append("block_device")
|
||||||
|
if stat.S_ISCHR(pstat.st_mode):
|
||||||
|
parts.append("char_device")
|
||||||
|
if stat.S_ISDIR(pstat.st_mode):
|
||||||
|
parts.append("dir")
|
||||||
|
if stat.S_ISDOOR(pstat.st_mode):
|
||||||
|
parts.append("door")
|
||||||
|
if stat.S_ISFIFO(pstat.st_mode):
|
||||||
|
parts.append("fifo")
|
||||||
|
if stat.S_ISLNK(pstat.st_mode):
|
||||||
|
parts.append("symlink")
|
||||||
|
if stat.S_ISPORT(pstat.st_mode):
|
||||||
|
parts.append("port")
|
||||||
|
if stat.S_ISREG(pstat.st_mode):
|
||||||
|
parts.append("file")
|
||||||
|
if stat.S_ISSOCK(pstat.st_mode):
|
||||||
|
parts.append("socket")
|
||||||
|
if stat.S_ISWHT(pstat.st_mode):
|
||||||
|
parts.append("whiteout")
|
||||||
|
|
||||||
|
if not parts:
|
||||||
|
return "unknown"
|
||||||
|
return ", ".join(parts)
|
||||||
118
src/bsv/util.py
Normal file
118
src/bsv/util.py
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from datetime import UTC, datetime as DateTime, timedelta as TimeDelta
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
import platform
|
||||||
|
import stat
|
||||||
|
from typing import BinaryIO
|
||||||
|
|
||||||
|
|
||||||
|
EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
|
||||||
|
|
||||||
|
|
||||||
|
def time_from_timestamp_us(timestamp: int) -> DateTime:
|
||||||
|
return EPOCH + TimeDelta(microseconds=timestamp)
|
||||||
|
|
||||||
|
def timestamp_us_from_time(time: DateTime) -> int:
|
||||||
|
return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)
|
||||||
|
|
||||||
|
|
||||||
|
def read_exact(stream: BinaryIO, num_bytes: int) -> bytes:
|
||||||
|
data = stream.read(num_bytes)
|
||||||
|
if len(data) != num_bytes:
|
||||||
|
raise IOError(f"expected {num_bytes} bytes, got {len(data)}")
|
||||||
|
return data
|
||||||
|
|
||||||
|
def read_exact_or_eof(stream: BinaryIO, num_bytes: int) -> bytes | None:
|
||||||
|
data = stream.read(num_bytes)
|
||||||
|
if not data:
|
||||||
|
return None
|
||||||
|
if len(data) != num_bytes:
|
||||||
|
raise IOError(f"expected {num_bytes} bytes, got {len(data)}")
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
def is_bsv_repository(path: Path) -> bool:
|
||||||
|
return (path / "bsv_repository.config").is_file()
|
||||||
|
|
||||||
|
|
||||||
|
def object_type_from_path(path: Path) -> bytes:
|
||||||
|
return object_type_from_mode(path.stat(follow_symlinks=False).st_mode)
|
||||||
|
|
||||||
|
def object_type_from_mode(mode: int) -> bytes:
|
||||||
|
if stat.S_ISLNK(mode):
|
||||||
|
return b"slnk"
|
||||||
|
elif stat.S_ISDIR(mode):
|
||||||
|
return b"tree"
|
||||||
|
elif stat.S_ISREG(mode):
|
||||||
|
return b"blob"
|
||||||
|
return b""
|
||||||
|
|
||||||
|
|
||||||
|
def default_bsv_config_path() -> Path:
|
||||||
|
path = Path(os.environ.get("BSV_CONFIG", ""))
|
||||||
|
if path and path.is_absolute() and path.is_file():
|
||||||
|
return path
|
||||||
|
for path in user_config_dirs():
|
||||||
|
if path.is_file():
|
||||||
|
return path
|
||||||
|
return user_config_home() / "bsv/config"
|
||||||
|
|
||||||
|
def default_local_repository_path() -> Path:
|
||||||
|
return user_data_home() / "bsv"
|
||||||
|
|
||||||
|
|
||||||
|
def user_data_home() -> Path:
|
||||||
|
if platform.system() in ("Windows", "Darwin", "Java"):
|
||||||
|
raise NotImplemented(f"{platform.system()} support not implemented yet")
|
||||||
|
else: # Assume Unix
|
||||||
|
path = Path(os.environ.get("XDG_DATA_HOME", ""))
|
||||||
|
if path and path.is_absolute():
|
||||||
|
return path
|
||||||
|
return Path.home() / ".local/share"
|
||||||
|
|
||||||
|
def user_config_home() -> Path:
|
||||||
|
if platform.system() in ("Windows", "Darwin", "Java"):
|
||||||
|
raise NotImplemented(f"{platform.system()} support not implemented yet")
|
||||||
|
else: # Assume Unix
|
||||||
|
path = Path(os.environ.get("XDG_CONFIG_HOME", ""))
|
||||||
|
if path and path.is_absolute():
|
||||||
|
return path
|
||||||
|
return Path.home() / ".config"
|
||||||
|
|
||||||
|
def user_config_dirs() -> list[Path]:
|
||||||
|
if platform.system() in ("Windows", "Darwin", "Java"):
|
||||||
|
raise NotImplemented(f"{platform.system()} support not implemented yet")
|
||||||
|
else: # Assume Unix
|
||||||
|
paths = list(filter(Path.is_absolute, map(Path, (os.environ.get("XDG_CONFIG_DIRS") or "/etc/xdg").split(":"))))
|
||||||
|
return [user_config_home()] + paths
|
||||||
|
|
||||||
|
|
||||||
|
class Hash(ABC):
|
||||||
|
name: str
|
||||||
|
digest_size: int
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def update(self, *data: bytes | bytearray | memoryview):
|
||||||
|
...
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def digest(self) -> bytes:
|
||||||
|
...
|
||||||
@@ -15,16 +15,18 @@
|
|||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
from io import BytesIO
|
from os import stat_result
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from random import randbytes
|
from random import randbytes
|
||||||
|
from shutil import rmtree
|
||||||
from typing import Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from tempfile import TemporaryDirectory
|
from tempfile import TemporaryDirectory
|
||||||
|
|
||||||
from bsv.repository import Repository, Snapshot, Tree, TreeItem, create_repository, timestamp_from_time
|
from bsv.repository import Repository, Snapshot, Tree, TreeItem, create_repository, timestamp_us_from_time
|
||||||
from bsv.simple_cas.cas import Digest
|
from bsv.simple_cas.cas import Digest
|
||||||
|
from bsv.tree_walker import Action, IgnoreCause, TreeWalker
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
@@ -36,8 +38,9 @@ def tmp_dir():
|
|||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def repo(tmp_dir):
|
def repo(tmp_dir):
|
||||||
return create_repository(
|
return create_repository(
|
||||||
tmp_dir / "bsv",
|
tmp_dir / "bsv.config",
|
||||||
"test_repo",
|
"test_repo",
|
||||||
|
tmp_dir / "bsv_repo",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -46,13 +49,17 @@ def test_read_write_blob(tmp_dir: Path, repo: Repository):
|
|||||||
make_random_file(path, 1 << 20)
|
make_random_file(path, 1 << 20)
|
||||||
|
|
||||||
with path.open("rb") as stream:
|
with path.open("rb") as stream:
|
||||||
digest = repo.add_blob(stream)
|
obj0 = repo.add_blob(stream)
|
||||||
|
assert obj0.object_type == b"blob"
|
||||||
blob = repo.get_blob(digest)
|
|
||||||
data = blob.read()
|
|
||||||
|
|
||||||
with path.open("rb") as stream:
|
with path.open("rb") as stream:
|
||||||
assert data == stream.read()
|
assert obj0.blob.reader().read() == stream.read()
|
||||||
|
|
||||||
|
obj1 = repo.get_blob(obj0.digest)
|
||||||
|
assert obj1.digest == obj0.digest
|
||||||
|
assert obj1.object_type == obj0.object_type
|
||||||
|
assert obj1.size == obj0.size
|
||||||
|
with path.open("rb") as stream:
|
||||||
|
assert obj1.blob.reader().read() == stream.read()
|
||||||
|
|
||||||
|
|
||||||
def test_read_write_tree(repo: Repository):
|
def test_read_write_tree(repo: Repository):
|
||||||
@@ -61,40 +68,208 @@ def test_read_write_tree(repo: Repository):
|
|||||||
repo,
|
repo,
|
||||||
[
|
[
|
||||||
TreeItem(
|
TreeItem(
|
||||||
"xyz",
|
digest = Digest(bytes([42]) * repo._cas._digest_size),
|
||||||
Digest(bytes([42]) * repo._cas._digest_size),
|
object_type = b"blob",
|
||||||
0o744,
|
size = 123,
|
||||||
creation_timestamp = timestamp_from_time(now),
|
permissions = 0o744,
|
||||||
modification_timestamp = timestamp_from_time(now),
|
modification_timestamp_us = timestamp_us_from_time(now),
|
||||||
|
name = "xyz",
|
||||||
),
|
),
|
||||||
TreeItem(
|
TreeItem(
|
||||||
"foobar",
|
digest = Digest(bytes([123]) * repo._cas._digest_size),
|
||||||
Digest(bytes([123]) * repo._cas._digest_size),
|
object_type = b"slnk",
|
||||||
0o777,
|
size = 42,
|
||||||
creation_timestamp = timestamp_from_time(now),
|
permissions = 0o777,
|
||||||
modification_timestamp = timestamp_from_time(now),
|
modification_timestamp_us = timestamp_us_from_time(now),
|
||||||
|
name = "foobar",
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
assert Tree.from_bytes(repo, tree.to_bytes()) == tree
|
assert Tree.from_bytes(repo, tree.to_bytes()) == tree
|
||||||
|
|
||||||
digest = repo.add_tree(tree)
|
obj0 = repo.add_tree(tree)
|
||||||
assert repo.get_tree(digest) == tree
|
assert obj0.object_type == b"tree"
|
||||||
|
assert obj0.tree == tree
|
||||||
|
|
||||||
|
obj1 = repo.get_tree(obj0.digest)
|
||||||
|
assert obj1.digest == obj0.digest
|
||||||
|
assert obj1.object_type == obj0.object_type
|
||||||
|
assert obj1.size == obj0.size
|
||||||
|
assert obj1.tree == obj0.tree
|
||||||
|
|
||||||
|
|
||||||
def test_read_write_snapshot(repo: Repository):
|
def test_read_write_snapshot(repo: Repository):
|
||||||
snapshot = Snapshot(
|
snapshot = Snapshot(
|
||||||
repo = repo,
|
repo = repo,
|
||||||
tree_digest = Digest(bytes([42]) * repo._cas._digest_size),
|
tree_digest = Digest(bytes([42]) * repo._cas._digest_size),
|
||||||
|
parents = [
|
||||||
|
Digest(bytes([123]) * repo._cas._digest_size),
|
||||||
|
Digest(bytes([124]) * repo._cas._digest_size),
|
||||||
|
],
|
||||||
repo_name = "test_repo",
|
repo_name = "test_repo",
|
||||||
timestamp = timestamp_from_time(datetime.now()),
|
timestamp_us = timestamp_us_from_time(datetime.now()),
|
||||||
)
|
)
|
||||||
|
|
||||||
assert Snapshot.from_bytes(repo, snapshot.to_bytes()) == snapshot
|
assert Snapshot.from_bytes(repo, snapshot.to_bytes()) == snapshot
|
||||||
|
|
||||||
digest = repo.add_snapshot(snapshot)
|
obj0 = repo.add_snapshot(snapshot)
|
||||||
assert repo.get_snapshot(digest) == snapshot
|
assert obj0.object_type == b"snap"
|
||||||
|
assert obj0.snapshot == snapshot
|
||||||
|
|
||||||
|
obj1 = repo.get_snapshot(obj0.digest)
|
||||||
|
assert obj1.digest == obj0.digest
|
||||||
|
assert obj1.object_type == obj0.object_type
|
||||||
|
assert obj1.size == obj0.size
|
||||||
|
assert obj1.snapshot == obj0.snapshot
|
||||||
|
|
||||||
|
|
||||||
|
class TestTreeWalker(TreeWalker):
|
||||||
|
reports: list
|
||||||
|
|
||||||
|
def __init__(self, repo: Repository, dry_run: bool=False):
|
||||||
|
super().__init__(repo, dry_run=dry_run)
|
||||||
|
self.reports = []
|
||||||
|
|
||||||
|
def report(
|
||||||
|
self,
|
||||||
|
action: Action,
|
||||||
|
path: Path,
|
||||||
|
pstat: stat_result | None,
|
||||||
|
info: IgnoreCause | Exception | None = None
|
||||||
|
):
|
||||||
|
super().report(action, path, pstat, info)
|
||||||
|
self.reports.append((action, path, info if action != Action.REMOVE else None))
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_tree(tmp_dir: Path, repo: Repository):
|
||||||
|
dir = tmp_dir / "test0"
|
||||||
|
structure0 = {
|
||||||
|
"folder": {
|
||||||
|
"sub_folder": {
|
||||||
|
"empty_folder": {},
|
||||||
|
"foo.txt": b"Hello World!\n",
|
||||||
|
},
|
||||||
|
"test.py": b"print(\"Hello World!\")\n",
|
||||||
|
"bar.dat": bytes(range(256)),
|
||||||
|
},
|
||||||
|
"Another test with long name and spaces and a bang !": b"Should works.\n",
|
||||||
|
"bsv_repo": {
|
||||||
|
"bsv_repository.config": b"[bsv]\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
structure1 = {
|
||||||
|
"folder": {
|
||||||
|
"sub_folder": {
|
||||||
|
"empty_folder": {},
|
||||||
|
"foo.txt": b"Hello World!\n",
|
||||||
|
},
|
||||||
|
"bar.dat": bytes(range(256)) * 2,
|
||||||
|
},
|
||||||
|
"new_file": b"whatever",
|
||||||
|
"Another test with long name and spaces and a bang !": b"Should works.\n",
|
||||||
|
"bsv_repo": {
|
||||||
|
"bsv_repository.config": b"[bsv]\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
expected0 = dict(structure0)
|
||||||
|
del expected0["bsv_repo"]
|
||||||
|
|
||||||
|
expected1 = dict(structure1)
|
||||||
|
del expected1["bsv_repo"]
|
||||||
|
|
||||||
|
create_file_structure(dir, structure0)
|
||||||
|
|
||||||
|
def check(digest: Digest, value: dict | bytes):
|
||||||
|
if isinstance(value, dict):
|
||||||
|
tree = repo.get_tree(digest).tree
|
||||||
|
assert tree
|
||||||
|
assert list(map(lambda i: i.name, tree.items)) == sorted(value.keys())
|
||||||
|
for item in tree.items:
|
||||||
|
check(item.digest, value[item.name])
|
||||||
|
elif isinstance(value, bytes):
|
||||||
|
blob_obj = repo.get_blob(digest)
|
||||||
|
data = blob_obj.blob.reader().read()
|
||||||
|
assert data == value
|
||||||
|
|
||||||
|
walker = TestTreeWalker(repo)
|
||||||
|
obj0 = walker.add_tree(dir)
|
||||||
|
assert obj0.object_type == b"tree"
|
||||||
|
assert walker.reports == [
|
||||||
|
(Action.ADD, dir / "Another test with long name and spaces and a bang !", None),
|
||||||
|
(Action.IGNORE, dir / "bsv_repo", IgnoreCause.IGNORE_RULE),
|
||||||
|
(Action.ADD, dir / "folder/bar.dat", None),
|
||||||
|
(Action.ADD, dir / "folder/sub_folder/empty_folder", None),
|
||||||
|
(Action.ADD, dir / "folder/sub_folder/foo.txt", None),
|
||||||
|
(Action.ADD, dir / "folder/sub_folder", None),
|
||||||
|
(Action.ADD, dir / "folder/test.py", None),
|
||||||
|
(Action.ADD, dir / "folder", None),
|
||||||
|
(Action.ADD, dir, None),
|
||||||
|
]
|
||||||
|
check(obj0.digest, expected0)
|
||||||
|
|
||||||
|
create_file_structure(dir, structure1)
|
||||||
|
|
||||||
|
walker.reports.clear()
|
||||||
|
obj1 = walker.add_tree(dir, source_digest=obj0.digest)
|
||||||
|
assert obj0.object_type == b"tree"
|
||||||
|
assert walker.reports == [
|
||||||
|
(Action.IGNORE, dir / "Another test with long name and spaces and a bang !", IgnoreCause.UNCHANGED),
|
||||||
|
(Action.IGNORE, dir / "bsv_repo", IgnoreCause.IGNORE_RULE),
|
||||||
|
(Action.UPDATE, dir / "folder/bar.dat", None),
|
||||||
|
(Action.IGNORE, dir / "folder/sub_folder/empty_folder", IgnoreCause.UNCHANGED),
|
||||||
|
(Action.IGNORE, dir / "folder/sub_folder/foo.txt", IgnoreCause.UNCHANGED),
|
||||||
|
(Action.IGNORE, dir / "folder/sub_folder", IgnoreCause.UNCHANGED),
|
||||||
|
(Action.REMOVE, dir / "folder/test.py", None),
|
||||||
|
(Action.UPDATE, dir / "folder", None),
|
||||||
|
(Action.ADD, dir / "new_file", None),
|
||||||
|
(Action.UPDATE, dir, None),
|
||||||
|
]
|
||||||
|
check(obj1.digest, expected1)
|
||||||
|
|
||||||
|
|
||||||
|
def create_file_structure(dst: Path, value: dict | bytes):
|
||||||
|
if isinstance(value, bytes):
|
||||||
|
if dst.is_dir():
|
||||||
|
rmtree(str(dst))
|
||||||
|
if not dst.is_file() or dst.read_bytes() != value:
|
||||||
|
dst.write_bytes(value)
|
||||||
|
elif isinstance(value, dict):
|
||||||
|
if dst.is_file():
|
||||||
|
dst.unlink()
|
||||||
|
if not dst.is_dir():
|
||||||
|
dst.mkdir()
|
||||||
|
|
||||||
|
items = sorted(value.items())
|
||||||
|
fs_paths = sorted(dst.iterdir())
|
||||||
|
|
||||||
|
item_index = 0
|
||||||
|
fs_path_index = 0
|
||||||
|
|
||||||
|
while item_index < len(value) or fs_path_index < len(fs_paths):
|
||||||
|
name, subitem = items[item_index] if item_index < len(items) else (None, None)
|
||||||
|
fs_path = fs_paths[fs_path_index] if fs_path_index < len(fs_paths) else None
|
||||||
|
|
||||||
|
if name and fs_path:
|
||||||
|
if name < fs_path.name:
|
||||||
|
fs_path = None
|
||||||
|
elif name > fs_path.name:
|
||||||
|
name = None
|
||||||
|
|
||||||
|
if name:
|
||||||
|
item_index += 1
|
||||||
|
if fs_path:
|
||||||
|
fs_path_index += 1
|
||||||
|
|
||||||
|
if name:
|
||||||
|
create_file_structure(dst / name, subitem) # type: ignore
|
||||||
|
elif fs_path and fs_path.is_dir():
|
||||||
|
rmtree(fs_path)
|
||||||
|
elif fs_path:
|
||||||
|
fs_path.unlink()
|
||||||
|
else:
|
||||||
|
raise TypeError(f"invalid type {type(value).__name__} for parameter value")
|
||||||
|
|
||||||
|
|
||||||
def make_random_file(path: Path, size: int):
|
def make_random_file(path: Path, size: int):
|
||||||
|
|||||||
@@ -55,7 +55,9 @@ def test_simple_cas(tmp_dir: Path):
|
|||||||
|
|
||||||
obj = cas.read(digest)
|
obj = cas.read(digest)
|
||||||
assert obj is not None
|
assert obj is not None
|
||||||
|
assert obj.digest == digest
|
||||||
assert obj.object_type == b"blob"
|
assert obj.object_type == b"blob"
|
||||||
|
assert obj.size == len(data)
|
||||||
assert obj.data == data
|
assert obj.data == data
|
||||||
|
|
||||||
cas = SimpleCas(
|
cas = SimpleCas(
|
||||||
@@ -68,7 +70,9 @@ def test_simple_cas(tmp_dir: Path):
|
|||||||
|
|
||||||
obj = cas.read(digest)
|
obj = cas.read(digest)
|
||||||
assert obj is not None
|
assert obj is not None
|
||||||
|
assert obj.digest == digest
|
||||||
assert obj.object_type == b"blob"
|
assert obj.object_type == b"blob"
|
||||||
|
assert obj.size == len(data)
|
||||||
assert obj.data == data
|
assert obj.data == data
|
||||||
|
|
||||||
digest2 = cas.write(b"blob", data)
|
digest2 = cas.write(b"blob", data)
|
||||||
|
|||||||
Reference in New Issue
Block a user