Compare commits
12 Commits
67d15f989a
...
dev
| Author | SHA1 | Date | |
|---|---|---|---|
| f95c68ee41 | |||
| 2b961baa5b | |||
| 69bb85af01 | |||
| a97395370a | |||
| b1d2fe7717 | |||
| e74eaf0408 | |||
| d058cd0631 | |||
| 52a553d72b | |||
| eb6ae85698 | |||
| 073fd5e567 | |||
| 17bef2e63a | |||
| 7420d891d4 |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1,4 +1,6 @@
|
||||
__pycache__
|
||||
*.egg-info
|
||||
/.coverage
|
||||
/.hypothesis/
|
||||
/src/bsv/_version.py
|
||||
/venv
|
||||
*.egg-info
|
||||
|
||||
82
README.md
82
README.md
@@ -1,3 +1,83 @@
|
||||
# BSV - Backup, Synchronization, Versioning
|
||||
|
||||
Readme to be done.
|
||||
Bsv is a tool to perform backups, file/directory synchronization between devices and light versioning. Its architecture is inspired by git, but it focuses on backup first, synchronization second, and as a byproduct also allows versioning.
|
||||
|
||||
Bsv is in a very early stage of development and is not production ready yet.
|
||||
|
||||
|
||||
## Development setup
|
||||
|
||||
Bsv currently only supports Python >= 3.11. Once you have a compatible Python version, just fetch the code and install bsv in editable mode in a virtual env:
|
||||
|
||||
```
|
||||
git clone https://git.draklia.net/draklaw/pybsv.git
|
||||
cd pybsv
|
||||
python -m venv venv
|
||||
. venv/bin/activate
|
||||
pip install -e ".[dev]"
|
||||
```
|
||||
|
||||
This will automatically install the dependencies (including `pytest`). Happy hacking !
|
||||
|
||||
|
||||
## Roadmap
|
||||
|
||||
### v0.0.1 - Simple CAS + insert/remove files manually
|
||||
|
||||
Basic features. Naïve CAS implementation that stores everything in a single big file with no option for removing objects. Supports a single "local" repository.
|
||||
|
||||
- [x] Simple CAS implementation (it's OK if it's naïve).
|
||||
- [x] Content-based chunking to store files.
|
||||
- [x] `bsv init` command to initialize bsv.
|
||||
- [x] `bsv info` prints useful information about the bsv configuration.
|
||||
- [ ] `bsv log` show the history of snapshots.
|
||||
- [ ] `bsv show <digest>` show the object `digest`.
|
||||
- [ ] `bsv ls <bsv-path>` list files in a bsv directory.
|
||||
- [ ] `bsv mkdir <bsv-path>` create a directory in bsv directly.
|
||||
- [ ] `bsv add [-r] <fs-path> <bsv-path>` copy files from the filesystem to bsv.
|
||||
- [ ] `bsv get [-r] <bsv-path> <fs-path>` copy files from bsv to the filesystem.
|
||||
- [ ] `bsv rm [-r] <bsv-path>` remove files or directories from bsv.
|
||||
|
||||
### v0.0.2 - File map + snapshots
|
||||
|
||||
Add support for mapping files from BSV virtual file system to the actual file system. Add snapshot and restore commands.
|
||||
|
||||
- [ ] `bsv map` list mappings between bsv paths and filesystem paths.
|
||||
- [ ] `bsv map add <bsv-path> <fs-path>` add a mapping.
|
||||
- [ ] `bsv map remove <bsv-path> <fs-path>` remove a mapping.
|
||||
- [ ] `bsv snapshot` capture a snapshot, i.e. ensure that mapped files in the repositories match what is on the filesystem.
|
||||
- [ ] `bsv restore <snapshot> <fs-path>` update files on the filesystem to the version captured by `snapshot`.
|
||||
|
||||
### v0.0.3 - Multiple repository
|
||||
|
||||
Support multiple repositories. A repository can be configured to store only metadata (typically for the local repository) or everything.
|
||||
|
||||
- [ ] Support repositories that store only metadata.
|
||||
- [ ] `bsv repo [-v]` list all known repositories.
|
||||
- [ ] `bsv repo create <name> <destination>` create a new repository.
|
||||
- [ ] `bsv repo add [<name>] <destination>` add an already existing repository.
|
||||
- [ ] `bsv repo remove <name>|<destination>` remove a repository.
|
||||
- [ ] `bsv fetch [<name>]` fetch latest metadata from known repositories.
|
||||
- [ ] `bsv sync` similar to `snapshot` + `fetch` + `restore`: fetches the latest changes from the repositories and updates the filesystem to match. In case of conflict (file changed both in the repositories and locally), performs a snapshot first to ensure all conflicting versions are backed up, then uses some conflict-resolution strategy and warns the user.
|
||||
|
||||
### v0.0.4 - Proper CAS
|
||||
|
||||
- [ ] Safe concurrent access (e.g. when several devices use a shared repository).
|
||||
- [ ] Support removing objects.
|
||||
- [ ] Garbage collection (remove unreferenced objects).
|
||||
- [ ] Use garbage collection to keep metadata-only repository clean.
|
||||
|
||||
### v0.0.5 - Some extra features
|
||||
|
||||
- [ ] `bsv tag <name> [<snapshot>] [-m <message>]` set/update a tag (an alias to a specific snapshot).
|
||||
- [ ] Support for symlinks.
|
||||
|
||||
### Later
|
||||
|
||||
- [ ] `bsv watch` starts a daemon that watches changes in mapped directories and automatically creates snapshots.
|
||||
- [ ] `bsv http` starts an HTTP server that exposes an API + an interface to manipulate BSV. Allows listing files, exploring history, downloading and uploading files...
|
||||
- [ ] Bsv protocol + client/server
|
||||
- [ ] Custom rules for repository to select what must be stored or not.
|
||||
- [ ] Create sensible rules for backup (keep a lot of recent versions, less for older versions).
|
||||
- [ ] Add object set support (a kind of object that simply store a collection of objects). Can be used as tag.
|
||||
- [ ] Add mail object ?
|
||||
|
||||
@@ -12,22 +12,78 @@ classifiers = [
|
||||
]
|
||||
dynamic = ["version"]
|
||||
dependencies = [
|
||||
"click",
|
||||
"fastcdc",
|
||||
"rich",
|
||||
"tomlkit",
|
||||
"typing-extensions"
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
test = [
|
||||
dev = [
|
||||
"hypothesis",
|
||||
"pytest",
|
||||
"pytest-cov"
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
# TODO
|
||||
# "Homepage" = "https://github.com/pypa/sampleproject"
|
||||
# "Bug Tracker" = "https://github.com/pypa/sampleproject/issues"
|
||||
"Homepage" = "https://git.draklia.net/draklaw/pybsv"
|
||||
"Bug Tracker" = "https://git.draklia.net/draklaw/pybsv/issues"
|
||||
|
||||
[project.scripts]
|
||||
bsv = "bsv.main:main"
|
||||
bsv = "bsv.cli:cli"
|
||||
|
||||
|
||||
[tool.ruff]
|
||||
target-version = "py311"
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = [
|
||||
"B", # flake8-bugbear
|
||||
"D", # pydocstyle
|
||||
"DOC", # pydoclint
|
||||
"E", # pycodestyle
|
||||
"EM", # flake8-errmsg
|
||||
"F", # Pyflakes
|
||||
"FURB", # refurb
|
||||
"G", # flake8-logging-format
|
||||
"I", # isort
|
||||
"ISC", # flake8-implicit-str-concat
|
||||
"LOG", # flake8-logging
|
||||
"N", # pep8-naming
|
||||
"PERF", # Perflint
|
||||
"PT", # flake8-pytest-style
|
||||
"PTH", # flake8-use-pathlib
|
||||
"S", # flake8-bandit
|
||||
"SIM", # flake8-simplify
|
||||
"TC", # flake8-type-checking
|
||||
"UP", # pyupgrade
|
||||
"W", # pycodestyle
|
||||
]
|
||||
ignore = [
|
||||
"UP038", # Deprecated rule; bad idea.
|
||||
]
|
||||
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
"**/tests/*" = [
|
||||
"D103", # Missing docstring in public function
|
||||
"S101", # Use of assert detected
|
||||
]
|
||||
|
||||
[tool.ruff.lint.isort]
|
||||
force-sort-within-sections = true
|
||||
lines-after-imports = 2
|
||||
required-imports = ["from __future__ import annotations"]
|
||||
|
||||
[tool.ruff.lint.pydocstyle]
|
||||
convention = "google"
|
||||
|
||||
|
||||
[tool.coverage.report]
|
||||
exclude_also = [
|
||||
"if TYPE_CHECKING:",
|
||||
]
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools", "setuptools-scm"]
|
||||
|
||||
18
src/bsv.bak/__init__.py
Normal file
18
src/bsv.bak/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
||||
# bsv - Backup, Synchronization, Versioning
|
||||
# Copyright (C) 2023 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from __future__ import annotations
|
||||
|
||||
from bsv._version import __version__, __version_tuple__
|
||||
21
src/bsv.bak/__main__.py
Normal file
21
src/bsv.bak/__main__.py
Normal file
@@ -0,0 +1,21 @@
|
||||
# bsv - Backup, Synchronization, Versioning
|
||||
# Copyright (C) 2023 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from __future__ import annotations
|
||||
|
||||
from bsv.main import main
|
||||
|
||||
|
||||
# Use SystemExit directly: the `exit` builtin is injected by the `site` module
# for interactive sessions and is missing when Python runs with `-S`.
raise SystemExit(main())
|
||||
16
src/bsv.bak/_version.py
Normal file
16
src/bsv.bak/_version.py
Normal file
@@ -0,0 +1,16 @@
|
||||
# file generated by setuptools_scm
|
||||
# don't change, don't track in version control
|
||||
TYPE_CHECKING = False
|
||||
if TYPE_CHECKING:
|
||||
from typing import Tuple, Union
|
||||
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
||||
else:
|
||||
VERSION_TUPLE = object
|
||||
|
||||
version: str
|
||||
__version__: str
|
||||
__version_tuple__: VERSION_TUPLE
|
||||
version_tuple: VERSION_TUPLE
|
||||
|
||||
__version__ = version = '0.0.1.dev8+g52a553d.d20231127'
|
||||
__version_tuple__ = version_tuple = (0, 0, 1, 'dev8', 'g52a553d.d20231127')
|
||||
126
src/bsv.bak/cli.py
Normal file
126
src/bsv.bak/cli.py
Normal file
@@ -0,0 +1,126 @@
|
||||
# bsv - Backup, Synchronization, Versioning
|
||||
# Copyright (C) 2023 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Callable, TypeVar
|
||||
|
||||
from rich.console import Console
|
||||
from rich.text import Text
|
||||
|
||||
|
||||
# Shared stdout console; stays None until `init_consoles` assigns it.
_console: Console | None = None


def get_console() -> Console:
    """Return the shared stdout console.

    The module-level console must already be set (see `init_consoles`);
    otherwise the assertion below fails.
    """
    console = _console
    assert console is not None
    return console
|
||||
|
||||
# Shared stderr console; stays None until `init_consoles` assigns it.
_error_console: Console | None = None


def get_error_console() -> Console:
    """Return the shared stderr console.

    The module-level console must already be set (see `init_consoles`);
    otherwise the assertion below fails.
    """
    console = _error_console
    assert console is not None
    return console
|
||||
|
||||
|
||||
def init_consoles(color: str = "auto"):
    """Create the module-level stdout and stderr consoles.

    Must be called at most once: both consoles are asserted to be unset.

    Args:
        color: "always" sets `force_terminal`, "never" sets `no_color`;
            "auto" (and any other value) passes no color-related option.
    """
    global _console, _error_console

    assert _console is None
    assert _error_console is None

    options: dict[str, Any] = {"tab_size": 4}
    if color == "always":
        options["force_terminal"] = True
    elif color == "never":
        options["no_color"] = True

    _console = Console(**options)
    _error_console = Console(stderr=True, **options)
|
||||
|
||||
|
||||
PromptType = TypeVar("PromptType")
|
||||
|
||||
class NoDefaultType:
    """Type of the `NoDefault` sentinel used as `prompt`'s default `default`."""

    def __repr__(self) -> str:
        return "NoDefault"


# Instance used as the sentinel value.
NoDefault = NoDefaultType()
|
||||
|
||||
def prompt(
    prompt: str,
    factory: Callable[[str], PromptType],
    *,
    console: Console | None = None,
    default: PromptType | NoDefaultType = NoDefault,
    show_default: bool = True,
) -> PromptType:
    """Ask the user for a value until `factory` accepts the input.

    Args:
        prompt: Text displayed before the input field.
        factory: Converts the raw input string into the returned value. A
            `ValueError` it raises is printed and the question is asked again.
        console: Console used for input and output; `get_console()` when None.
        default: Value returned unchanged when the user enters nothing.
        show_default: Whether the default is displayed after the prompt text.

    Returns:
        The value built by `factory`, or `default` on empty input.

    Raises:
        KeyboardInterrupt: Re-raised after printing an empty line.
    """
    out = get_console() if console is None else console
    has_default = not isinstance(default, NoDefaultType)

    label = Text(prompt, style="prompt")
    label.end = ""
    if show_default and default is not NoDefault:
        label.append(" ")
        label.append(f"({default})", style="prompt.default")
    label.append(": ")

    while True:
        try:
            answer = out.input(label)
        except KeyboardInterrupt:
            out.print("")
            raise

        if has_default and not answer:
            return default
        try:
            return factory(answer)
        except ValueError as err:
            out.print(err)
|
||||
|
||||
def prompt_confirmation(prompt: str, *, console: Console | None = None, default: bool = True) -> bool:
    """Ask a yes/no question and return the answer as a boolean.

    The question is repeated until the stripped, lower-cased input is empty,
    "y" or "n".

    Args:
        prompt: Text displayed before the "(Y/n)" / "(y/N)" hint.
        console: Console used for input and output; `get_console()` when None.
        default: Value returned when the user enters nothing; also selects
            which letter of the hint is capitalized.

    Returns:
        True for "y", False for "n", `default` for empty input.

    Raises:
        KeyboardInterrupt: Re-raised after printing an empty line.
    """
    if console is None:
        console = get_console()

    prompt_text = Text(prompt, style="prompt")
    prompt_text.end = ""
    prompt_text.append(" ")
    prompt_text.append("(Y/n)" if default else "(y/N)", style="prompt.default")
    prompt_text.append(": ")

    while True:
        try:
            value = console.input(prompt_text).strip().lower()
        except KeyboardInterrupt:
            console.print("")
            raise

        if not value:
            return default
        # Compare against the exact answers: a substring test
        # (`value in "yn"`) would also accept the input "yn".
        if value in ("y", "n"):
            return value == "y"
        console.print("Please answer 'y' or 'n'.")
|
||||
@@ -19,6 +19,7 @@ from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
|
||||
from bsv import __version__
|
||||
from bsv.cli import get_console
|
||||
from bsv.command import command
|
||||
from bsv.repository import Repository
|
||||
|
||||
@@ -33,27 +34,29 @@ def init_parser(parser: ArgumentParser):
|
||||
)
|
||||
|
||||
@command(init_parser)
|
||||
def info(repository_path: Path | None, verbosity: int=0) -> int:
|
||||
def info(config_path: Path, verbosity: int=0) -> int:
|
||||
"""Print informations about bsv: config file used, known repository, file mapping...
|
||||
"""
|
||||
|
||||
print(f"bsv v{__version__}")
|
||||
print = get_console().print
|
||||
|
||||
if repository_path is None:
|
||||
print("Repository path not found. Bsv is likely not setup on this device.")
|
||||
print(f"bsv [green]v{__version__}")
|
||||
|
||||
if not config_path.exists():
|
||||
print("bsv configuration not found. Bsv is likely not setup on this device.", style="red")
|
||||
return 0
|
||||
else:
|
||||
print(f"Repository path: {repository_path}")
|
||||
|
||||
repo = Repository(repository_path)
|
||||
repo = Repository(config_path)
|
||||
|
||||
print(f"Repository name: {repo.name}")
|
||||
print(f"[blue]Config path: [bold yellow]{repo.config_path}")
|
||||
print(f"[blue]Device name: [bold yellow]{repo.device_name}")
|
||||
print(f"[blue]Local repository: [bold yellow]{repo._local_repository_path}")
|
||||
|
||||
if repo.path_map:
|
||||
print("Path map: (bsv path <-> filesystem path)")
|
||||
for pair in sorted(repo.path_map):
|
||||
print("[blue]Path map:[/blue] (bsv path <-> filesystem path)")
|
||||
if repo.path_map.pairs:
|
||||
for pair in sorted(repo.path_map.pairs):
|
||||
print(f" {pair.bsv} <-> {pair.fs}")
|
||||
else:
|
||||
print("Path map is empty.")
|
||||
print(" [bold yellow]No path mapped.")
|
||||
|
||||
return 0
|
||||
118
src/bsv.bak/command/init.py
Normal file
118
src/bsv.bak/command/init.py
Normal file
@@ -0,0 +1,118 @@
|
||||
# bsv - Backup, Synchronization, Versioning
|
||||
# Copyright (C) 2023 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from __future__ import annotations
|
||||
|
||||
from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
import platform
|
||||
|
||||
from bsv.command import command
|
||||
from bsv.repository import check_config_path, check_device_name, check_local_repository_path, create_repository
|
||||
from bsv.util import default_local_repository_path
|
||||
|
||||
|
||||
def init_parser(parser: ArgumentParser):
    """Register the options of the `init` command on `parser`."""
    add_option = parser.add_argument

    add_option(
        "--interactive", "-i",
        action="store_true",
        default=False,
        help="Prompt the user for configuration choices.",
    )
    add_option(
        "--local-repository", "-l",
        dest="local_repository_path",
        type=Path,
        nargs="?",
        default=default_local_repository_path(),
        help="Path to a non-existing or empty folder where bsv data will be stored.",
    )
    add_option(
        "--device-name", "-n",
        # The default is evaluated once, when the parser is built.
        default=platform.node(),
        help="Name of the device. Default to system hostname.",
    )
|
||||
|
||||
@command(init_parser)
def init(
    config_path: Path,
    device_name: str,
    local_repository_path: Path,
    interactive: bool = False,
) -> int:
    """Initialize a new bsv repository.
    """
    # NOTE(review): docstring left unchanged in case the `command` decorator
    # exposes it as CLI help text -- confirm before expanding it.
    #
    # Parameters:
    #   config_path: path of the bsv configuration file.
    #   device_name: name of this device.
    #   local_repository_path: local repository path given to `create_repository`.
    #   interactive: prompt for every setting (the given values become the
    #       defaults) and ask for confirmation before creating anything.
    # Returns 0 on success, 1 when a setting is rejected or the user declines.
    from bsv.cli import get_console, get_error_console, prompt, prompt_confirmation

    echo = get_console().print

    # Factories handed to `prompt`: a ValueError raised by a check makes
    # `prompt` print the message and ask again.
    def make_config_path(value: str) -> Path:
        path = Path(value.strip())
        if not path.is_absolute():
            path = path.resolve()
        check_config_path(path)
        return path

    def make_device_name(value: str) -> str:
        name = value.strip()
        check_device_name(name)
        return name

    def make_local_repository_path(value: str) -> Path:
        path = Path(value)
        if not path.is_absolute():
            path = path.resolve()
        check_local_repository_path(path)
        return path

    if interactive:
        config_path = prompt("Bsv configuration file", make_config_path, default=config_path)
        device_name = prompt("Device name", make_device_name, default=device_name)
        local_repository_path = prompt("Destination", make_local_repository_path, default=local_repository_path)

    if not config_path.is_absolute():
        config_path = config_path.resolve()
    if not local_repository_path.is_absolute():
        local_repository_path = local_repository_path.resolve()

    # Checked again here because defaults accepted at the prompt (and all
    # values in non-interactive mode) never went through the factories.
    try:
        check_config_path(config_path)
        check_device_name(device_name)
        check_local_repository_path(local_repository_path)
    except ValueError as err:
        get_error_console().print(err, style="bold red")
        return 1

    echo("Bsv repository will be created with the following settings:", style="green")
    echo("")
    echo(f"\t[blue]:page_facing_up: Config path[/blue]: [bold yellow]{config_path}")
    echo(f"\t[blue]:computer: Device name[/blue]: [bold yellow]{device_name}")
    echo(f"\t[blue]:floppy_disk: Local repository[/blue]: [bold yellow]{local_repository_path}")
    echo("")

    if interactive and not prompt_confirmation("Create repository ?"):
        return 1

    create_repository(
        config_path=config_path,
        device_name=device_name,
        local_repository_path=local_repository_path,
    )

    return 0
|
||||
32
src/bsv.bak/exception.py
Normal file
32
src/bsv.bak/exception.py
Normal file
@@ -0,0 +1,32 @@
|
||||
# bsv - Backup, Synchronization, Versioning
|
||||
# Copyright (C) 2023 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class BsvError(RuntimeError):
    """Base class of the bsv-specific exceptions defined in this module."""
|
||||
|
||||
class NotFound(BsvError):
    """Bsv error for a missing item (NOTE(review): raise sites not visible here; confirm)."""
|
||||
|
||||
class UnmappedPath(BsvError):
    """Raised when a filesystem path is not covered by any path-map entry."""
|
||||
|
||||
class UnexpectedObjectType(BsvError):
    """Bsv error for an object of the wrong type (NOTE(review): raise sites not visible here; confirm)."""
|
||||
|
||||
class ConfigError(BsvError):
    """Raised when the bsv configuration is invalid, e.g. a required item is missing."""
|
||||
@@ -22,7 +22,9 @@ import sys
|
||||
from textwrap import dedent
|
||||
|
||||
from bsv import __version__
|
||||
from bsv.cli import get_error_console, init_consoles
|
||||
from bsv.command import init_commands
|
||||
from bsv.util import default_bsv_config_path
|
||||
|
||||
|
||||
def make_parser(
|
||||
@@ -31,10 +33,20 @@ def make_parser(
|
||||
) -> ArgumentParser:
|
||||
parent_parser = ArgumentParser(add_help=False)
|
||||
parent_parser.add_argument(
|
||||
"--repository",
|
||||
type = Path,
|
||||
"--color",
|
||||
default = "auto",
|
||||
choices = ("always", "auto", "never"),
|
||||
help = dedent("""
|
||||
Bsv repository path. Overides default paths and BSV_REPOSITORY environment variable.
|
||||
Force or disable colors, or auto-detect terminal support.
|
||||
""").strip(),
|
||||
)
|
||||
parent_parser.add_argument(
|
||||
"--config",
|
||||
default = default_bsv_config_path(),
|
||||
type = Path,
|
||||
dest = "config_path",
|
||||
help = dedent("""
|
||||
Bsv config path. Overrides default paths and BSV_CONFIG environment variable.
|
||||
""").strip(),
|
||||
)
|
||||
|
||||
@@ -68,16 +80,16 @@ def main(
|
||||
)
|
||||
arg_dict = vars(parser.parse_args(args or sys.argv[1:]))
|
||||
|
||||
repository_path: Path | None = arg_dict.pop("repository")
|
||||
if repository_path is None and "BSV_REPOSITORY" in os.environ:
|
||||
repository_path = Path(os.environ["BSV_REPOSITORY"])
|
||||
# else:
|
||||
# for path in get_config_dirs():
|
||||
# maybe_config_path = path / "config.toml"
|
||||
# if maybe_config_path.is_file():
|
||||
# config_path = maybe_config_path
|
||||
# break
|
||||
color = arg_dict.pop("color")
|
||||
init_consoles(color=color)
|
||||
|
||||
command = arg_dict.pop("command")
|
||||
|
||||
return command(repository_path=repository_path, **arg_dict)
|
||||
try:
|
||||
return command(**arg_dict)
|
||||
except Exception as err:
|
||||
get_error_console().print_exception()
|
||||
except KeyboardInterrupt:
|
||||
return 130
|
||||
|
||||
return 0
|
||||
@@ -15,33 +15,30 @@
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import BinaryIO
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
def read_exact(stream: BinaryIO, num_bytes: int) -> bytes:
|
||||
data = stream.read(num_bytes)
|
||||
if len(data) != num_bytes:
|
||||
raise IOError(f"expected {num_bytes} bytes, got {len(data)}")
|
||||
return data
|
||||
@dataclass(frozen=True, order=True, slots=True)
|
||||
class Digest:
|
||||
digest: bytes = b""
|
||||
|
||||
def read_exact_or_eof(stream: BinaryIO, num_bytes: int) -> bytes | None:
|
||||
data = stream.read(num_bytes)
|
||||
if not data:
|
||||
return None
|
||||
if len(data) != num_bytes:
|
||||
raise IOError(f"expected {num_bytes} bytes, got {len(data)}")
|
||||
return data
|
||||
def __bool__(self) -> bool:
|
||||
return bool(self.digest)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return self.digest.hex()
|
||||
|
||||
|
||||
class Hash(ABC):
|
||||
name: str
|
||||
digest_size: int
|
||||
@dataclass(frozen=True, order=True, slots=True)
|
||||
class ObjectInfo:
|
||||
digest: Digest
|
||||
object_type: bytes
|
||||
size: int
|
||||
|
||||
@abstractmethod
|
||||
def update(self, *data: bytes | bytearray | memoryview):
|
||||
...
|
||||
def __repr__(self) -> str:
|
||||
return f"<{self.__class__.__name__} {self.digest} {self.object_type.decode()} {self.size}B>"
|
||||
|
||||
@abstractmethod
|
||||
def digest(self) -> bytes:
|
||||
...
|
||||
|
||||
@dataclass(frozen=True, order=True, slots=True)
|
||||
class Object(ObjectInfo):
|
||||
data: bytes
|
||||
89
src/bsv.bak/path_map.py
Normal file
89
src/bsv.bak/path_map.py
Normal file
@@ -0,0 +1,89 @@
|
||||
# bsv - Backup, Synchronization, Versioning
|
||||
# Copyright (C) 2023 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from itertools import pairwise
|
||||
|
||||
from pathlib import Path, PurePosixPath
|
||||
from typing import Any
|
||||
|
||||
from bsv.exception import UnmappedPath
|
||||
|
||||
|
||||
@dataclass(order=True, frozen=True, slots=True)
|
||||
class PathPair:
|
||||
bsv: PurePosixPath
|
||||
fs: Path
|
||||
|
||||
def __post_init__(self):
|
||||
if not self.bsv.is_absolute() or not self.fs.is_absolute():
|
||||
raise ValueError("paths in path_map must be absolute")
|
||||
super().__setattr__("fs", self.fs.resolve())
|
||||
|
||||
@classmethod
|
||||
def from_obj(cls, obj: dict[str, str]) -> PathPair:
|
||||
return cls(
|
||||
bsv = PurePosixPath(obj["bsv"]),
|
||||
fs = Path(obj["fs"]),
|
||||
)
|
||||
|
||||
|
||||
class PathMap:
|
||||
pairs: list[PathPair]
|
||||
|
||||
def __init__(self, pairs: list[PathPair]=[]):
|
||||
self.pairs = sorted(pairs)
|
||||
for path0, path1 in pairwise(self.pairs):
|
||||
if path0 == path1 or path1.bsv.relative_to(path0.bsv):
|
||||
raise ValueError("bsv paths must be unique and independent")
|
||||
|
||||
@classmethod
|
||||
def from_obj(cls, obj: list[dict[str, str]]) -> PathMap:
|
||||
return cls([
|
||||
PathPair.from_obj(item)
|
||||
for item in obj
|
||||
])
|
||||
|
||||
def mount_point(self, fs_path: Path) -> PathPair:
|
||||
fs_path = fs_path.resolve()
|
||||
best_pair = None
|
||||
best_relative = None
|
||||
for pair in self.pairs:
|
||||
try:
|
||||
relative = fs_path.relative_to(pair.fs)
|
||||
if not best_pair or not best_relative or len(relative.parts) < len(best_relative.parts):
|
||||
best_pair = pair
|
||||
best_relative = relative
|
||||
except ValueError:
|
||||
pass
|
||||
if best_pair is None:
|
||||
raise UnmappedPath(f"unmapped fs path {fs_path}")
|
||||
return best_pair
|
||||
|
||||
def relative_bsv_path(self, fs_target: Path, relative_to: Path) -> PurePosixPath:
|
||||
if not relative_to.is_absolute():
|
||||
relative_to = fs_target / relative_to
|
||||
|
||||
fs_target = fs_target.resolve()
|
||||
relative_to = relative_to.resolve()
|
||||
|
||||
target_mount_point = self.mount_point(fs_target)
|
||||
base_mount_point = self.mount_point(relative_to)
|
||||
|
||||
raise NotImplemented("not implemented yet")
|
||||
|
||||
def clone(self) -> PathMap:
|
||||
return PathMap(self.pairs)
|
||||
679
src/bsv.bak/repository.py
Normal file
679
src/bsv.bak/repository.py
Normal file
@@ -0,0 +1,679 @@
|
||||
# bsv - Backup, Synchronization, Versioning
|
||||
# Copyright (C) 2023 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime as DateTime
|
||||
import hashlib
|
||||
from io import BytesIO
|
||||
from pathlib import Path, PurePosixPath
|
||||
import tomllib
|
||||
from typing import TYPE_CHECKING, Any, BinaryIO, Self
|
||||
|
||||
from fastcdc import fastcdc
|
||||
|
||||
from bsv import __version__
|
||||
from bsv.exception import ConfigError
|
||||
from bsv.object import ObjectInfo
|
||||
from bsv.path_map import PathMap
|
||||
from bsv.simple_cas import SimpleCas
|
||||
from bsv.simple_cas.cas import Digest, SimpleCas
|
||||
from bsv.util import default_bsv_config_path, default_local_repository_path, read_exact, read_exact_or_eof, time_from_timestamp_us, timestamp_us_from_time
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from bsv.tree_walker import TreeWalker
|
||||
|
||||
|
||||
DEFAULT_MIN_CHUNK_SIZE = 1 << 12
|
||||
DEFAULT_AVG_CHUNK_SIZE = 1 << 16
|
||||
DEFAULT_MAX_CHUNK_SIZE = 1 << 20
|
||||
|
||||
|
||||
class Repository:
    """A bsv instance: device configuration plus access to its local object store.

    The configuration is read from the ``[bsv]`` table of a TOML file and the
    content-addressable store (CAS) is built by ``make_cas`` from the
    configured local repository directory.

    The repository is a re-entrant context manager: the CAS is entered by the
    outermost ``with`` block only and exited when that block ends. Most object
    accessors wrap themselves in ``with self`` so they work both standalone
    and inside a caller-provided ``with repo:`` block.
    """

    _config_path: Path
    _device_name: str
    _local_repository_path: Path

    _cas: SimpleCas
    _min_chunk_size: int = DEFAULT_MIN_CHUNK_SIZE
    _avg_chunk_size: int = DEFAULT_AVG_CHUNK_SIZE
    _max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE

    _path_map: PathMap
    # _remotes: list[object]

    # Number of currently active (nested) ``with`` blocks on this repository.
    _context_depth: int = 0

    def __init__(self, config_path: Path):
        """Load the configuration at *config_path* and open the matching CAS.

        Raises:
            ConfigError: if a required item of the ``[bsv]`` table is missing.
        """
        self._config_path = config_path.resolve()

        with self._config_path.open("rb") as stream:
            config = tomllib.load(stream)

        bsv = config.get("bsv", {})

        def get(key: str) -> Any:
            value = bsv.get(key)
            if value is None:
                raise ConfigError(f"invalid bsv configuration: missing bsv.{key} item")
            return value

        self._device_name = get("device_name")
        self._local_repository_path = Path(get("local_repository"))
        self._min_chunk_size = get("min_chunk_size")
        self._avg_chunk_size = get("avg_chunk_size")
        self._max_chunk_size = get("max_chunk_size")
        self._path_map = PathMap.from_obj(get("path_map"))

        self._cas = make_cas(self._local_repository_path)

    @property
    def config_path(self) -> Path:
        """Resolved path of the configuration file this repository was loaded from."""
        return self._config_path

    @property
    def device_name(self) -> str:
        """Value of the ``device_name`` configuration item."""
        return self._device_name

    @property
    def path_map(self) -> PathMap:
        """A clone of the configured path map (the internal one is not exposed)."""
        return self._path_map.clone()

    def get_blob(self, digest: Digest) -> BlobObject:
        """Read the ``blob`` object *digest* and decode its chunk index."""
        with self:
            obj, blob = self._read(digest, object_type=b"blob")
            return BlobObject(
                digest = obj.digest,
                object_type = obj.object_type,
                size = obj.size,
                blob = blob,
            )

    def add_blob(self, stream: BinaryIO, *, dry_run: bool=False) -> BlobObject:
        """Chunk the content of *stream* and store it as a ``blob`` object."""
        with self:
            return self._write(b"blob", stream, dry_run=dry_run)

    def get_symlink(self, digest: Digest) -> SymlinkObject:
        """Read and decode the ``slnk`` object *digest*."""
        with self:
            obj = self._cas.read(digest, object_type=b"slnk")
            return SymlinkObject(
                digest = obj.digest,
                object_type = obj.object_type,
                size = obj.size,
                symlink = Symlink.from_bytes(self, obj.data),
            )

    def add_symlink(self, symlink: Symlink, *, dry_run: bool=False) -> SymlinkObject:
        """Serialize *symlink* and store it as a ``slnk`` object."""
        with self:
            data = symlink.to_bytes()
            return SymlinkObject(
                digest = self._cas.write(b"slnk", data, dry_run=dry_run),
                object_type = b"slnk",
                size = len(data),
                symlink = symlink,
            )

    def add_symlink_from_fs_target(self, fs_symlink: Path, fs_target: Path, *, dry_run: bool=False) -> SymlinkObject:
        """Store the symlink located at *fs_symlink* that points to *fs_target*.

        *fs_symlink* must be absolute. The stored target is computed by the
        path map from *fs_target*, relative to *fs_symlink*.
        """
        assert fs_symlink.is_absolute()
        return self.add_symlink(
            Symlink(
                repo = self,
                is_absolute = fs_target.is_absolute(),
                target = self._path_map.relative_bsv_path(fs_target, relative_to=fs_symlink),
            ),
            dry_run = dry_run,
        )

    def get_tree(self, digest: Digest) -> TreeObject:
        """Read and decode the ``tree`` object *digest*."""
        with self:
            obj = self._cas.read(digest, object_type=b"tree")
            return TreeObject(
                digest = obj.digest,
                object_type = obj.object_type,
                size = obj.size,
                tree = Tree.from_bytes(self, obj.data),
            )

    def add_tree(self, tree: Tree, *, dry_run: bool=False) -> TreeObject:
        """Serialize *tree* and store it as a ``tree`` object."""
        with self:
            data = tree.to_bytes()
            return TreeObject(
                digest = self._cas.write(b"tree", data, dry_run=dry_run),
                object_type = b"tree",
                size = len(data),
                tree = tree,
            )

    def add_tree_from_path(self, path: Path, *, dry_run: bool=False) -> TreeObject:
        """Store the directory *path* using a default ``TreeWalker``."""
        # Imported here because bsv.tree_walker itself imports this module.
        from bsv.tree_walker import TreeWalker
        walker = TreeWalker(self, dry_run=dry_run)
        return walker.add_tree(path)

    def get_snapshot(self, digest: Digest) -> SnapshotObject:
        """Read and decode the ``snap`` object *digest*."""
        with self:
            obj = self._cas.read(digest, object_type=b"snap")
            return SnapshotObject(
                digest = obj.digest,
                object_type = obj.object_type,
                size = obj.size,
                snapshot = Snapshot.from_bytes(self, obj.data),
            )

    def add_snapshot(self, snapshot: Snapshot, *, dry_run: bool=False) -> SnapshotObject:
        """Serialize *snapshot* and store it as a ``snap`` object."""
        with self:
            data = snapshot.to_bytes()
            return SnapshotObject(
                digest = self._cas.write(b"snap", data, dry_run=dry_run),
                object_type = b"snap",
                size = len(data),
                snapshot = snapshot,
            )

    # def take_snapshot(
    #     self,
    #     parent_digests: list[Digest] = [],
    #     *,
    #     walker_type: Type[TreeWalker] | None = None,
    #     dry_run: bool = False,
    # ):
    #     from bsv.tree_walker import TreeWalker

    #     walker = (walker_type or TreeWalker)(self, dry_run=dry_run)

    #     # parents = [
    #     #     self.get_snapshot(digest)
    #     #     for digest in parent_digests
    #     # ]
    #     parent = self.get_snapshot(parent_digests[0]) if parent_digests else None

    #     snapshot = Snapshot(
    #         repo = self,
    #         tree_digest = walker.add_virtual_tree(self._path_map, parent=),
    #         parents = parent_digests,
    #         repo_name = self._name,
    #         timestamp = timestamp_us_from_time(DateTime.now()),
    #     )
    #     return self.add_snapshot(snapshot, dry_run=dry_run)

    def get_ref(self, key: str) -> Digest | None:
        """Return the digest stored under reference *key* in the CAS, if any."""
        return self._cas.get_ref(key)

    def set_ref(self, key: str, digest: Digest):
        """Make reference *key* point to *digest* in the CAS."""
        self._cas.set_ref(key, digest)

    def get_head_snapshot(self) -> Digest | None:
        """Return the digest stored under the ``HEAD`` reference, if any."""
        return self.get_ref("HEAD")

    def _read(self, digest: Digest, object_type: bytes) -> tuple[ObjectInfo, Blob]:
        """Read a chunked object and decode its chunk index into a ``Blob``."""
        obj = self._cas.read(digest, object_type=object_type)
        stream = BytesIO(obj.data)
        return obj, Blob.from_stream(self, stream, digest_size=self._cas._digest_size)

    def _write(self, object_type: bytes, stream: BinaryIO, *, dry_run: bool=False) -> BlobObject:
        """Store the content of *stream* as content-defined chunks plus an index object.

        Every chunk is written as a ``chnk`` object. The index object, of type
        *object_type*, holds the total size on 8 bytes followed by one
        ``digest + 4-byte length`` record per chunk.
        """
        index = BytesIO()
        size = 0
        chunks = []
        for chunk in fastcdc(
            stream,
            min_size = self._min_chunk_size,
            avg_size = self._avg_chunk_size,
            max_size = self._max_chunk_size,
            fat = True,
        ):
            size += chunk.length
            digest = self._cas.write(b"chnk", chunk.data, dry_run=dry_run)
            chunks.append(Chunk(digest, chunk.length))
            index.write(digest.digest)
            index.write(chunk.length.to_bytes(4))

        payload = size.to_bytes(8) + index.getvalue()
        return BlobObject(
            # dry_run must be forwarded here too, otherwise a dry run would
            # still store the index object in the CAS.
            digest = self._cas.write(object_type, payload, dry_run=dry_run),
            object_type = object_type,
            size = len(payload),
            blob = Blob(
                repo = self,
                size = size,
                chunks = chunks,
            )
        )

    def __enter__(self):
        # Only the outermost ``with`` actually enters the CAS.
        if self._context_depth == 0:
            self._cas.__enter__()
        self._context_depth += 1
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Only the outermost ``with`` exits the CAS; inner blocks return None
        # so exceptions keep propagating.
        self._context_depth -= 1
        if self._context_depth == 0:
            return self._cas.__exit__(exc_type, exc_value, traceback)
|
||||
|
||||
|
||||
def check_config_path(path: Path):
    """Validate *path* as the destination of a new bsv configuration file.

    Raises:
        ValueError: if *path* already exists, or if it is not the default
            configuration path and its parent is not an existing directory.
    """
    if path.exists():
        raise ValueError(f"{path} already exists.")

    # The parent directory is only required for non-default locations.
    is_default_location = path == default_bsv_config_path()
    if not is_default_location and not path.parent.is_dir():
        raise ValueError(f"{path.parent} does not exist or is not a directory.")
|
||||
|
||||
def check_device_name(device_name: str):
    """Validate a device name.

    Raises:
        ValueError: if *device_name* is empty or is not a valid Python
            identifier (as tested by ``str.isidentifier``).
    """
    if device_name == "":
        raise ValueError("Device name cannot be empty.")
    if device_name.isidentifier():
        return
    raise ValueError(f"{device_name} is not a valid device name.")
|
||||
|
||||
def check_local_repository_path(path: Path):
    """Validate *path* as the location of a new local repository.

    Raises:
        ValueError: if *path* is not the default repository path and its
            parent does not exist, or if *path* exists but is either not a
            directory or a non-empty directory.
    """
    if path != default_local_repository_path() and not path.parent.exists():
        raise ValueError(f"Directory {path.parent} does not exists.")

    # A path that does not exist yet has nothing more to check.
    if not path.exists():
        return

    if not path.is_dir():
        raise ValueError(f"{path} exists but is not a directory.")
    if any(path.iterdir()):
        raise ValueError(f"Local repository directory {path} is not empty.")
|
||||
|
||||
def create_repository(
    config_path: Path,
    device_name: str,
    local_repository_path: Path,
    cas: str = "simple",
    hash: str = "sha256",
    min_chunk_size: int = DEFAULT_MIN_CHUNK_SIZE,
    avg_chunk_size: int = DEFAULT_AVG_CHUNK_SIZE,
    max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE,
):
    """Create a new bsv configuration and local repository, then open it.

    Two TOML files are written: the device configuration at *config_path*
    (``[bsv]`` table) and ``bsv_repository.config`` inside
    *local_repository_path* (``[cas]`` table with the CAS type and hash name).

    Returns:
        The ``Repository`` loaded from the freshly written configuration.

    Raises:
        ValueError: if one of the ``check_*`` validations fails.
        RuntimeError: if a directory or configuration file cannot be created;
            the original error is attached as ``__cause__``.
    """
    from datetime import datetime as DateTime
    from os import getlogin

    import tomlkit

    check_config_path(config_path)
    check_device_name(device_name)
    check_local_repository_path(local_repository_path)

    # The default config directory is created on demand; any other location
    # must already exist (enforced by check_config_path).
    if config_path == default_bsv_config_path():
        try:
            config_path.parent.mkdir(parents=True, exist_ok=True)
        except Exception as err:
            raise RuntimeError(f"failed to create bsv config destination directory {config_path.parent}") from err

    try:
        local_repository_path.mkdir(exist_ok=True)
    except Exception as err:
        raise RuntimeError(f"failed to create local repository directory {local_repository_path}") from err

    # os.getlogin() raises OSError when the process has no controlling
    # terminal (services, containers, cron); fall back to getpass.getuser().
    try:
        created_by = getlogin()
    except OSError:
        from getpass import getuser
        created_by = getuser()
    created_on = DateTime.now().isoformat()

    bsv_table = tomlkit.table()

    bsv_table.add(tomlkit.comment("Name of the instance."))
    bsv_table.add(tomlkit.comment("Ideally, this should be unique among all connected devices."))
    bsv_table.add("device_name", device_name)

    bsv_table.add(tomlkit.nl())
    bsv_table.add(tomlkit.comment("Path to the local repository."))
    bsv_table.add("local_repository", str(local_repository_path))

    bsv_table.add(tomlkit.nl())
    bsv_table.add(tomlkit.comment("Properties of the content-based chunking algorithm."))
    bsv_table.add("min_chunk_size", min_chunk_size)
    bsv_table.add("avg_chunk_size", avg_chunk_size)
    bsv_table.add("max_chunk_size", max_chunk_size)

    bsv_table.add(tomlkit.nl())
    bsv_table.add(tomlkit.comment("Mapping between bsv tree and the local filesystem."))
    bsv_table.add("path_map", tomlkit.array())

    bsv_config = tomlkit.document()
    bsv_config.add(tomlkit.comment("bsv device configuration"))
    bsv_config.add(tomlkit.comment(f"Created by {created_by} on {created_on}."))
    bsv_config.add(tomlkit.nl())
    bsv_config.add("bsv", bsv_table)

    cas_table = tomlkit.table()
    cas_table.add("type", cas)
    cas_table.add("hash", hash)

    cas_config = tomlkit.document()
    cas_config.add(tomlkit.comment(f"bsv local repository configuration for instance {config_path}."))
    cas_config.add(tomlkit.comment(f"Created by {created_by} on {created_on}."))
    cas_config.add(tomlkit.nl())
    cas_config.add("cas", cas_table)

    cas_config_path = local_repository_path / "bsv_repository.config"

    # Both files are opened before anything is written so that a failure to
    # open the second one does not leave a lone, fully written first one.
    try:
        bsv_stream = config_path.open("w", encoding="utf-8")
    except Exception as err:
        raise RuntimeError(f"failed to open bsv configuration file {config_path}") from err
    try:
        cas_stream = cas_config_path.open("w", encoding="utf-8")
    except Exception as err:
        # Do not leak the first stream, and remove the empty file it created:
        # it would make the next attempt fail with "already exists".
        bsv_stream.close()
        config_path.unlink(missing_ok=True)
        raise RuntimeError(f"failed to open local repository configuration file {cas_config_path}") from err

    # A single ``with`` closes both streams even if the first dump fails.
    with bsv_stream, cas_stream:
        tomlkit.dump(bsv_config, bsv_stream)
        tomlkit.dump(cas_config, cas_stream)

    return Repository(config_path)
|
||||
|
||||
|
||||
def make_cas(cas_config_path: Path) -> SimpleCas:
    """Build the content-addressable store described in a local repository.

    Reads the ``[cas]`` table of ``bsv_repository.config`` located in the
    directory *cas_config_path*.

    Raises:
        ConfigError: if the ``type`` or ``hash`` item is missing, if the hash
            name is not accepted by ``hashlib.new``, or if the CAS type is
            unknown.
    """
    with (cas_config_path / "bsv_repository.config").open("rb") as stream:
        config = tomllib.load(stream)

    cas = config.get("cas", {})

    def get(key: str) -> Any:
        value = cas.get(key)
        if value is None:
            raise ConfigError(f"invalid repository configuration: missing {key} item")
        return value

    cas_type = get("type")

    # Read and validate the hash name now, so a broken configuration is
    # reported when the repository is opened rather than on the first write.
    hash_name = get("hash")
    try:
        hashlib.new(hash_name)
    except (TypeError, ValueError) as err:
        raise ConfigError(f"invalid repository configuration: unsupported hash {hash_name!r}") from err

    def hash_factory():
        return hashlib.new(hash_name)

    if cas_type == "simple":
        return SimpleCas(cas_config_path, hash_factory) # type: ignore
    raise ConfigError(f"unknown cas type {cas_type}")
|
||||
|
||||
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ChunkedObject:
|
||||
repo: Repository
|
||||
size: int
|
||||
chunks: list[Chunk]
|
||||
|
||||
@classmethod
|
||||
def from_stream(cls, repo: Repository, stream: BinaryIO, digest_size: int) -> Self:
|
||||
self = cls(
|
||||
repo = repo,
|
||||
size = int.from_bytes(read_exact(stream, 8)),
|
||||
chunks = [],
|
||||
)
|
||||
while (chunk := Chunk.from_stream(stream, digest_size)) is not None:
|
||||
self.chunks.append(chunk)
|
||||
return self
|
||||
|
||||
def reader(self) -> ChunkedObjectReader:
|
||||
return ChunkedObjectReader(self)
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class Chunk:
|
||||
digest: Digest
|
||||
size: int
|
||||
|
||||
@classmethod
|
||||
def from_stream(cls, stream: BinaryIO, digest_size: int) -> Self | None:
|
||||
digest_bytes = read_exact_or_eof(stream, digest_size)
|
||||
if digest_bytes is None:
|
||||
return None
|
||||
digest = Digest(digest_bytes)
|
||||
|
||||
return cls(
|
||||
digest = digest,
|
||||
size = int.from_bytes(read_exact(stream, 4)),
|
||||
)
|
||||
|
||||
|
||||
class ChunkedObjectReader:
    """Sequential reader over the content of a ``ChunkedObject``.

    Chunks are fetched one at a time from the repository's CAS. Bytes fetched
    but not yet returned are kept in ``_chunk_data`` for the next call.
    """

    _chunked_object: ChunkedObject
    # Index of the next chunk to fetch from the CAS.
    _chunk_index: int = 0
    # Bytes already fetched from the CAS but not yet returned to the caller.
    _chunk_data: bytes = b""

    def __init__(self, chunked_object: ChunkedObject):
        self._chunked_object = chunked_object

    def read(self, num_bytes: int = -1) -> bytes:
        """Read and return up to *num_bytes* bytes.

        A negative *num_bytes* reads everything up to the end. Fewer bytes
        than requested (possibly ``b""``) are returned only at the end.
        """
        chunk_count = len(self._chunked_object.chunks)
        parts = [self._chunk_data]
        size = len(self._chunk_data)
        while (num_bytes < 0 or size < num_bytes) and self._chunk_index < chunk_count:
            part = self._read_next_chunk()
            parts.append(part)
            size += len(part)

        data = b"".join(parts)
        if 0 <= num_bytes < len(data):
            # More was fetched than requested: keep the excess for later and
            # return exactly num_bytes bytes.
            self._chunk_data = data[num_bytes:]
            return data[:num_bytes]
        self._chunk_data = b""
        return data

    def read1(self) -> bytes:
        """Return the next available piece of data, fetching at most one chunk.

        Buffered bytes left over from a previous ``read`` are returned first;
        otherwise the next chunk is returned. ``b""`` means end of data.
        """
        if self._chunk_data:
            data, self._chunk_data = self._chunk_data, b""
            return data
        return self._read_next_chunk()

    def _read_next_chunk(self) -> bytes:
        """Fetch the next chunk from the CAS, or ``b""`` when none is left."""
        chunks = self._chunked_object.chunks
        if self._chunk_index >= len(chunks):
            return b""
        cas = self._chunked_object.repo._cas
        stored = cas.read(chunks[self._chunk_index].digest, object_type=b"chnk")
        self._chunk_index += 1
        return stored.data
|
||||
|
||||
|
||||
|
||||
@dataclass(slots=True)
class Blob(ChunkedObject):
    """Chunk index of a ``blob`` object; adds nothing to ``ChunkedObject``."""
|
||||
|
||||
@dataclass(frozen=True, order=True, slots=True)
class BlobObject(ObjectInfo):
    """Object information of a stored blob together with its decoded ``Blob``."""

    # Decoded chunk index of the blob.
    blob: Blob
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Symlink:
|
||||
repo: Repository
|
||||
is_absolute: bool
|
||||
target: PurePosixPath
|
||||
|
||||
@classmethod
|
||||
def from_stream(cls, repo: Repository, stream: BinaryIO) -> Self:
|
||||
return cls(
|
||||
repo = repo,
|
||||
is_absolute = bool(read_exact(stream, 1)),
|
||||
target = PurePosixPath(stream.read().decode("utf-8")),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_bytes(cls, repo: Repository, bytes: bytes) -> Self:
|
||||
stream = BytesIO(bytes)
|
||||
return cls.from_stream(repo, stream)
|
||||
|
||||
def write(self, stream: BinaryIO):
|
||||
stream.write(self.is_absolute.to_bytes(1))
|
||||
stream.write(self.target.as_posix().encode("utf-8"))
|
||||
|
||||
def to_bytes(self) -> bytes:
|
||||
stream = BytesIO()
|
||||
self.write(stream)
|
||||
return stream.getvalue()
|
||||
|
||||
@dataclass(frozen=True, order=True, slots=True)
class SymlinkObject(ObjectInfo):
    """Object information of a stored symlink together with its decoded ``Symlink``."""

    # Decoded content of the symlink object.
    symlink: Symlink
|
||||
|
||||
|
||||
@dataclass
|
||||
class Tree:
|
||||
repo: Repository
|
||||
items: list[TreeItem]
|
||||
|
||||
@property
|
||||
def total_size(self) -> int:
|
||||
return sum(
|
||||
item.size
|
||||
for item in self.items
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_stream(cls, repo: Repository, stream: BinaryIO) -> Self:
|
||||
tree = Tree(repo, [])
|
||||
while (item := TreeItem.from_stream(stream, repo._cas._digest_size)) is not None:
|
||||
tree.items.append(item)
|
||||
return tree
|
||||
|
||||
@classmethod
|
||||
def from_bytes(cls, repo: Repository, data: bytes) -> Self:
|
||||
stream = BytesIO(data)
|
||||
return cls.from_stream(repo, stream)
|
||||
|
||||
def write(self, stream: BinaryIO):
|
||||
self.items.sort(key=lambda i: i.name)
|
||||
for item in self.items:
|
||||
item.write(stream)
|
||||
|
||||
def to_bytes(self) -> bytes:
|
||||
stream = BytesIO()
|
||||
self.write(stream)
|
||||
return stream.getvalue()
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self.items)
|
||||
|
||||
def get(self, key: str) -> TreeItem | None:
|
||||
for item in self.items:
|
||||
if item.name == key:
|
||||
return item
|
||||
return None
|
||||
|
||||
def __getitem__(self, key: str) -> TreeItem:
|
||||
item = self.get(key)
|
||||
if item is None:
|
||||
raise KeyError(f"{key} not found")
|
||||
return item
|
||||
|
||||
@dataclass(frozen=True, order=True, slots=True)
class TreeObject(ObjectInfo):
    """Object information of a stored tree together with its decoded ``Tree``."""

    # Decoded content of the tree object.
    tree: Tree

    @property
    def total_size(self) -> int:
        """Size of the tree object itself plus the ``total_size`` of its tree."""
        own_size = self.size
        return own_size + self.tree.total_size
|
||||
|
||||
|
||||
@dataclass
|
||||
class TreeItem:
|
||||
digest: Digest
|
||||
object_type: bytes
|
||||
size: int
|
||||
permissions: int
|
||||
modification_timestamp_us: int
|
||||
name: str
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
digest: Digest,
|
||||
object_type: bytes,
|
||||
size: int,
|
||||
permissions: int,
|
||||
modification_timestamp_us: int,
|
||||
name: str,
|
||||
):
|
||||
if "/\\" in name:
|
||||
raise ValueError(f"invalid tree item name {name}")
|
||||
self.digest = digest
|
||||
self.object_type = object_type
|
||||
self.size = size
|
||||
self.permissions = permissions
|
||||
self.modification_timestamp_us = modification_timestamp_us
|
||||
self.name = name
|
||||
|
||||
@property
|
||||
def modification_time(self) -> DateTime:
|
||||
return time_from_timestamp_us(self.modification_timestamp_us)
|
||||
@modification_time.setter
|
||||
def modification_time(self, time: DateTime):
|
||||
self.modification_timestamp_us = timestamp_us_from_time(time)
|
||||
|
||||
@classmethod
|
||||
def from_stream(cls, stream: BinaryIO, digest_size: int) -> Self | None:
|
||||
digest_bytes = read_exact_or_eof(stream, digest_size)
|
||||
if digest_bytes is None:
|
||||
return None
|
||||
return TreeItem(
|
||||
digest = Digest(digest_bytes),
|
||||
object_type = read_exact(stream, 4),
|
||||
size = int.from_bytes(read_exact(stream, 8)),
|
||||
permissions = int.from_bytes(read_exact(stream, 2)),
|
||||
modification_timestamp_us = int.from_bytes(read_exact(stream, 8), signed=True),
|
||||
name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
|
||||
)
|
||||
|
||||
def write(self, stream: BinaryIO):
|
||||
stream.write(self.digest.digest)
|
||||
stream.write(self.object_type)
|
||||
stream.write(self.size.to_bytes(8))
|
||||
stream.write(self.permissions.to_bytes(2))
|
||||
stream.write(self.modification_timestamp_us.to_bytes(8, signed=True))
|
||||
name_bytes = self.name.encode("utf-8")
|
||||
stream.write(len(name_bytes).to_bytes(2))
|
||||
stream.write(name_bytes)
|
||||
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class Snapshot:
|
||||
repo: Repository
|
||||
tree_digest: Digest
|
||||
parents: list[Digest]
|
||||
repo_name: str
|
||||
timestamp_us: int
|
||||
|
||||
def __post_init__(self):
|
||||
assert len(self.parents) < 256
|
||||
|
||||
@property
|
||||
def time(self) -> DateTime:
|
||||
return time_from_timestamp_us(self.timestamp_us)
|
||||
@time.setter
|
||||
def time(self, time: DateTime):
|
||||
self.timestamp_us = timestamp_us_from_time(time)
|
||||
|
||||
@classmethod
|
||||
def from_stream(cls, repo: Repository, stream: BinaryIO) -> Self:
|
||||
return Snapshot(
|
||||
repo = repo,
|
||||
tree_digest = Digest(read_exact(stream, repo._cas._digest_size)),
|
||||
parents = [
|
||||
Digest(read_exact(stream, repo._cas._digest_size))
|
||||
for _ in range(int.from_bytes(read_exact(stream, 1)))
|
||||
],
|
||||
repo_name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
|
||||
timestamp_us = int.from_bytes(read_exact(stream, 8), signed=True),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_bytes(cls, repo: Repository, data: bytes) -> Self:
|
||||
stream = BytesIO(data)
|
||||
return cls.from_stream(repo, stream)
|
||||
|
||||
def write(self, stream: BinaryIO):
|
||||
assert len(self.parents) < 256
|
||||
stream.write(self.tree_digest.digest)
|
||||
stream.write(len(self.parents).to_bytes(1))
|
||||
for parent in self.parents:
|
||||
stream.write(parent.digest)
|
||||
repo_name_bytes = self.repo_name.encode("utf-8")
|
||||
stream.write(len(repo_name_bytes).to_bytes(2))
|
||||
stream.write(repo_name_bytes)
|
||||
stream.write(self.timestamp_us.to_bytes(8, signed=True))
|
||||
|
||||
def to_bytes(self) -> bytes:
|
||||
stream = BytesIO()
|
||||
self.write(stream)
|
||||
return stream.getvalue()
|
||||
|
||||
@dataclass(frozen=True, order=True, slots=True)
class SnapshotObject(ObjectInfo):
    """Object information of a stored snapshot together with its decoded ``Snapshot``."""

    # Decoded content of the snapshot object.
    snapshot: Snapshot
|
||||
@@ -19,21 +19,10 @@ from dataclasses import dataclass
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from typing import Any, BinaryIO, Callable, Iterator
|
||||
from bsv.exception import NotFound, UnexpectedObjectType
|
||||
from bsv.object import Digest, Object, ObjectInfo
|
||||
|
||||
from bsv.simple_cas.util import Hash, read_exact_or_eof
|
||||
|
||||
|
||||
class BsvError(RuntimeError):
|
||||
pass
|
||||
|
||||
class NotFound(BsvError):
|
||||
pass
|
||||
|
||||
class UnexpectedObjectType(BsvError):
|
||||
pass
|
||||
|
||||
class ConfigError(BsvError):
|
||||
pass
|
||||
from bsv.util import Hash, read_exact_or_eof
|
||||
|
||||
|
||||
class SimpleCas:
|
||||
@@ -105,21 +94,19 @@ class SimpleCas:
|
||||
assert size == item.size
|
||||
data = stream.read(size)
|
||||
|
||||
return Object(object_type, data)
|
||||
return Object(digest, object_type, size, data)
|
||||
|
||||
def write(self, object_type: bytes, data: bytes) -> Digest:
|
||||
def write(self, object_type: bytes, data: bytes, dry_run: bool=False) -> Digest:
|
||||
assert len(object_type) == 4
|
||||
assert len(data) < 2**32
|
||||
|
||||
hash = self._hash_factory()
|
||||
hash.update(object_type)
|
||||
hash.update(b"\0")
|
||||
hash.update(len(data).to_bytes(4))
|
||||
hash.update(b"\0")
|
||||
hash.update(data)
|
||||
digest = Digest(hash.digest())
|
||||
|
||||
if digest not in self:
|
||||
if not dry_run and digest not in self:
|
||||
with self._open_writer(digest, object_type, len(data)) as out:
|
||||
out.write(digest.digest)
|
||||
out.write(object_type)
|
||||
@@ -164,23 +151,7 @@ class SimpleCas:
|
||||
return self._root_dir / "refs" / key_path
|
||||
|
||||
|
||||
@dataclass(frozen=True, order=True, slots=True)
|
||||
class Digest:
|
||||
digest: bytes
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return self.digest.hex()
|
||||
|
||||
|
||||
@dataclass
|
||||
class Object:
|
||||
object_type: bytes
|
||||
data: bytes
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<Object {self.object_type.decode()}: {len(self.data)}B>"
|
||||
|
||||
@dataclass
|
||||
@dataclass(frozen=True)
|
||||
class IndexItem:
|
||||
object_type: bytes
|
||||
offset: int
|
||||
@@ -188,12 +159,3 @@ class IndexItem:
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<IndexItem {self.object_type.decode()}: {self.offset}B +{self.size}B>"
|
||||
|
||||
@dataclass
|
||||
class ObjectInfo:
|
||||
digest: Digest
|
||||
object_type: bytes
|
||||
size: int
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<ObjectInfo {self.digest} {self.object_type.decode()} {self.size}B>"
|
||||
268
src/bsv.bak/tree_walker.py
Normal file
268
src/bsv.bak/tree_walker.py
Normal file
@@ -0,0 +1,268 @@
|
||||
# bsv - Backup, Synchronization, Versioning
|
||||
# Copyright (C) 2023 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime as DateTime, timedelta as TimeDelta
|
||||
from enum import Enum
|
||||
from os import stat_result
|
||||
from pathlib import Path
|
||||
import stat
|
||||
|
||||
from bsv.object import Digest, ObjectInfo
|
||||
from bsv.repository import BlobObject, Repository, SymlinkObject, Tree, TreeItem, TreeObject
|
||||
from bsv.util import is_bsv_repository, object_type_from_mode
|
||||
|
||||
|
||||
class Action(Enum):
    """What the tree walker did (or would do) with a path."""

    ADD = "add"
    UPDATE = "update"
    REMOVE = "remove"
    IGNORE = "ignore"
    ERROR = "error"

    @classmethod
    def from_digests(cls, digest: Digest, source_digest: Digest | None) -> tuple[Action, IgnoreCause | None]:
        """Classify a stored object by comparing its digest with the source one.

        Returns ``ADD`` when there is no source digest, ``IGNORE`` with cause
        ``UNCHANGED`` when both digests are equal, and ``UPDATE`` otherwise.
        """
        assert digest
        if not source_digest:
            return cls.ADD, None
        if source_digest == digest:
            return cls.IGNORE, IgnoreCause.UNCHANGED
        return cls.UPDATE, None
|
||||
|
||||
class IgnoreCause(Enum):
    """Reason attached to an ``Action.IGNORE`` report."""

    # The path matched the walker's ignore rule.
    IGNORE_RULE = "ignore_rule"
    # The entry is considered identical to the one in the source tree.
    UNCHANGED = "unchanged"
    # The file type maps to no supported object type.
    UNSUPPORTED_TYPE = "unsupported_type"
|
||||
|
||||
|
||||
class TreeWalker:
|
||||
_repo: Repository
|
||||
_time_rounding_us: int = 2000000
|
||||
_force_hash: bool = False
|
||||
_dry_run: bool = False
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
repo: Repository,
|
||||
*,
|
||||
time_rounding_us: int = 2000000,
|
||||
force_hash: bool = False,
|
||||
dry_run: bool = False,
|
||||
):
|
||||
self._repo = repo
|
||||
self._time_rounding_us = time_rounding_us
|
||||
self._force_hash = force_hash
|
||||
self._dry_run = dry_run
|
||||
|
||||
# def add_virtual_tree(self, paths: PathMap) -> Digest:
|
||||
# assert paths
|
||||
|
||||
# fs_paths = sorted([
|
||||
# pair.fs
|
||||
# for pair in paths.pairs
|
||||
# ])
|
||||
|
||||
# tree_map = {
|
||||
# fs_paths[0]: self.add_tree()
|
||||
# }
|
||||
# root = {}
|
||||
# for pair in paths.paths:
|
||||
# vdir = root
|
||||
# for part in pair.bsv.parts[:-1]:
|
||||
# vdir = vdir.setdefault(part, {})
|
||||
# vdir[pair.bsv.parts[-1]] = pair.fs
|
||||
|
||||
# return self._add_virtual_tree(root)
|
||||
|
||||
# def _add_virtual_tree(self, vtree: dict[str, dict | Path]) -> Digest:
|
||||
# tree = Tree(self._repo, [])
|
||||
# for name, value in vtree.items():
|
||||
# if isinstance(value, dict):
|
||||
# digest = self._add_virtual_tree(value)
|
||||
# elif isinstance(value, Path):
|
||||
# digest = self.add_tree(value)
|
||||
# else:
|
||||
# raise TypeError(f"unexpected type {type(vtree).__name__} for vtree")
|
||||
# tree.items.append(TreeItem(
|
||||
# digest = digest,
|
||||
# object_type = b"tree",
|
||||
# size = 0,
|
||||
# permissions = 0o766,
|
||||
# modification_timestamp = timestamp_us_from_time(DateTime.now()),
|
||||
# name = name,
|
||||
# ))
|
||||
# return self._repo.add_tree(tree, dry_run=self._dry_run)
|
||||
|
||||
def add_tree(self, path: Path, *, source_digest: Digest | None=None) -> TreeObject:
|
||||
pstat = path.stat(follow_symlinks=False)
|
||||
if self.ignore(path, pstat):
|
||||
raise ValueError(f"path {path} is ignored")
|
||||
# self.report(Action.IGNORE, path, pstat, IgnoreCause.IGNORE_RULE)
|
||||
# return Digest()
|
||||
return self._add_tree(path, pstat, source_digest=source_digest)
|
||||
|
||||
def _add_tree(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> TreeObject:
|
||||
source = self._repo.get_tree(source_digest).tree if source_digest else None
|
||||
|
||||
tree = Tree(self._repo, [])
|
||||
subpaths = sorted(path.iterdir())
|
||||
|
||||
subpath_index = 0
|
||||
source_item_index = 0
|
||||
|
||||
while subpath_index < len(subpaths) or (source and source_item_index < len(source.items)):
|
||||
subpath = subpaths[subpath_index] if subpath_index < len(subpaths) else None
|
||||
source_item = source.items[source_item_index] if source and source_item_index < len(source.items) else None
|
||||
|
||||
if subpath and source_item:
|
||||
if subpath.name < source_item.name:
|
||||
source_item = None
|
||||
elif subpath.name > source_item.name:
|
||||
subpath = None
|
||||
|
||||
if subpath is not None:
|
||||
subpath_index += 1
|
||||
if source_item is not None:
|
||||
source_item_index += 1
|
||||
|
||||
if subpath is not None:
|
||||
obj: ObjectInfo | None = None
|
||||
try:
|
||||
istat = subpath.lstat()
|
||||
|
||||
if self.ignore(subpath, istat, source=source_item):
|
||||
self.report(Action.IGNORE, subpath, istat, IgnoreCause.IGNORE_RULE)
|
||||
continue
|
||||
|
||||
if (source_item is not None and
|
||||
not self._force_hash and
|
||||
not stat.S_ISDIR(istat.st_mode) and
|
||||
pstat.st_size == source_item.size and
|
||||
pstat.st_mtime_ns // (1000 * self._time_rounding_us) == source_item.modification_timestamp_us // self._time_rounding_us
|
||||
):
|
||||
self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNCHANGED)
|
||||
tree.items.append(source_item)
|
||||
continue
|
||||
|
||||
sub_source_digest = source_item and source_item.digest
|
||||
object_type = object_type_from_mode(istat.st_mode)
|
||||
if object_type == b"slnk":
|
||||
obj = self._add_symlink(subpath, istat, source_digest=sub_source_digest)
|
||||
size = obj.size
|
||||
elif object_type == b"tree":
|
||||
obj = self._add_tree(subpath, istat, source_digest=sub_source_digest)
|
||||
size = obj.total_size
|
||||
elif object_type == b"blob":
|
||||
obj = self._add_blob(subpath, istat, source_digest=sub_source_digest)
|
||||
size = istat.st_size
|
||||
else:
|
||||
self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNSUPPORTED_TYPE)
|
||||
continue
|
||||
except Exception as err:
|
||||
self.report(Action.ERROR, subpath, None, err)
|
||||
continue
|
||||
|
||||
if obj:
|
||||
tree.items.append(TreeItem(
|
||||
digest = obj.digest,
|
||||
object_type = object_type,
|
||||
size = size,
|
||||
permissions = stat.S_IMODE(istat.st_mode),
|
||||
modification_timestamp_us = istat.st_mtime_ns // 1000,
|
||||
name = subpath.name,
|
||||
))
|
||||
elif source_item:
|
||||
self.report(Action.REMOVE, path / source_item.name, None, source_item)
|
||||
|
||||
tree_object = self._repo.add_tree(tree, dry_run=self._dry_run)
|
||||
|
||||
action, info = Action.from_digests(tree_object.digest, source_digest)
|
||||
self.report(action, path, pstat, info)
|
||||
return tree_object
|
||||
|
||||
|
||||
def _add_symlink(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> SymlinkObject:
|
||||
# TODO: Store symlink relative to current dir ?
|
||||
# * What about symlink that points outside of the backup dirs
|
||||
# * Should symlinks that points inside the backup dirs but in another
|
||||
# mount-point adjusted ?
|
||||
# * Should absolute symlink be restored as absolute ?
|
||||
obj = self._repo.add_symlink_from_fs_target(path, path.readlink())
|
||||
|
||||
action, info = Action.from_digests(obj.digest, source_digest)
|
||||
self.report(action, path, pstat, info)
|
||||
return obj
|
||||
|
||||
def _add_blob(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> BlobObject:
|
||||
with path.open("rb") as stream:
|
||||
obj = self._repo.add_blob(stream, dry_run=self._dry_run)
|
||||
|
||||
action, info = Action.from_digests(obj.digest, source_digest)
|
||||
self.report(action, path, pstat, info)
|
||||
return obj
|
||||
|
||||
|
||||
def ignore(self, path: Path, pstat: stat_result, *, source: TreeItem | None=None) -> bool:
|
||||
return is_bsv_repository(path)
|
||||
|
||||
def report(self, action: Action, path: Path, pstat: stat_result | None, info: IgnoreCause | Exception | TreeItem | None=None):
|
||||
match action, info:
|
||||
case (Action.ADD, None):
|
||||
print(f"Add: {path}")
|
||||
case (Action.UPDATE, None):
|
||||
print(f"Add: {path}")
|
||||
case (Action.REMOVE, item) if isinstance(item, TreeItem):
|
||||
print(f"Remove: {path / item.name}")
|
||||
case (Action.IGNORE, IgnoreCause.IGNORE_RULE):
|
||||
print(f"Ignore (rule): {path}")
|
||||
case (Action.IGNORE, IgnoreCause.UNCHANGED):
|
||||
print(f"Ignore (unchanged): {path}")
|
||||
case (Action.IGNORE, IgnoreCause.UNSUPPORTED_TYPE) if pstat is not None:
|
||||
print(f"Ignore (unsupported type {path_type_name(pstat)}): {path}")
|
||||
case (Action.ERROR, _) if isinstance(info, Exception):
|
||||
print(f"Error {info}: {path}")
|
||||
case _:
|
||||
raise ValueError("TreeWalker.report(): unsupported parameter combination")
|
||||
|
||||
|
||||
def path_type_name(pstat: stat_result) -> str:
    """Return a human-readable name for the file type encoded in `pstat`.

    Every `stat.S_IS*` predicate that matches `pstat.st_mode` contributes a
    label; the labels are joined with ", ". When no predicate matches,
    "unknown" is returned.
    """
    # (predicate, label) pairs, in the order labels appear in the result.
    type_checks = (
        (stat.S_ISBLK, "block_device"),
        (stat.S_ISCHR, "char_device"),
        (stat.S_ISDIR, "dir"),
        (stat.S_ISDOOR, "door"),
        (stat.S_ISFIFO, "fifo"),
        (stat.S_ISLNK, "symlink"),
        (stat.S_ISPORT, "port"),
        (stat.S_ISREG, "file"),
        (stat.S_ISSOCK, "socket"),
        (stat.S_ISWHT, "whiteout"),
    )
    labels = [label for matches, label in type_checks if matches(pstat.st_mode)]
    return ", ".join(labels) if labels else "unknown"
|
||||
118
src/bsv.bak/util.py
Normal file
118
src/bsv.bak/util.py
Normal file
@@ -0,0 +1,118 @@
|
||||
# bsv - Backup, Synchronization, Versioning
|
||||
# Copyright (C) 2023 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from datetime import UTC, datetime as DateTime, timedelta as TimeDelta
|
||||
import os
|
||||
from pathlib import Path
|
||||
import platform
|
||||
import stat
|
||||
from typing import BinaryIO
|
||||
|
||||
|
||||
EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
|
||||
|
||||
|
||||
def time_from_timestamp_us(timestamp: int) -> DateTime:
|
||||
return EPOCH + TimeDelta(microseconds=timestamp)
|
||||
|
||||
def timestamp_us_from_time(time: DateTime) -> int:
|
||||
return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)
|
||||
|
||||
|
||||
def read_exact(stream: BinaryIO, num_bytes: int) -> bytes:
    """Read exactly `num_bytes` bytes from `stream`.

    Raises:
        IOError: if a single `read` call returns a different number of bytes.
    """
    chunk = stream.read(num_bytes)
    if len(chunk) == num_bytes:
        return chunk
    raise IOError(f"expected {num_bytes} bytes, got {len(chunk)}")
|
||||
|
||||
def read_exact_or_eof(stream: BinaryIO, num_bytes: int) -> bytes | None:
    """Read exactly `num_bytes` bytes from `stream`, or detect its end.

    Returns `None` when the read yields no data at all, and the data when it
    has the requested length.

    Raises:
        IOError: if some data was read, but not `num_bytes` bytes.
    """
    chunk = stream.read(num_bytes)
    received = len(chunk)
    if received == 0:
        return None
    if received == num_bytes:
        return chunk
    raise IOError(f"expected {num_bytes} bytes, got {received}")
|
||||
|
||||
|
||||
def is_bsv_repository(path: Path) -> bool:
    """Tell whether `path` contains a `bsv_repository.config` file."""
    marker = path.joinpath("bsv_repository.config")
    return marker.is_file()
|
||||
|
||||
|
||||
def object_type_from_path(path: Path) -> bytes:
    """Return the object type tag of `path`, without following symlinks."""
    mode = path.stat(follow_symlinks=False).st_mode
    return object_type_from_mode(mode)
|
||||
|
||||
def object_type_from_mode(mode: int) -> bytes:
    """Map a `st_mode` value to a 4-byte object type tag.

    Returns `b"slnk"` for symlinks, `b"tree"` for directories, `b"blob"` for
    regular files and `b""` for anything else.
    """
    tagged_checks = (
        (stat.S_ISLNK, b"slnk"),
        (stat.S_ISDIR, b"tree"),
        (stat.S_ISREG, b"blob"),
    )
    for matches, tag in tagged_checks:
        if matches(mode):
            return tag
    return b""
|
||||
|
||||
|
||||
def default_bsv_config_path() -> Path:
    """Return the path of the bsv configuration file to use.

    Lookup order:

    1. `$BSV_CONFIG`, when it is an absolute path to an existing file.
    2. The first existing `bsv/config` file found in `user_config_dirs()`.
    3. `user_config_home() / "bsv/config"`, whether or not it exists.
    """
    env_value = os.environ.get("BSV_CONFIG", "")
    if env_value:
        env_path = Path(env_value)
        if env_path.is_absolute() and env_path.is_file():
            return env_path

    for config_dir in user_config_dirs():
        # The entries are configuration *directories*: look for the bsv
        # config file inside them rather than testing the directory itself.
        candidate = config_dir / "bsv/config"
        if candidate.is_file():
            return candidate

    return user_config_home() / "bsv/config"
|
||||
|
||||
def default_local_repository_path() -> Path:
    """Return the default location of the local repository.

    This is the `bsv` entry of the directory given by `user_data_home()`.
    """
    data_home = user_data_home()
    return data_home / "bsv"
|
||||
|
||||
|
||||
def user_data_home() -> Path:
    """Return the base directory for user-specific data files.

    On Unix-like systems this is `$XDG_DATA_HOME` when it is set to an
    absolute path, and `~/.local/share` otherwise.

    Raises:
        NotImplementedError: on Windows, Darwin and Java platforms.
    """
    system = platform.system()
    if system in ("Windows", "Darwin", "Java"):
        # `NotImplemented` is a comparison sentinel, not an exception class;
        # calling it raised a confusing TypeError.
        raise NotImplementedError(f"{system} support not implemented yet")

    # Assume Unix.
    data_home = os.environ.get("XDG_DATA_HOME", "")
    if data_home:
        path = Path(data_home)
        if path.is_absolute():
            return path
    return Path.home() / ".local/share"
|
||||
|
||||
def user_config_home() -> Path:
    """Return the base directory for user-specific configuration files.

    On Unix-like systems this is `$XDG_CONFIG_HOME` when it is set to an
    absolute path, and `~/.config` otherwise.

    Raises:
        NotImplementedError: on Windows, Darwin and Java platforms.
    """
    system = platform.system()
    if system in ("Windows", "Darwin", "Java"):
        # `NotImplemented` is a comparison sentinel, not an exception class;
        # calling it raised a confusing TypeError.
        raise NotImplementedError(f"{system} support not implemented yet")

    # Assume Unix.
    config_home = os.environ.get("XDG_CONFIG_HOME", "")
    if config_home:
        path = Path(config_home)
        if path.is_absolute():
            return path
    return Path.home() / ".config"
|
||||
|
||||
def user_config_dirs() -> list[Path]:
    """Return the configuration directories to search, most important first.

    The list starts with `user_config_home()`, followed by the absolute
    entries of `$XDG_CONFIG_DIRS` (colon-separated, `/etc/xdg` when unset or
    empty). Relative entries are dropped.

    Raises:
        NotImplementedError: on Windows, Darwin and Java platforms.
    """
    system = platform.system()
    if system in ("Windows", "Darwin", "Java"):
        # `NotImplemented` is a comparison sentinel, not an exception class;
        # calling it raised a confusing TypeError.
        raise NotImplementedError(f"{system} support not implemented yet")

    # Assume Unix.
    raw_dirs = os.environ.get("XDG_CONFIG_DIRS") or "/etc/xdg"
    system_dirs = [
        candidate
        for candidate in map(Path, raw_dirs.split(":"))
        if candidate.is_absolute()
    ]
    return [user_config_home(), *system_dirs]
|
||||
|
||||
|
||||
class Hash(ABC):
    """Abstract interface of an incremental hash object."""

    # Name of the hash algorithm.
    name: str
    # Size of the value returned by `digest()`; presumably in bytes -- TODO confirm.
    digest_size: int

    @abstractmethod
    def update(self, *data: bytes | bytearray | memoryview):
        """Feed one or more buffers to the hash."""

    @abstractmethod
    def digest(self) -> bytes:
        """Return the digest of the data fed so far."""
|
||||
@@ -1,5 +1,5 @@
|
||||
# bsv - Backup, Synchronization, Versioning
|
||||
# Copyright (C) 2023 Simon Boyé
|
||||
# pybsv - Backup, Synchronization, Versioning.
|
||||
# Copyright (C) 2025 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
@@ -13,6 +13,11 @@
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
"""pybsv - A Backup, Synchronization and Versioning tool."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from bsv._version import __version__, __version_tuple__
|
||||
|
||||
|
||||
__all__ = ["__version__", "__version_tuple__"]
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# bsv - Backup, Synchronization, Versioning
|
||||
# Copyright (C) 2023 Simon Boyé
|
||||
# pybsv - Backup, Synchronization, Versioning.
|
||||
# Copyright (C) 2025 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
@@ -13,9 +13,11 @@
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
"""Main entry-point. Allow to use bsv module as a command."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from bsv.main import main
|
||||
from bsv.cli import cli
|
||||
|
||||
|
||||
exit(main())
|
||||
exit(cli())
|
||||
|
||||
315
src/bsv/cli.py
Normal file
315
src/bsv/cli.py
Normal file
@@ -0,0 +1,315 @@
|
||||
# pybsv - Backup, Synchronization, Versioning.
|
||||
# Copyright (C) 2025 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
"""Command-line interface. This is where all bsv commands are defined."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
import math
|
||||
from pathlib import Path, PurePosixPath
|
||||
import platform
|
||||
import sys
|
||||
from typing import Any, ClassVar, Literal
|
||||
|
||||
import click
|
||||
|
||||
from bsv.cli_utils import format_human_byte_size
|
||||
from bsv.repo import default_repository_path
|
||||
from bsv.vfs import (
|
||||
AlreadyExistError,
|
||||
FileMetadata,
|
||||
NotFoundError,
|
||||
Permissions,
|
||||
VirtualFileSystem,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
class RepositoryParams:
    """Global parameters shared by all commands."""

    # Location of the repository the command operates on.
    path: Path

    def as_filesystem(self) -> VirtualFileSystem:
        """Build a `VirtualFileSystem` from the repository path."""
        filesystem = VirtualFileSystem(self.path)
        return filesystem
|
||||
|
||||
|
||||
class PermissionsType(click.ParamType):
    """Converter for permissions given on the command line."""

    name: ClassVar[str] = "permissions"

    def convert(
        self, value: Any, param: click.Parameter | None, ctx: click.Context | None
    ) -> Permissions:
        """Convert an argument to a `Permissions` object.

        Values that already are `Permissions` are passed through. A
        `ValueError` raised by the `Permissions` constructor is reported as a
        click usage failure.
        """
        if not isinstance(value, Permissions):
            try:
                value = Permissions(value)
            except ValueError as err:
                # `fail` raises, so execution never continues past it.
                self.fail(str(err), param, ctx)
        return value
|
||||
|
||||
|
||||
class BsvPathType(click.ParamType):
    """Converter for bsv paths given on the command line."""

    name: ClassVar[str] = "bsv_path"

    def convert(
        self, value: Any, param: click.Parameter | None, ctx: click.Context | None
    ) -> PurePosixPath:
        """Convert an argument to a bsv path (absolute `PurePosixPath`).

        Values that already are `PurePosixPath` are returned unchanged, without
        the absolute-path check. Anything else must convert to an absolute
        path, otherwise a click usage failure is raised.
        """
        if isinstance(value, PurePosixPath):
            return value

        try:
            bsv_path = PurePosixPath(value)
        except ValueError as err:
            self.fail(str(err), param, ctx)

        if bsv_path.is_absolute():
            return bsv_path
        self.fail(f"{value} is not an absolute path", param, ctx)
|
||||
|
||||
|
||||
class AnyPathType(click.ParamType):
    """Converter for bsv or fs paths given on the command line.

    A value may carry an explicit `bsv:` or `fs:` prefix; without a prefix
    the kind given to the constructor is used.
    """

    name: str = "any_path"

    # Kind assumed for values that carry no explicit prefix.
    default: Literal["bsv", "fs"]

    def __init__(self, default: Literal["bsv", "fs"] = "fs"):
        """Create a converter that treats unprefixed values as `default`."""
        self.default = default

    def convert(
        self, value: Any, param: click.Parameter | None, ctx: click.Context | None
    ) -> PurePosixPath | Path:
        """Convert an argument to a bsv or fs path.

        Path objects are returned unchanged. Strings resolved as bsv paths
        are converted (and validated) by `BsvPathType`; strings resolved as
        fs paths become a `Path`. Anything else is a click usage failure.
        """
        if isinstance(value, (PurePosixPath, Path)):
            return value

        if not isinstance(value, str):
            # Forward `param` and `ctx` like the other converters do, so the
            # error message names the offending parameter.
            self.fail(f"{value} is not a string", param, ctx)

        path_type = self.default
        if value.startswith("bsv:"):
            path_type = "bsv"
            value = value.removeprefix("bsv:")
        elif value.startswith("fs:"):
            path_type = "fs"
            value = value.removeprefix("fs:")

        if path_type == "bsv":
            return BsvPathType().convert(value, param, ctx)
        return Path(value)
|
||||
|
||||
|
||||
# Shared converter instances used by the command declarations.
PERMISSIONS_TYPE = PermissionsType()
# TODO: accept bsv path and object id.
ANY_OBJECT_TYPE = BsvPathType()
|
||||
|
||||
|
||||
@click.group()
@click.version_option()
@click.option(
    "--repo", envvar="BSV_REPO", type=click.Path(resolve_path=True, path_type=Path)
)
@click.pass_context
def cli(ctx: click.Context, repo: Path):
    """Backup, Synchronization and Versioning (bsv) tool.

    bsv manages synchronization of several "devices" with history. This makes it
    suitable for different tasks:

    * Backup: Synchronize your data with remote devices that serve as backup. The
      remotes should be configured to keep previous versions of the files (using
      configurable rules) so even if a file is deleted/corrupted, a valid version can
      be found in the backup devices.
    * Synchronization: Synchronize your data among several devices you are working with.
      In case of conflict, the conflicting versions of a file are stored in each
      devices so it is possible to inspect and merge them to resolve the conflict.
    * Versioning: A local device can be used to store different versions of the same
      directory structure.
    """
    # The docstring above is the help text shown by click; keep it user-facing.
    # Fall back to the system-dependent default when no repository is given.
    repo_path = repo if repo else default_repository_path()
    ctx.obj = RepositoryParams(path=repo_path)
|
||||
|
||||
|
||||
@cli.command()
@click.pass_obj
def info(params: RepositoryParams):
    """Print information on the current repository."""
    repository_path = params.path
    print(f"Repository: {repository_path}")
|
||||
|
||||
|
||||
@cli.command()
@click.option("-d", "--device-name", default=platform.node, prompt=True)
@click.pass_obj
def init(params: RepositoryParams, device_name: str):
    """Initialize a bsv repository."""
    # Placeholder implementation: only echoes the parameters it received.
    received = (
        ("Repository path", params.path),
        ("Device name", device_name),
    )
    for label, value in received:
        print(f"{label}: {value!r}")
|
||||
|
||||
|
||||
@cli.command()
@click.argument("directories", nargs=-1, type=BsvPathType())
@click.option("-m", "--mode", type=PERMISSIONS_TYPE, default=Permissions(0o770))
@click.option("-p", "--parents", is_flag=True)
@click.option("-v", "--verbose", is_flag=True)
@click.pass_obj
def mkdir(
    params: RepositoryParams,
    directories: list[PurePosixPath],
    mode: Permissions,
    parents: bool = False,
    verbose: bool = False,
):
    """Make a directory in the current repository."""
    fs = params.as_filesystem()
    exit_status = 0

    for directory in directories:
        try:
            fs.mkdir(directory, mode=mode, parents=parents)
        except AlreadyExistError as error:
            # NOTE(review): reported on stderr but the exit status stays 0,
            # unlike the NotFoundError case -- confirm this is intended.
            click.echo(error, file=sys.stderr)
            continue
        except NotFoundError as error:
            exit_status = 1
            click.echo(error, file=sys.stderr)
            continue

        if verbose:
            click.echo(f"Created {directory}")

    # Every directory is attempted; the status reflects any NotFoundError.
    sys.exit(exit_status)
|
||||
|
||||
|
||||
@cli.command()
@click.argument("files", nargs=-1, type=BsvPathType())
@click.option("--filter", flag_value="hidden", default=True, hidden=True)
@click.option("-a", "--all", "filter", flag_value="all")
@click.option("-A", "--almost-all", "filter", flag_value="implied")
@click.option("-h", "--human-readable", is_flag=True)
@click.option("-l", "--list", is_flag=True)
@click.pass_obj
def ls(
    params: RepositoryParams,
    files: tuple[PurePosixPath],
    filter: Literal["hidden", "implied", "all"],
    human_readable: bool,
    list: bool,
):
    """List information about files."""
    # NOTE: the `filter` and `list` parameters shadow the builtins of the same
    # name inside this function (the names are imposed by the click options).
    fs = params.as_filesystem()
    targets = files if files else (PurePosixPath("/"),)
    show_headers = len(targets) > 1

    # Entries for which `is_excluded` is true are left out of the listing.
    if filter == "hidden":
        is_excluded = FileMetadata.is_hidden
    else:
        def is_excluded(_md):
            return False

    for position, target in enumerate(targets):
        if show_headers:
            # Blank line between listings, then a header naming the target.
            if position > 0:
                click.echo()
            click.echo(f"{target}:")

        entries = sorted(
            (md.path.name, md) for md in fs.iter_dir(target) if not is_excluded(md)
        )
        if filter == "all":
            # "." and ".." come first and are not part of the sorted entries.
            entries = [
                (".", fs.metadata(target)),
                ("..", fs.metadata(target.parent)),
                *entries,
            ]

        if not list:
            for name, _ in entries:
                click.echo(name)
            continue

        # Long format: render every row first so columns can be aligned.
        rows = []
        for name, md in entries:
            mode_text = str(md.unix_mode)
            if human_readable:
                size_text = format_human_byte_size(md.byte_size)
            else:
                size_text = str(md.byte_size)
            # Shown in local time, without the timezone suffix.
            stamp = md.modification_time.astimezone().replace(tzinfo=None)
            rows.append((mode_text, size_text, stamp.isoformat(" ", "seconds"), name))

        mode_width, size_width, time_width = (
            max((len(row[column]) for row in rows), default=0) for column in range(3)
        )
        for mode_text, size_text, time_text, name in rows:
            columns = [
                mode_text.ljust(mode_width),
                size_text.rjust(size_width),
                time_text.ljust(time_width),
                name,
            ]
            click.echo(" ".join(columns))
|
||||
|
||||
|
||||
@cli.command()
@click.argument("object", type=ANY_OBJECT_TYPE)
@click.pass_obj
def show(
    params: RepositoryParams,
    object: PurePosixPath,
):
    """Show a bsv object."""
    # Placeholder implementation: only echoes the argument it received.
    description = repr(object)
    print(f"object: {description}")
|
||||
|
||||
|
||||
@cli.command()
@click.argument("srcs", nargs=-1, type=AnyPathType(default="bsv"))
@click.argument("dst", type=AnyPathType(default="bsv"))
@click.option("-r", "--recursive", is_flag=True)
@click.pass_obj
def cp(
    params: RepositoryParams,
    srcs: list[PurePosixPath | Path],
    dst: PurePosixPath | Path,
    recursive: bool,
):
    """Copy files or directories."""
    # Placeholder implementation: only echoes the arguments it received.
    received = (("srcs", srcs), ("dst", dst), ("recursive", recursive))
    for label, value in received:
        print(f"{label}: {value!r}")
|
||||
|
||||
|
||||
@cli.command()
@click.argument("targets", nargs=-1, type=BsvPathType())
@click.option("-r", "--recursive", is_flag=True)
@click.pass_obj
def rm(
    params: RepositoryParams,
    targets: list[PurePosixPath],
    recursive: bool,
):
    """Remove files or directories."""
    # Placeholder implementation: only echoes the arguments it received.
    received = (("targets", targets), ("recursive", recursive))
    for label, value in received:
        print(f"{label}: {value}")
|
||||
48
src/bsv/cli_utils.py
Normal file
48
src/bsv/cli_utils.py
Normal file
@@ -0,0 +1,48 @@
|
||||
# pybsv - Backup, Synchronization, Versioning.
|
||||
# Copyright (C) 2025 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
"""Tools and utilities to build the command-line interface."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Final
|
||||
|
||||
|
||||
# Binary (power-of-1024) unit prefixes, indexed by the exponent of 1024.
BINARY_PREFIXES: Final[list[str]] = [
    "", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi", "Ri", "Qi",
]


def format_human_byte_size(byte_size: int) -> str:
    """Format the given `byte_size` as a human-readable string.

    The value is scaled to the largest binary prefix that fits and rounded
    so that about three significant digits are shown (e.g. "1.5KiB",
    "121KiB").
    """
    last_index = len(BINARY_PREFIXES) - 1
    # Each prefix step is 2**10, so the bit length gives the exponent.
    exponent = (byte_size.bit_length() - 1) // 10
    index = min(max(exponent, 0), last_index)

    scaled = byte_size / 1024**index
    integer_digits = len(str(int(scaled)))
    value = round(scaled, max(0, 3 - integer_digits))

    # Rounding may reach 1024 of the current unit: move to the next unit.
    if value == 1024 and index < last_index:
        value, index = 1, index + 1
    return f"{value:.16g}{BINARY_PREFIXES[index]}B"
|
||||
@@ -1,111 +0,0 @@
|
||||
# bsv - Backup, Synchronization, Versioning
|
||||
# Copyright (C) 2023 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from __future__ import annotations
|
||||
|
||||
from argparse import ArgumentParser
|
||||
from os import getlogin
|
||||
from pathlib import Path
|
||||
import platform
|
||||
|
||||
from bsv.command import command
|
||||
|
||||
|
||||
def init_parser(parser: ArgumentParser):
    """Declare the arguments of the `init` command on `parser`."""
    # (names, keyword arguments) for each `add_argument` call, in order.
    argument_specs = (
        (
            ("--name", "-d"),
            {"help": "Name of the repository. Default to system hostname."},
        ),
        (
            ("--interactive", "-i"),
            {
                "action": "store_true",
                "help": "Prompt the user for configuration choices.",
            },
        ),
        (
            ("destination",),
            {
                "type": Path,
                "nargs": "?",
                "help": "Path to a non-existing or empty folder where bsv data will be stored.",
            },
        ),
    )
    for names, options in argument_specs:
        parser.add_argument(*names, **options)
|
||||
|
||||
@command(init_parser)
def init(
    repository_path: Path | None,
    destination: Path | None = None,
    name: str | None = None,
    interactive: bool = False,
) -> int:
    """Initialize a new bsv repository.
    """
    # Creates `destination` (which must be missing or empty, with an existing
    # parent) and writes a `bsv_config.toml` file into it. `name` defaults to
    # the host name and `destination` to the current directory; in
    # interactive mode the user may override both. `repository_path` is not
    # used here. Returns 0 on success and raises RuntimeError on any
    # validation or filesystem failure.
    from datetime import datetime as DateTime
    import tomlkit

    if name is None:
        name = platform.node()

    if destination is None:
        # TODO: Choose a sensible system-dependent path.
        destination = Path.cwd()

    if interactive:
        name = input(f"Repository name: (default to {name})\n").strip() or name
        # A `Path` is always truthy, so the fallback to the default has to be
        # decided on the raw answer, not on the converted path.
        answer = input(f"Destination: (default to {destination})\n").strip()
        if answer:
            destination = Path(answer)
        if not destination.is_absolute():
            destination = Path.cwd() / destination

    if not name:
        raise RuntimeError("repository name cannot be empty")
    if not destination.parent.exists():
        raise RuntimeError(f"destination directory {destination.parent} does not exists")
    if destination.exists() and not destination.is_dir():
        raise RuntimeError(f"destination {destination} exists but is not a directory")
    if destination.exists() and any(destination.iterdir()):
        raise RuntimeError(f"destination directory {destination} is not empty")

    try:
        destination.mkdir(exist_ok=True)
    except OSError as err:
        raise RuntimeError(f"failed to create destination directory {destination}") from err

    bsv_table = tomlkit.table()
    bsv_table.add(tomlkit.comment("Name of the repository."))
    bsv_table.add(tomlkit.comment("Ideally, this should be unique among all connected repositories."))
    bsv_table.add("name", name)
    bsv_table.add(tomlkit.nl())
    bsv_table.add(tomlkit.comment("Mapping between bsv tree and the actual filesystem."))
    bsv_table.add("path_map", tomlkit.array())

    cas_table = tomlkit.table()
    cas_table.add("type", "simple")
    cas_table.add("hash", "sha256")

    doc = tomlkit.document()
    doc.add(tomlkit.comment("bsv repository configuration"))
    doc.add(tomlkit.comment(f"Created by {getlogin()} on {DateTime.now().isoformat()}."))
    doc.add(tomlkit.nl())
    doc.add("bsv", bsv_table)
    doc.add(tomlkit.nl())
    doc.add("cas", cas_table)

    config_path = destination / "bsv_config.toml"
    try:
        stream = config_path.open("w", encoding="utf-8")
    except OSError as err:
        # The message was missing its `f` prefix and printed the placeholder.
        raise RuntimeError(f"failed to open configuration file {config_path}") from err

    with stream:
        tomlkit.dump(doc, stream)

    return 0
|
||||
0
src/bsv/py.typed
Normal file
0
src/bsv/py.typed
Normal file
41
src/bsv/repo.py
Normal file
41
src/bsv/repo.py
Normal file
@@ -0,0 +1,41 @@
|
||||
# pybsv - Backup, Synchronization, Versioning.
|
||||
# Copyright (C) 2025 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
import platform
|
||||
|
||||
|
||||
def default_repository_path() -> Path:
    """Return the system-dependent default repository path.

    On Unix-like systems this is `bsv/repo` below `$XDG_DATA_HOME`, or below
    `~/.local/share` when that variable is unset or empty.

    Raises:
        NotImplementedError: on Windows, Darwin and Java platforms.
        RuntimeError: if `$XDG_DATA_HOME` is set to a relative or
            non-existing path.
    """
    if platform.system() in ("Windows", "Darwin", "Java"):
        msg = f"default_repository_path does not support {platform.system()} system"
        raise NotImplementedError(msg)

    # Assume Unix.
    # See https://specifications.freedesktop.org/basedir-spec/latest/
    data_home = os.environ.get("XDG_DATA_HOME", "")
    if not data_home:
        return Path.home() / ".local/share" / "bsv/repo"

    base = Path(data_home)
    if base.is_absolute() and base.exists():
        return base / "bsv/repo"

    msg = f"invalid XDG_DATA_HOME ({base}): path is relative or does not exists"
    raise RuntimeError(msg)
|
||||
@@ -1,437 +0,0 @@
|
||||
# bsv - Backup, Synchronization, Versioning
|
||||
# Copyright (C) 2023 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime as DateTime, timedelta as TimeDelta
|
||||
import hashlib
|
||||
from io import BytesIO
|
||||
from pathlib import Path, PurePosixPath
|
||||
import platform
|
||||
import tomllib
|
||||
from typing import Any, BinaryIO, Callable, Type
|
||||
|
||||
from fastcdc import fastcdc
|
||||
import tomlkit
|
||||
|
||||
from bsv import __version__
|
||||
from bsv.simple_cas import SimpleCas
|
||||
from bsv.simple_cas.cas import ConfigError, Digest, SimpleCas
|
||||
from bsv.simple_cas.util import Hash, read_exact, read_exact_or_eof
|
||||
|
||||
|
||||
# Default chunk size limits; used below as the chunker's min/avg/max sizes.
DEFAULT_MIN_CHUNK_SIZE = 2**12  # 4096
DEFAULT_AVG_CHUNK_SIZE = 2**16  # 65536
DEFAULT_MAX_CHUNK_SIZE = 2**20  # 1048576
|
||||
|
||||
|
||||
class Repository:
    """A bsv repository stored in a directory on the local filesystem.

    The configuration is read from `bsv_config.toml` in the repository
    directory. Objects are stored in a content-addressable store (CAS);
    blobs are split into content-defined chunks before being stored.

    The repository is a re-entrant context manager: the CAS is entered when
    the outermost `with` block starts and exited when it ends.
    """

    _path: Path
    _name: str

    _cas: SimpleCas
    _min_chunk_size: int = DEFAULT_MIN_CHUNK_SIZE
    _avg_chunk_size: int = DEFAULT_AVG_CHUNK_SIZE
    _max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE

    _path_map: list[PathPair]
    # _remotes: list[object]

    # Number of currently nested `with` blocks on this repository.
    _context_depth: int = 0

    def __init__(self, path: Path):
        """Open the repository located at `path` and load its configuration."""
        self._path = path

        with self.config_file.open("rb") as stream:
            config = tomllib.load(stream)

        bsv = config.get("bsv", {})

        self._name = bsv.get("name") or platform.node()

        self._cas = make_cas(
            bsv.get("cas"),
            self._path,
            lambda: hashlib.new(bsv.get("hash")), # type: ignore
        )
        # Fall back to the defaults when the configuration omits a size;
        # a bare `get` would overwrite the class defaults with `None`.
        self._min_chunk_size = bsv.get("min_chunk_size", DEFAULT_MIN_CHUNK_SIZE)
        self._avg_chunk_size = bsv.get("avg_chunk_size", DEFAULT_AVG_CHUNK_SIZE)
        self._max_chunk_size = bsv.get("max_chunk_size", DEFAULT_MAX_CHUNK_SIZE)

        self._path_map = [
            PathPair.from_obj(pair)
            for pair in bsv.get("path_map", [])
        ]

    @property
    def path(self) -> Path:
        """Directory containing the repository."""
        return self._path

    @property
    def config_file(self) -> Path:
        """Path of the repository configuration file."""
        return self.path / "bsv_config.toml"

    @property
    def name(self) -> str:
        """Name of the repository."""
        return self._name

    @property
    def path_map(self) -> list[PathPair]:
        """A copy of the configured path mapping."""
        return list(self._path_map)

    def get_blob(self, digest: Digest) -> Blob:
        """Read the blob object identified by `digest`."""
        with self:
            return self._read(digest, object_type=b"blob", cls=Blob) # type: ignore

    def add_blob(self, stream: BinaryIO) -> Digest:
        """Store the content of `stream` as a chunked blob; return its digest."""
        with self:
            return self._write(b"blob", stream)

    def get_tree(self, digest: Digest) -> Tree:
        """Read the tree object identified by `digest`."""
        with self:
            return Tree.from_bytes(self, self._cas.read(digest, object_type=b"tree").data)

    def add_tree(self, tree: Tree) -> Digest:
        """Store `tree` and return its digest."""
        with self:
            return self._cas.write(b"tree", tree.to_bytes())

    def get_snapshot(self, digest: Digest) -> Snapshot:
        """Read the snapshot object identified by `digest`."""
        with self:
            return Snapshot.from_bytes(self, self._cas.read(digest, object_type=b"snap").data)

    def add_snapshot(self, snapshot: Snapshot) -> Digest:
        """Store `snapshot` and return its digest."""
        with self:
            return self._cas.write(b"snap", snapshot.to_bytes())

    def _read(self, digest: Digest, object_type: bytes, cls: Type[ChunkedObject]) -> ChunkedObject:
        """Read a chunked object of `object_type` and decode it with `cls`."""
        obj = self._cas.read(digest, object_type=object_type)
        stream = BytesIO(obj.data)
        return cls.from_stream(self, stream, digest_size=self._cas._digest_size)

    def _write(self, object_type: bytes, stream: BinaryIO) -> Digest:
        """Chunk `stream`, store every chunk, then store the chunk index.

        The index object holds the total size on 8 bytes, followed by one
        (chunk digest, chunk length on 4 bytes) record per chunk.
        """
        out = BytesIO()
        size = 0
        for chunk in fastcdc(
            stream,
            min_size = self._min_chunk_size,
            avg_size = self._avg_chunk_size,
            max_size = self._max_chunk_size,
            fat = True,
        ):
            size += chunk.length
            digest = self._cas.write(b"chnk", chunk.data)
            out.write(digest.digest)
            out.write(chunk.length.to_bytes(4))
        return self._cas.write(object_type, size.to_bytes(8) + out.getvalue())

    def __enter__(self):
        """Enter the CAS context on the outermost `with` block."""
        if self._context_depth == 0:
            self._cas.__enter__()
        self._context_depth += 1
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Exit the CAS context when the outermost `with` block ends."""
        self._context_depth -= 1
        if self._context_depth == 0:
            return self._cas.__exit__(exc_type, exc_value, traceback)
|
||||
|
||||
|
||||
def create_repository(
    destination: Path,
    name: str,
    cas: str = "simple",
    hash: str = "sha256",
    min_chunk_size: int = DEFAULT_MIN_CHUNK_SIZE,
    avg_chunk_size: int = DEFAULT_AVG_CHUNK_SIZE,
    max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE,
):
    """Create a new repository in `destination` and return it.

    `destination` must be missing or an empty directory, and its parent must
    exist. A `bsv_config.toml` file holding the given settings is written
    into it, then the repository is opened.

    Raises:
        RuntimeError: if `name` is empty, if `destination` is not usable, or
            if the directory or the configuration file cannot be created.
    """
    from datetime import datetime as DateTime
    from os import getlogin

    if not name:
        raise RuntimeError("repository name cannot be empty")
    if not destination.parent.exists():
        raise RuntimeError(f"destination directory {destination.parent} does not exists")
    if destination.exists() and not destination.is_dir():
        raise RuntimeError(f"destination {destination} exists but is not a directory")
    if destination.exists() and any(destination.iterdir()):
        raise RuntimeError(f"destination directory {destination} is not empty")

    try:
        destination.mkdir(exist_ok=True)
    except OSError as err:
        raise RuntimeError(f"failed to create destination directory {destination}") from err

    bsv_table = tomlkit.table()
    bsv_table.add(tomlkit.comment("Name of the repository."))
    bsv_table.add(tomlkit.comment("Ideally, this should be unique among all connected repositories."))
    bsv_table.add("name", name)
    bsv_table.add(tomlkit.nl())
    bsv_table.add(tomlkit.comment("Mapping between bsv tree and the actual filesystem."))
    bsv_table.add("path_map", tomlkit.array())
    bsv_table.add("cas", cas)
    bsv_table.add("hash", hash)
    bsv_table.add("min_chunk_size", min_chunk_size)
    bsv_table.add("avg_chunk_size", avg_chunk_size)
    bsv_table.add("max_chunk_size", max_chunk_size)

    doc = tomlkit.document()
    doc.add(tomlkit.comment("bsv repository configuration"))
    doc.add(tomlkit.comment(f"Created by {getlogin()} on {DateTime.now().isoformat()}."))
    doc.add(tomlkit.nl())
    doc.add("bsv", bsv_table)

    config_path = destination / "bsv_config.toml"
    try:
        stream = config_path.open("w", encoding="utf-8")
    except OSError as err:
        # The message was missing its `f` prefix and printed the placeholder.
        raise RuntimeError(f"failed to open configuration file {config_path}") from err

    with stream:
        tomlkit.dump(doc, stream)

    return Repository(destination)
|
||||
|
||||
|
||||
def make_cas(cas_name: str, path: Path, hash_factory: Callable[[], Hash]) -> SimpleCas:
    """Instantiate the CAS implementation called `cas_name`.

    Only ``"simple"`` is recognized; it builds a `SimpleCas` from `path` and
    `hash_factory`.

    Raises:
        ConfigError: If `cas_name` is not a known implementation.
    """
    if cas_name != "simple":
        raise ConfigError(f"unknown cas name {cas_name}")
    return SimpleCas(path, hash_factory)
|
||||
|
||||
|
||||
@dataclass
class ChunkedObject:
    """An object stored as a total size plus an ordered list of chunks."""

    repo: Repository
    size: int
    chunks: list[Chunk]

    @classmethod
    def from_stream(cls, repo: Repository, stream: BinaryIO, digest_size: int) -> ChunkedObject:
        """Decode a chunk index from `stream`.

        The stream starts with the total size on 8 bytes, followed by chunk
        records that are read until `Chunk.from_stream` returns `None`.
        """
        result = cls(
            repo = repo,
            size = int.from_bytes(read_exact(stream, 8)),
            chunks = [],
        )
        while True:
            record = Chunk.from_stream(stream, digest_size)
            if record is None:
                break
            result.chunks.append(record)
        return result
|
||||
|
||||
|
||||
@dataclass
class Blob(ChunkedObject):
    """A chunked object that can be read back sequentially, like a stream."""

    # Index of the next chunk to fetch from the CAS.
    _chunk_index: int = 0
    # Bytes already fetched but not yet handed to the caller.
    _chunk_data: bytes = b""

    def read(self, num_bytes: int = -1) -> bytes:
        """Read up to `num_bytes` bytes, or everything left if negative.

        Whole chunks are fetched until enough data is available. Any surplus
        is kept for the next call, so at most `num_bytes` bytes are returned
        and no byte is returned twice.

        Returns:
            The bytes read; shorter than `num_bytes` (possibly empty) once
            the last chunk has been consumed.
        """
        parts = [self._chunk_data]
        size = len(self._chunk_data)
        while (num_bytes < 0 or size < num_bytes) and self._chunk_index < len(self.chunks):
            part = self._read_chunk()
            parts.append(part)
            size += len(part)

        data = b"".join(parts)
        if num_bytes < 0:
            self._chunk_data = b""
            return data
        # Slicing past the end yields b"", which also covers the end-of-data
        # and exact-fit cases.
        self._chunk_data = data[num_bytes:]
        return data[:num_bytes]

    def read1(self) -> bytes:
        """Return buffered bytes if any, else the next chunk (b"" at the end)."""
        if self._chunk_data:
            pending, self._chunk_data = self._chunk_data, b""
            return pending
        return self._read_chunk()

    def _read_chunk(self) -> bytes:
        """Fetch the next chunk from the CAS, or b"" when none is left."""
        if self._chunk_index == len(self.chunks):
            return b""
        stored = self.repo._cas.read(self.chunks[self._chunk_index].digest, object_type=b"chnk")
        self._chunk_index += 1
        return stored.data
|
||||
|
||||
|
||||
@dataclass
class Tree:
    """A directory listing: the `TreeItem`s of one directory."""

    repo: Repository
    items: list[TreeItem]

    @classmethod
    def from_stream(cls, repo: Repository, stream: BinaryIO) -> Tree:
        """Decode items from `stream` until `TreeItem.from_stream` returns `None`."""
        tree = Tree(repo, [])
        while True:
            entry = TreeItem.from_stream(stream, repo._cas._digest_size)
            if entry is None:
                return tree
            tree.items.append(entry)

    @classmethod
    def from_bytes(cls, repo: Repository, data: bytes) -> Tree:
        """Decode a tree from an in-memory buffer."""
        return cls.from_stream(repo, BytesIO(data))

    def write(self, stream: BinaryIO):
        """Write every item to `stream`, ordered by name.

        Note that `self.items` is sorted in place as a side effect.
        """
        self.items.sort(key=lambda entry: entry.name)
        for entry in self.items:
            entry.write(stream)

    def to_bytes(self) -> bytes:
        """Return the serialized tree as bytes."""
        buffer = BytesIO()
        self.write(buffer)
        return buffer.getvalue()
|
||||
|
||||
|
||||
# Reference instant (1970-01-01 UTC) from which integer timestamps are counted
# by `time_from_timestamp` / `timestamp_from_time`.
EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
|
||||
|
||||
@dataclass
class TreeItem:
    """One entry of a `Tree`: a name, the digest of its content and metadata."""

    name: str
    digest: Digest
    permissions: int
    creation_timestamp: int
    modification_timestamp: int

    def __init__(
        self,
        name: str,
        digest: Digest,
        permissions: int,
        creation_timestamp: int,
        modification_timestamp: int,
    ):
        """Create an item, rejecting names that contain a path separator.

        Raises:
            ValueError: If `name` contains ``/`` or ``\\``.
        """
        # Test each separator on its own: `"/\\" in name` only matched the
        # two-character sequence and let "a/b" through.
        if "/" in name or "\\" in name:
            raise ValueError(f"invalid tree item name {name}")
        self.name = name
        self.digest = digest
        self.permissions = permissions
        self.creation_timestamp = creation_timestamp
        self.modification_timestamp = modification_timestamp

    @property
    def creation_time(self) -> DateTime:
        """Creation time, converted from `creation_timestamp`."""
        return time_from_timestamp(self.creation_timestamp)

    @creation_time.setter
    def creation_time(self, time: DateTime):
        self.creation_timestamp = timestamp_from_time(time)

    @property
    def modification_time(self) -> DateTime:
        """Modification time, converted from `modification_timestamp`."""
        return time_from_timestamp(self.modification_timestamp)

    @modification_time.setter
    def modification_time(self, time: DateTime):
        self.modification_timestamp = timestamp_from_time(time)

    @classmethod
    def from_stream(cls, stream: BinaryIO, digest_size: int) -> TreeItem | None:
        """Decode one item from `stream`.

        Field order: digest, permissions (2 bytes), creation timestamp
        (8 bytes, signed), modification timestamp (8 bytes, signed), name
        length (2 bytes), UTF-8 name.

        Returns:
            The decoded item, or `None` when `read_exact_or_eof` returns
            `None` for the digest.
        """
        digest_bytes = read_exact_or_eof(stream, digest_size)
        if digest_bytes is None:
            return None
        # Keyword arguments are evaluated in source order, which is the
        # order of the fields in the stream.
        return TreeItem(
            digest = Digest(digest_bytes),
            permissions = int.from_bytes(read_exact(stream, 2)),
            creation_timestamp = int.from_bytes(read_exact(stream, 8), signed=True),
            modification_timestamp = int.from_bytes(read_exact(stream, 8), signed=True),
            name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
        )

    def write(self, stream: BinaryIO):
        """Encode this item to `stream`, in the layout read by `from_stream`."""
        stream.write(self.digest.digest)
        stream.write(self.permissions.to_bytes(2))
        stream.write(self.creation_timestamp.to_bytes(8, signed=True))
        stream.write(self.modification_timestamp.to_bytes(8, signed=True))
        name_bytes = self.name.encode("utf-8")
        stream.write(len(name_bytes).to_bytes(2))
        stream.write(name_bytes)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Snapshot:
|
||||
repo: Repository
|
||||
tree_digest: Digest
|
||||
repo_name: str
|
||||
timestamp: int
|
||||
|
||||
@property
|
||||
def time(self) -> DateTime:
|
||||
return time_from_timestamp(self.timestamp)
|
||||
@time.setter
|
||||
def time(self, time: DateTime):
|
||||
self.timestamp = timestamp_from_time(time)
|
||||
|
||||
@classmethod
|
||||
def from_stream(cls, repo: Repository, stream: BinaryIO) -> Snapshot:
|
||||
return Snapshot(
|
||||
repo = repo,
|
||||
tree_digest = Digest(read_exact(stream, repo._cas._digest_size)),
|
||||
repo_name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
|
||||
timestamp = int.from_bytes(read_exact(stream, 8), signed=True),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_bytes(cls, repo: Repository, data: bytes) -> Snapshot:
|
||||
stream = BytesIO(data)
|
||||
return cls.from_stream(repo, stream)
|
||||
|
||||
def write(self, stream: BinaryIO):
|
||||
stream.write(self.tree_digest.digest)
|
||||
repo_name_bytes = self.repo_name.encode("utf-8")
|
||||
stream.write(len(repo_name_bytes).to_bytes(2))
|
||||
stream.write(repo_name_bytes)
|
||||
stream.write(self.timestamp.to_bytes(8, signed=True))
|
||||
|
||||
def to_bytes(self) -> bytes:
|
||||
stream = BytesIO()
|
||||
self.write(stream)
|
||||
return stream.getvalue()
|
||||
|
||||
|
||||
@dataclass
class Chunk:
    """Reference to one stored chunk: its digest and its length in bytes."""

    digest: Digest
    size: int

    @classmethod
    def from_stream(cls, stream: BinaryIO, digest_size: int) -> Chunk | None:
        """Decode one chunk record (digest, then size on 4 bytes).

        Returns:
            The record, or `None` when `read_exact_or_eof` returns `None`
            for the digest.
        """
        raw_digest = read_exact_or_eof(stream, digest_size)
        if raw_digest is None:
            return None
        chunk_digest = Digest(raw_digest)
        chunk_size = int.from_bytes(read_exact(stream, 4))
        return cls(digest = chunk_digest, size = chunk_size)
|
||||
|
||||
|
||||
class PathPair:
    """Associate a path inside the bsv tree with a path on the file system."""

    bsv: PurePosixPath
    fs: Path

    def __init__(self, bsv: PurePosixPath, fs: Path):
        """Store both paths as given."""
        self.bsv = bsv
        self.fs = fs

    @classmethod
    def from_obj(cls, obj: dict[str, Any]) -> PathPair:
        """Build a pair from a mapping with ``"bsv"`` and ``"fs"`` entries.

        Raises:
            ValueError: If either path is not absolute.
        """
        bsv = PurePosixPath(obj["bsv"])
        fs = Path(obj["fs"])

        if not bsv.is_absolute() or not fs.is_absolute():
            raise ValueError("paths in path_map must be absolute")

        # Store the converted paths: the raw `obj` values were stored before,
        # leaving plain strings in attributes declared as paths.
        return cls(bsv = bsv, fs = fs)

    def __lt__(self, rhs: PathPair) -> bool:
        """Order pairs by their bsv path."""
        return self.bsv < rhs.bsv
|
||||
|
||||
|
||||
def time_from_timestamp(timestamp: int) -> DateTime:
    """Convert a count of microseconds since `EPOCH` into a `DateTime`."""
    offset = TimeDelta(microseconds=timestamp)
    return EPOCH + offset
|
||||
|
||||
def timestamp_from_time(time: DateTime) -> int:
    """Convert `time` into a whole number of microseconds since `EPOCH`.

    `time` is converted to UTC with `astimezone` before the subtraction.
    """
    elapsed = time.astimezone(UTC) - EPOCH
    one_microsecond = TimeDelta(microseconds=1)
    return elapsed // one_microsecond
|
||||
348
src/bsv/vfs.py
Normal file
348
src/bsv/vfs.py
Normal file
@@ -0,0 +1,348 @@
|
||||
# pybsv - Backup, Synchronization, Versioning.
|
||||
# Copyright (C) 2025 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
"""Provide a virtual file system interface alongside associated tools."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
from functools import total_ordering
|
||||
import os
|
||||
from pathlib import Path, PurePosixPath
|
||||
from stat import S_IFDIR, S_IFLNK, S_IFMT, S_IFREG, S_IMODE, filemode
|
||||
from typing import TYPE_CHECKING, Any, BinaryIO, Literal, Self
|
||||
|
||||
from typing_extensions import Buffer
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Iterator
|
||||
from os import stat_result
|
||||
|
||||
|
||||
# Values accepted wherever a bsv path is expected: a `PurePosixPath`, or a
# string that `VirtualFileSystem._make_path` converts into one.
AnyBsvPath = PurePosixPath | str
|
||||
|
||||
|
||||
class FsError(RuntimeError):
    """Base error type raised by `VirtualFileSystem` operations."""
|
||||
|
||||
|
||||
class AlreadyExistError(FsError):
    """Raised when trying to create an item that already exists."""
|
||||
|
||||
|
||||
class NotFoundError(FsError):
    """Raised when trying to access an item that does not exist."""
|
||||
|
||||
|
||||
class Permissions:
    """Unix-style permission bits of a file system object."""

    unix_perms: int

    def __init__(self, unix_perms: int | str = 0o640):
        """Store `unix_perms`; a string is parsed as an octal number."""
        self.unix_perms = (
            int(unix_perms, 8) if isinstance(unix_perms, str) else unix_perms
        )

    def __eq__(self, rhs: Any) -> bool:
        """Compare permission bits; defer to `rhs` for any other type."""
        if not isinstance(rhs, Permissions):
            return NotImplemented
        return rhs.unix_perms == self.unix_perms

    def __repr__(self) -> str:
        """Return python code that rebuilds this object, e.g. ``Permissions(0o0640)``."""
        octal_digits = oct(self.unix_perms)[2:].zfill(4)
        return f"Permissions(0o{octal_digits})"

    def __str__(self) -> str:
        """Return the permissions in ``rwxrwxrwx`` form."""
        # `filemode` puts a file-type character first; drop it.
        symbolic = filemode(self.unix_perms)
        return symbolic[1:]
|
||||
|
||||
|
||||
# Permissions used by `VirtualFileSystem.mkdir` when no mode is given (rwxrwx---).
DEFAULT_DIR_PERMS = Permissions(0o770)
# Default permissions for files (rw-r-----).
DEFAULT_FILE_PERMS = Permissions(0o640)


# Kinds of file system objects distinguished by the vfs.
FileType = Literal["dir", "file", "symlink", "other"]

# Maps the file-type bits of `st_mode` (as extracted by `S_IFMT`) to a
# `FileType`; `FileMetadata.from_stat` reports anything else as "other".
_IFMT_MAP: dict[int, FileType] = {
    S_IFDIR: "dir",
    S_IFREG: "file",
    S_IFLNK: "symlink",
}
|
||||
|
||||
|
||||
@total_ordering
|
||||
class FileMetadata:
|
||||
"""Metadata associated with vfs files: file type, permissions, etc."""
|
||||
|
||||
path: PurePosixPath
|
||||
type: FileType
|
||||
permissions: Permissions
|
||||
modification_time: datetime
|
||||
byte_size: int
|
||||
_stat: stat_result
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
path: PurePosixPath,
|
||||
*,
|
||||
type: FileType,
|
||||
permissions: Permissions,
|
||||
modification_time: datetime,
|
||||
byte_size: int,
|
||||
):
|
||||
"""Create a `FileMetadata`."""
|
||||
self.path = path
|
||||
self.type = type
|
||||
self.permissions = permissions
|
||||
self.modification_time = modification_time
|
||||
self.byte_size = byte_size
|
||||
|
||||
@classmethod
|
||||
def from_stat(
|
||||
cls,
|
||||
path: PurePosixPath,
|
||||
stat: stat_result,
|
||||
) -> Self:
|
||||
"""Create a `FileMetadata` from a `stat_result`."""
|
||||
return cls(
|
||||
path,
|
||||
type=_IFMT_MAP.get(S_IFMT(stat.st_mode), "other"),
|
||||
permissions=Permissions(S_IMODE(stat.st_mode)),
|
||||
modification_time=datetime.fromtimestamp(stat.st_mtime, UTC),
|
||||
byte_size=stat.st_size,
|
||||
)
|
||||
|
||||
@property
|
||||
def unix_mode(self) -> str:
|
||||
"""Return unix-like mode in the form '-rwxrwxrwx'."""
|
||||
return UNIX_MODE_FILE_TYPE[self.type] + str(self.permissions)
|
||||
|
||||
@property
|
||||
def is_hidden(self) -> bool:
|
||||
"""Return true if the file starts with a '.'."""
|
||||
return self.path.name.startswith(".")
|
||||
|
||||
@property
|
||||
def is_file(self) -> bool:
|
||||
"""Test if this is a file."""
|
||||
return self.type == "file"
|
||||
|
||||
@property
|
||||
def is_dir(self) -> bool:
|
||||
"""Test if this is a directory."""
|
||||
return self.type == "dir"
|
||||
|
||||
@property
|
||||
def is_symlink(self) -> bool:
|
||||
"""Test if this is a symbolic link."""
|
||||
return self.type == "symlink"
|
||||
|
||||
@property
|
||||
def is_other(self) -> bool:
|
||||
"""Test if this is a symbolic link."""
|
||||
return self.type == "other"
|
||||
|
||||
def _as_tuple(
|
||||
self,
|
||||
) -> tuple[PurePosixPath, FileType, Permissions, datetime, int]:
|
||||
return (
|
||||
self.path,
|
||||
self.type,
|
||||
self.permissions,
|
||||
self.modification_time,
|
||||
self.byte_size,
|
||||
)
|
||||
|
||||
def __eq__(self, rhs: Any) -> bool:
|
||||
"""Test if two `Metadata` are the same."""
|
||||
return (
|
||||
self._as_tuple() == rhs._as_tuple()
|
||||
if isinstance(rhs, FileMetadata)
|
||||
else NotImplemented
|
||||
)
|
||||
|
||||
def __lt__(self, rhs: Any) -> bool:
|
||||
"""Compare `rhs.path` with `self.path`."""
|
||||
return self.path < rhs.path if isinstance(rhs, FileMetadata) else NotImplemented
|
||||
|
||||
|
||||
# First character of the unix mode string (see `FileMetadata.unix_mode`) for
# each file type. `FileType` spells symbolic links "symlink"; the "link" key
# is kept so existing lookups keep working.
UNIX_MODE_FILE_TYPE = {
    "dir": "d",
    "file": "-",
    "other": "o",
    "symlink": "l",
    "link": "l",
}
|
||||
|
||||
|
||||
class VirtualFileSystem:
    """Represent a file system, with common file system operations.

    Every path given to the methods is an absolute bsv path; it is resolved
    relative to the real directory `self.path`.
    """

    path: Path

    def __init__(self, path: Path):
        """Initialize the file system to point to `path`."""
        self.path = path

    def exists(self, path: AnyBsvPath) -> bool:
        """Test if `path` points to an existing item."""
        path = self._make_path(path)
        return self._real_path(path).exists()

    def is_file(self, path: AnyBsvPath) -> bool:
        """Test if `path` is a file."""
        return self.metadata(path).is_file

    def is_dir(self, path: AnyBsvPath) -> bool:
        """Test if `path` is a directory."""
        return self.metadata(path).is_dir

    def is_symlink(self, path: AnyBsvPath) -> bool:
        """Test if `path` is a symbolic link."""
        return self.metadata(path).is_symlink

    def is_other(self, path: AnyBsvPath) -> bool:
        """Test if `path` is not a file, directory or symbolic link."""
        return self.metadata(path).is_other

    def metadata(self, path: AnyBsvPath) -> FileMetadata:
        """Return the metadata of a given object.

        Raises:
            NotFoundError: If `path` does not exist.
            FsError: If the metadata cannot be read.
        """
        metadata = self.metadata_or_none(path)
        if metadata is None:
            msg = f"file '{path}' not found"
            raise NotFoundError(msg)
        return metadata

    def metadata_or_none(self, path: AnyBsvPath) -> FileMetadata | None:
        """Return the metadata of a given object or `None` if it does not exist.

        Symbolic links are not followed.

        Raises:
            FsError: If the metadata cannot be read for another reason.
        """
        path = self._make_path(path)
        try:
            stat = self._real_path(path).stat(follow_symlinks=False)
        except FileNotFoundError:
            return None
        except OSError as err:
            msg = f"failed to read '{path}' metadata"
            raise FsError(msg) from err
        return FileMetadata.from_stat(path, stat)

    def iter_dir(self, path: AnyBsvPath) -> Iterator[FileMetadata]:
        """Return the metadata of all items in the directory `path`.

        Raises:
            FsError: If the directory cannot be read.
        """
        path = self._make_path(path)
        real_path = self._real_path(path)
        try:
            # The `with` block closes the scandir iterator even when the
            # caller stops iterating early.
            with os.scandir(real_path) as entries:
                for entry in entries:
                    yield FileMetadata.from_stat(
                        path / entry.name, entry.stat(follow_symlinks=False)
                    )
        except OSError as err:
            msg = f"failed to read directory {path}"
            raise FsError(msg) from err

    def read_bytes(self, path: AnyBsvPath) -> bytes:
        """Return the content of `path` as `bytes`."""
        with self.open_read(path) as stream:
            return stream.read()

    def write_bytes(self, path: AnyBsvPath, data: Buffer | BinaryIO) -> int:
        """Create or replace a file at `path`, setting its content to `data`.

        `data` is either a buffer written in one go, or a binary stream
        copied in blocks of 65536 bytes.

        Returns:
            The number of bytes written.
        """
        written = 0
        with self.open_write(path) as sout:
            if isinstance(data, Buffer):
                written += sout.write(data)
            else:
                while chunk := data.read(65536):
                    written += sout.write(chunk)
        return written

    def open_read(self, path: AnyBsvPath) -> BinaryIO:
        """Return a read-only binary stream that reads the content of `path`.

        Raises:
            FsError: If the file cannot be opened.
        """
        path = self._make_path(path)
        try:
            return self._real_path(path).open("rb")
        except OSError as err:
            msg = f"failed to read {path}"
            raise FsError(msg) from err

    def open_write(self, path: AnyBsvPath) -> BinaryIO:
        """Return a write-only binary stream that writes to `path`.

        Raises:
            FsError: If the file cannot be opened.
        """
        path = self._make_path(path)
        try:
            return self._real_path(path).open("wb")
        except OSError as err:
            # This used to say "failed to read", copied from `open_read`.
            msg = f"failed to write {path}"
            raise FsError(msg) from err

    def mkdir(
        self,
        path: AnyBsvPath,
        mode: Permissions = DEFAULT_DIR_PERMS,
        parents: bool = False,
        exist_ok: bool = False,
    ):
        """Create a directory at `path`.

        Args:
            path: The directory to create.
            mode: The permissions of the new directory.
            parents: If `True`, create parent directories if they don't exist.
            exist_ok: If `False` and `path` already exists, raise an error.

        Raises:
            AlreadyExistError: If `path` already exists.
            NotFoundError: If the parent directory does not exist.
        """
        path = self._make_path(path)
        try:
            self._real_path(path).mkdir(
                mode=mode.unix_perms, parents=parents, exist_ok=exist_ok
            )
        except FileExistsError as err:
            msg = f"{path} already exists"
            raise AlreadyExistError(msg) from err
        except FileNotFoundError as err:
            msg = f"{path.parent} does not exist"
            raise NotFoundError(msg) from err

    def make_link(self, path: AnyBsvPath, target: AnyBsvPath) -> None:
        """Create a symbolic link at `path` that points to `target`."""
        path = self._make_path(path)
        # `target` was built from `path` before, which made every link point
        # to itself.
        target = self._make_path(target)
        self._real_path(path).symlink_to(self._real_path(target))

    def set_permissions(self, path: AnyBsvPath, permissions: Permissions) -> None:
        """Set the permissions of `path` to `permissions`."""
        path = self._make_path(path)
        self._real_path(path).chmod(permissions.unix_perms)

    def set_modification_time(self, path: AnyBsvPath, mod_time: datetime) -> None:
        """Set the modification time of `path` to `mod_time`.

        The same timestamp is passed to `os.utime` as access time.
        """
        path = self._make_path(path)
        ts = mod_time.timestamp()
        os.utime(self._real_path(path), (ts, ts))

    def _make_path(self, path: AnyBsvPath) -> PurePosixPath:
        # Normalize to `PurePosixPath` and reject relative paths.
        if not isinstance(path, PurePosixPath):
            path = PurePosixPath(path)
        if not path.is_absolute():
            msg = f"{path} is not absolute"
            raise FsError(msg)
        return path

    def _real_path(self, path: PurePosixPath) -> Path:
        # Re-root the absolute bsv path under the real directory.
        return self.path / path.relative_to("/")
|
||||
286
tests.bak/test_repository.py
Normal file
286
tests.bak/test_repository.py
Normal file
@@ -0,0 +1,286 @@
|
||||
# bsv - Backup, Synchronization, Versioning
|
||||
# Copyright (C) 2023 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from __future__ import annotations
|
||||
from datetime import UTC, datetime
|
||||
from os import stat_result
|
||||
from pathlib import Path
|
||||
from random import randbytes
|
||||
from shutil import rmtree
|
||||
from typing import Iterator
|
||||
|
||||
import pytest
|
||||
from tempfile import TemporaryDirectory
|
||||
|
||||
from bsv.repository import Repository, Snapshot, Tree, TreeItem, create_repository, timestamp_us_from_time
|
||||
from bsv.simple_cas.cas import Digest
|
||||
from bsv.tree_walker import Action, IgnoreCause, TreeWalker
|
||||
|
||||
|
||||
@pytest.fixture
def tmp_dir():
    """Yield a temporary directory as a `Path`, deleted once the test is over."""
    with TemporaryDirectory(prefix="simple_cas_") as raw_dir:
        yield Path(raw_dir)
|
||||
|
||||
|
||||
@pytest.fixture
def repo(tmp_dir):
    """Create a repository named "test_repo" inside the temporary directory."""
    config_path = tmp_dir / "bsv.config"
    repo_path = tmp_dir / "bsv_repo"
    return create_repository(config_path, "test_repo", repo_path)
|
||||
|
||||
|
||||
def test_read_write_blob(tmp_dir: Path, repo: Repository):
    """Store a 1 MiB random file as a blob, then read it back twice."""
    source = tmp_dir / "test.dat"
    make_random_file(source, 1 << 20)

    with source.open("rb") as stream:
        added = repo.add_blob(stream)
    assert added.object_type == b"blob"
    assert added.blob.reader().read() == source.read_bytes()

    fetched = repo.get_blob(added.digest)
    assert fetched.digest == added.digest
    assert fetched.object_type == added.object_type
    assert fetched.size == added.size
    assert fetched.blob.reader().read() == source.read_bytes()
|
||||
|
||||
|
||||
def test_read_write_tree(repo: Repository):
    """Round-trip a two-item tree through bytes and through the repository."""
    now = datetime.now(UTC)
    blob_item = TreeItem(
        digest = Digest(bytes([42]) * repo._cas._digest_size),
        object_type = b"blob",
        size = 123,
        permissions = 0o744,
        modification_timestamp_us = timestamp_us_from_time(now),
        name = "xyz",
    )
    link_item = TreeItem(
        digest = Digest(bytes([123]) * repo._cas._digest_size),
        object_type = b"slnk",
        size = 42,
        permissions = 0o777,
        modification_timestamp_us = timestamp_us_from_time(now),
        name = "foobar",
    )
    tree = Tree(repo, [blob_item, link_item])

    # Serialization alone must round-trip.
    decoded = Tree.from_bytes(repo, tree.to_bytes())
    assert decoded == tree

    added = repo.add_tree(tree)
    assert added.object_type == b"tree"
    assert added.tree == tree

    fetched = repo.get_tree(added.digest)
    assert fetched.digest == added.digest
    assert fetched.object_type == added.object_type
    assert fetched.size == added.size
    assert fetched.tree == added.tree
|
||||
|
||||
|
||||
def test_read_write_snapshot(repo: Repository):
    """Round-trip a snapshot with two parents through bytes and the repository."""
    digest_size = repo._cas._digest_size
    parent_digests = [
        Digest(bytes([123]) * digest_size),
        Digest(bytes([124]) * digest_size),
    ]
    snapshot = Snapshot(
        repo = repo,
        tree_digest = Digest(bytes([42]) * digest_size),
        parents = parent_digests,
        repo_name = "test_repo",
        timestamp_us = timestamp_us_from_time(datetime.now()),
    )

    # Serialization alone must round-trip.
    decoded = Snapshot.from_bytes(repo, snapshot.to_bytes())
    assert decoded == snapshot

    added = repo.add_snapshot(snapshot)
    assert added.object_type == b"snap"
    assert added.snapshot == snapshot

    fetched = repo.get_snapshot(added.digest)
    assert fetched.digest == added.digest
    assert fetched.object_type == added.object_type
    assert fetched.size == added.size
    assert fetched.snapshot == added.snapshot
|
||||
|
||||
|
||||
class TestTreeWalker(TreeWalker):
    """`TreeWalker` that records every reported action for later assertions."""

    # The name starts with "Test", so pytest would try to collect this helper
    # as a test class; opt out explicitly.
    __test__ = False

    reports: list

    def __init__(self, repo: Repository, dry_run: bool=False):
        """Create the walker with an empty list of reports."""
        super().__init__(repo, dry_run=dry_run)
        self.reports = []

    def report(
        self,
        action: Action,
        path: Path,
        pstat: stat_result | None,
        info: IgnoreCause | Exception | None = None
    ):
        """Forward to the base class, then record ``(action, path, info)``.

        `info` is recorded as `None` for `Action.REMOVE`.
        """
        super().report(action, path, pstat, info)
        self.reports.append((action, path, info if action != Action.REMOVE else None))
|
||||
|
||||
|
||||
def test_add_tree(tmp_dir: Path, repo: Repository):
    """Add a directory twice and check both the reports and the stored trees.

    The first pass adds everything except the ignored ``bsv_repo`` directory.
    The second pass runs against a modified directory with the first tree as
    source, and must report unchanged, updated, removed and added items.
    """
    root = tmp_dir / "test0"
    structure0 = {
        "folder": {
            "sub_folder": {
                "empty_folder": {},
                "foo.txt": b"Hello World!\n",
            },
            "test.py": b"print(\"Hello World!\")\n",
            "bar.dat": bytes(range(256)),
        },
        "Another test with long name and spaces and a bang !": b"Should works.\n",
        "bsv_repo": {
            "bsv_repository.config": b"[bsv]\n",
        },
    }
    structure1 = {
        "folder": {
            "sub_folder": {
                "empty_folder": {},
                "foo.txt": b"Hello World!\n",
            },
            "bar.dat": bytes(range(256)) * 2,
        },
        "new_file": b"whatever",
        "Another test with long name and spaces and a bang !": b"Should works.\n",
        "bsv_repo": {
            "bsv_repository.config": b"[bsv]\n",
        },
    }

    # The repository directory is ignored by the walker, so it must not show
    # up in the stored trees.
    expected0 = dict(structure0)
    del expected0["bsv_repo"]

    expected1 = dict(structure1)
    del expected1["bsv_repo"]

    create_file_structure(root, structure0)

    def check(digest: Digest, value: dict | bytes):
        # Recursively compare the stored object with the expected structure.
        if isinstance(value, dict):
            tree = repo.get_tree(digest).tree
            assert tree
            assert [item.name for item in tree.items] == sorted(value.keys())
            for item in tree.items:
                check(item.digest, value[item.name])
        elif isinstance(value, bytes):
            blob_obj = repo.get_blob(digest)
            data = blob_obj.blob.reader().read()
            assert data == value

    walker = TestTreeWalker(repo)
    obj0 = walker.add_tree(root)
    assert obj0.object_type == b"tree"
    assert walker.reports == [
        (Action.ADD, root / "Another test with long name and spaces and a bang !", None),
        (Action.IGNORE, root / "bsv_repo", IgnoreCause.IGNORE_RULE),
        (Action.ADD, root / "folder/bar.dat", None),
        (Action.ADD, root / "folder/sub_folder/empty_folder", None),
        (Action.ADD, root / "folder/sub_folder/foo.txt", None),
        (Action.ADD, root / "folder/sub_folder", None),
        (Action.ADD, root / "folder/test.py", None),
        (Action.ADD, root / "folder", None),
        (Action.ADD, root, None),
    ]
    check(obj0.digest, expected0)

    create_file_structure(root, structure1)

    walker.reports.clear()
    obj1 = walker.add_tree(root, source_digest=obj0.digest)
    # This assertion used to re-check `obj0`, leaving `obj1` unverified.
    assert obj1.object_type == b"tree"
    assert walker.reports == [
        (Action.IGNORE, root / "Another test with long name and spaces and a bang !", IgnoreCause.UNCHANGED),
        (Action.IGNORE, root / "bsv_repo", IgnoreCause.IGNORE_RULE),
        (Action.UPDATE, root / "folder/bar.dat", None),
        (Action.IGNORE, root / "folder/sub_folder/empty_folder", IgnoreCause.UNCHANGED),
        (Action.IGNORE, root / "folder/sub_folder/foo.txt", IgnoreCause.UNCHANGED),
        (Action.IGNORE, root / "folder/sub_folder", IgnoreCause.UNCHANGED),
        (Action.REMOVE, root / "folder/test.py", None),
        (Action.UPDATE, root / "folder", None),
        (Action.ADD, root / "new_file", None),
        (Action.UPDATE, root, None),
    ]
    check(obj1.digest, expected1)
|
||||
|
||||
|
||||
def create_file_structure(dst: Path, value: dict | bytes):
    """Make `dst` match `value` on disk.

    A `bytes` value makes `dst` a file with that content (a directory in the
    way is removed; the file is only rewritten when its content differs). A
    `dict` value makes `dst` a directory whose entries are exactly the dict
    keys: each key is handled recursively and any other existing entry is
    deleted.

    Raises:
        TypeError: If `value` is neither `bytes` nor `dict`.
    """
    if not isinstance(value, (bytes, dict)):
        raise TypeError(f"invalid type {type(value).__name__} for parameter value")

    if isinstance(value, bytes):
        if dst.is_dir():
            rmtree(str(dst))
        up_to_date = dst.is_file() and dst.read_bytes() == value
        if not up_to_date:
            dst.write_bytes(value)
        return

    if dst.is_file():
        dst.unlink()
    if not dst.is_dir():
        dst.mkdir()

    # Walk the wanted entries and the existing entries in parallel, both
    # sorted by name, like the merge step of a merge sort.
    wanted = sorted(value.items())
    existing = sorted(dst.iterdir())
    wanted_pos = 0
    existing_pos = 0

    while wanted_pos < len(wanted) or existing_pos < len(existing):
        if wanted_pos < len(wanted):
            name, content = wanted[wanted_pos]
        else:
            name, content = None, None
        current = existing[existing_pos] if existing_pos < len(existing) else None

        # When both sides have an entry, only handle the one that sorts
        # first; equal names are handled together.
        if name and current:
            if name < current.name:
                current = None
            elif name > current.name:
                name = None

        if name:
            wanted_pos += 1
        if current:
            existing_pos += 1

        if name:
            create_file_structure(dst / name, content)  # type: ignore
        elif current and current.is_dir():
            rmtree(current)
        elif current:
            current.unlink()
|
||||
|
||||
|
||||
def make_random_file(path: Path, size: int):
    """Write random bytes to `path`, one block per size yielded by `iter_chunks(size)`."""
    with path.open("wb") as out:
        out.writelines(randbytes(block_size) for block_size in iter_chunks(size))
|
||||
|
||||
def iter_chunks(size: int, chunk_size: int=1 << 16) -> Iterator[int]:
    """Yield block sizes that add up to `size`, each at most `chunk_size`.

    Every block is `chunk_size` long except possibly the last one. Nothing
    is yielded when `size` is zero or negative; the previous arithmetic
    yielded one full block for a size of zero.
    """
    if size <= 0:
        return
    full_blocks, remainder = divmod(size, chunk_size)
    for _ in range(full_blocks):
        yield chunk_size
    if remainder:
        yield remainder
|
||||
@@ -55,7 +55,9 @@ def test_simple_cas(tmp_dir: Path):
|
||||
|
||||
obj = cas.read(digest)
|
||||
assert obj is not None
|
||||
assert obj.digest == digest
|
||||
assert obj.object_type == b"blob"
|
||||
assert obj.size == len(data)
|
||||
assert obj.data == data
|
||||
|
||||
cas = SimpleCas(
|
||||
@@ -68,7 +70,9 @@ def test_simple_cas(tmp_dir: Path):
|
||||
|
||||
obj = cas.read(digest)
|
||||
assert obj is not None
|
||||
assert obj.digest == digest
|
||||
assert obj.object_type == b"blob"
|
||||
assert obj.size == len(data)
|
||||
assert obj.data == data
|
||||
|
||||
digest2 = cas.write(b"blob", data)
|
||||
18
tests/test_bsv/__init__.py
Normal file
18
tests/test_bsv/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
||||
# pybsv - Backup, Synchronization, Versioning.
|
||||
# Copyright (C) 2025 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
"""pybsv test module."""
|
||||
|
||||
from __future__ import annotations
|
||||
270
tests/test_bsv/test_cli.py
Normal file
270
tests/test_bsv/test_cli.py
Normal file
@@ -0,0 +1,270 @@
|
||||
# pybsv - Backup, Synchronization, Versioning.
|
||||
# Copyright (C) 2025 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from __future__ import annotations
|
||||
|
||||
from contextlib import contextmanager
|
||||
from datetime import UTC, datetime
|
||||
import os
|
||||
from pathlib import Path
|
||||
import re
|
||||
from tempfile import TemporaryDirectory
|
||||
from typing import TYPE_CHECKING, Literal, NamedTuple
|
||||
|
||||
from click.testing import CliRunner
|
||||
from hypothesis import given
|
||||
import hypothesis.strategies as st
|
||||
import pytest
|
||||
|
||||
from bsv import cli
|
||||
from bsv.vfs import Permissions
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Generator
|
||||
|
||||
|
||||
@pytest.fixture
def runner(tmp_path: Path) -> CliRunner:
    """Return a ``CliRunner`` whose ``BSV_REPO`` environment variable is ``tmp_path``."""
    return CliRunner(env={"BSV_REPO": str(tmp_path)})
|
||||
|
||||
|
||||
@contextmanager
def make_runner() -> Generator[CliRunner, None, None]:
    """Yield a ``CliRunner`` whose ``BSV_REPO`` points at a fresh temporary directory.

    The directory is removed when the context exits.  This is a context manager
    rather than a fixture so it can be entered inside a test body.
    """
    with TemporaryDirectory(prefix="test_vfs_") as repo_dir:
        yield CliRunner(env={"BSV_REPO": repo_dir})
|
||||
|
||||
|
||||
########################################################################################
|
||||
# mkdir
|
||||
|
||||
|
||||
def test_mkdir_fails_with_relative_path(tmp_path: Path, runner: CliRunner):
    """`mkdir test` (relative path) exits with code 2 and explains why on stderr."""
    target = tmp_path / "test"
    assert not target.exists()

    outcome = runner.invoke(cli.cli, ["mkdir", "test"])

    assert outcome.exit_code == 2
    assert "test is not an absolute path" in outcome.stderr
|
||||
|
||||
|
||||
def test_mkdir_default(tmp_path: Path, runner: CliRunner):
    """`mkdir /test` silently creates the directory inside the repository root."""
    target = tmp_path / "test"
    assert not target.exists()

    outcome = runner.invoke(cli.cli, ["mkdir", "/test"])

    assert outcome.exit_code == 0
    assert outcome.stderr == ""
    assert outcome.stdout == ""
    assert target.is_dir()
|
||||
|
||||
|
||||
def test_mkdir_multiple_dirs(tmp_path: Path, runner: CliRunner):
    """A single `mkdir` invocation creates every directory given on the command line."""
    foo_dir = tmp_path / "foo"
    bar_dir = tmp_path / "bar"
    assert not foo_dir.exists()
    assert not bar_dir.exists()

    outcome = runner.invoke(cli.cli, ["mkdir", "/foo", "/bar"])

    assert outcome.exit_code == 0
    assert outcome.stderr == ""
    assert outcome.stdout == ""
    assert foo_dir.is_dir()
    assert bar_dir.is_dir()
|
||||
|
||||
|
||||
def test_mkdir_nested_fails_without_parents(tmp_path: Path, runner: CliRunner):
    """Without `--parents`, `mkdir /foo/bar` fails with code 1 when `/foo` is missing."""
    assert not (tmp_path / "foo").exists()

    outcome = runner.invoke(cli.cli, ["mkdir", "/foo/bar"])

    assert outcome.exit_code == 1
    assert outcome.stderr == "/foo does not exist\n"
    assert outcome.stdout == ""
|
||||
|
||||
|
||||
def test_mkdir_nested(tmp_path: Path, runner: CliRunner):
    """`mkdir --parents /foo/bar` creates the nested directory without any output."""
    nested = tmp_path / "foo/bar"
    assert not nested.exists()

    outcome = runner.invoke(cli.cli, ["mkdir", "--parents", "/foo/bar"])

    assert outcome.exit_code == 0
    assert outcome.stderr == ""
    assert outcome.stdout == ""
    assert nested.is_dir()
|
||||
|
||||
|
||||
def test_mkdir_message_if_exists(tmp_path: Path, runner: CliRunner):
    """Running `mkdir /test` twice succeeds both times; the second run warns on stderr."""
    target = tmp_path / "test"

    # First run creates the directory silently, second run reports it exists.
    for expected_stderr in ("", "/test already exists\n"):
        outcome = runner.invoke(cli.cli, ["mkdir", "/test"])

        assert outcome.exit_code == 0
        assert outcome.stderr == expected_stderr
        assert outcome.stdout == ""
        assert target.is_dir()
|
||||
|
||||
|
||||
def test_mkdir_mode(tmp_path: Path, runner: CliRunner):
    """`mkdir --mode=741` creates the directory with exactly those permission bits."""
    target = tmp_path / "test"
    assert not target.exists()

    outcome = runner.invoke(cli.cli, ["mkdir", "/test", "--mode=741"])

    assert outcome.exit_code == 0
    assert outcome.stderr == ""
    assert outcome.stdout == ""
    assert target.is_dir()

    permission_bits = target.stat().st_mode & 0o7777
    assert permission_bits == 0o741
|
||||
|
||||
|
||||
def test_mkdir_verbose(tmp_path: Path, runner: CliRunner):
    """`mkdir --verbose` reports created directories on stdout and existing ones on stderr."""
    foo_dir = tmp_path / "foo"
    bar_dir = tmp_path / "bar"

    # Set-up: `/foo` already exists before the verbose invocation.
    setup = runner.invoke(cli.cli, ["mkdir", "/foo"])
    assert setup.exit_code == 0
    assert setup.stderr == ""
    assert setup.stdout == ""
    assert foo_dir.is_dir()
    assert not bar_dir.exists()

    outcome = runner.invoke(cli.cli, ["mkdir", "--verbose", "/foo", "/bar"])

    assert outcome.exit_code == 0
    assert outcome.stderr == "/foo already exists\n"
    assert outcome.stdout == "Created /bar\n"
    assert foo_dir.is_dir()
    assert bar_dir.is_dir()
|
||||
|
||||
|
||||
########################################################################################
|
||||
# ls
|
||||
|
||||
|
||||
def permissions(target: Literal["file", "dir"] = "file"):
    """Return a strategy building ``Permissions`` from a fixed list of modes.

    The list of candidate modes depends on ``target``: ten modes for ``"file"``,
    three for anything else.
    """
    if target == "file":
        modes = [
            0o0400,
            0o0440,
            0o0444,
            0o0600,
            0o0640,
            0o0644,
            0o0664,
            0o0750,
            0o0755,
            0o0777,
        ]
    else:
        # NOTE(review): none of these directory modes carries an execute bit -
        # confirm this is intentional.
        modes = [
            0o0400,
            0o0600,
            0o0640,
        ]
    return st.builds(Permissions, st.sampled_from(modes))
|
||||
|
||||
|
||||
class Tree(NamedTuple):
    """Description of a file or directory that can be materialized on disk."""

    # "file" or "dir"; `build` itself dispatches on the type of `content`.
    type: Literal["file", "dir"]
    # Name of the entry inside its parent directory.
    name: str
    # Permissions applied to the entry once it has been created.
    perms: Permissions
    # Timestamp used for both the access and modification time of the entry.
    time: datetime
    # File payload (bytes) or the children of a directory (list of `Tree`).
    content: bytes | list[Tree]

    @property
    def type_prefix(self) -> str:
        """Return the one-character type marker: ``"d"`` for directories, else ``"-"``."""
        return "d" if self.type == "dir" else "-"

    def build(self, parent: Path) -> None:
        """Create this entry (and, recursively, its children) inside ``parent``.

        Permissions are applied with an explicit ``chmod`` *after* the content
        has been written.  Passing the mode to ``mkdir`` would let the process
        umask filter it, and a restrictive directory mode would prevent the
        children from being created.  Timestamps are set last so that creating
        children does not overwrite the directory's modification time.
        """
        path = parent / self.name
        if isinstance(self.content, list):
            path.mkdir()
            for child in self.content:
                child.build(path)
        else:
            path.write_bytes(self.content)
        path.chmod(self.perms.unix_perms)
        ts = self.time.timestamp()
        os.utime(path, (ts, ts))
|
||||
|
||||
|
||||
def filenames() -> st.SearchStrategy:
    """Return a strategy producing names of 1 to 255 characters.

    Control and surrogate characters are excluded, as are the characters in the
    explicit exclusion string.  Names whose encoded form is 256 bytes or more,
    and the special names ``.`` and ``..``, are filtered out.
    """
    # NOTE(review): the exclusion string contains `!` but not `?` - confirm
    # whether `?` was intended.
    alphabet = st.characters(
        exclude_categories=["Cc", "Cs"],
        exclude_characters='<>:"/\\|!*',
    )

    def is_usable(candidate: str) -> bool:
        return len(candidate.encode()) < 256 and candidate not in (".", "..")

    return st.text(alphabet, min_size=1, max_size=255).filter(is_usable)
|
||||
|
||||
|
||||
@st.composite
def trees(draw: st.DrawFn, max_depth: int = 3) -> Tree:
    """Draw a random ``Tree``: either a file with binary content or a directory.

    Directories hold up to 10 uniquely named children drawn with
    ``max_depth - 1``; when ``max_depth`` is 0 they are always empty.
    """
    kind = draw(st.sampled_from(["file", "dir"]))

    if kind == "file":
        content_strategy = st.binary()
    else:
        content_strategy = st.lists(
            trees(max_depth - 1),
            unique_by=lambda t: t.name,
            max_size=0 if max_depth == 0 else 10,
        )

    # Draw order (name, permissions, time, content) is kept stable on purpose.
    name = draw(filenames())
    perms = draw(permissions(kind))  # type: ignore
    timestamp = draw(
        st.datetimes(
            min_value=datetime(1902, 1, 1),
            max_value=datetime(2100, 1, 1),
            timezones=st.just(UTC),
        )
    )
    content = draw(content_strategy)

    return Tree(kind, name, perms, timestamp, content)  # type: ignore
|
||||
|
||||
|
||||
def trees_lists(max_depth: int = 3) -> st.SearchStrategy:
    """Return a strategy producing lists of ``Tree`` with pairwise distinct names."""
    element = trees(max_depth=max_depth)
    return st.lists(element, unique_by=lambda tree: tree.name)
|
||||
|
||||
|
||||
@given(trees=trees_lists(max_depth=0))
def test_ls(trees: list[Tree]):
    """`ls -lA` prints one line per entry, sorted by name, with type, mode, size, time.

    Each output line is checked against the corresponding generated ``Tree``:
    type prefix, permission string, byte size (files only), local modification
    time truncated to seconds, and name.
    """
    line_pattern = re.compile(
        r"""
        ([dl-])([r-][w-][x-][r-][w-][x-][r-][w-][x-])
        \ +(\d+)
        \ (\d{4}-\d{2}-\d{2}\ \d{2}:\d{2}:\d{2})
        \ ([^\n]+)
        """,
        re.VERBOSE,
    )

    with make_runner() as runner:
        path = Path(runner.env["BSV_REPO"] or "")
        for tree in trees:
            tree.build(path)

        result = runner.invoke(cli.cli, ["ls", "-lA"])

        assert result.exit_code == 0
        assert result.stderr == ""

        expected = sorted(trees, key=lambda t: t.name)
        # Split on "\n" only: `str.splitlines()` also breaks on U+2028/U+2029,
        # which generated file names may contain.  Empty strings (such as the
        # one after the final newline) are dropped.
        lines = [line for line in result.stdout.split("\n") if line]

        for line, tree in zip(lines, expected, strict=True):
            match = line_pattern.fullmatch(line)
            assert match
            assert match[1] == tree.type_prefix
            assert match[2] == str(tree.perms)
            if tree.type_prefix != "d":
                assert match[3] == str(len(tree.content))
            assert match[4] == tree.time.astimezone().replace(tzinfo=None).isoformat(
                " ", "seconds"
            )
            assert match[5] == tree.name
|
||||
56
tests/test_bsv/test_cli_utils.py
Normal file
56
tests/test_bsv/test_cli_utils.py
Normal file
@@ -0,0 +1,56 @@
|
||||
# pybsv - Backup, Synchronization, Versioning.
|
||||
# Copyright (C) 2025 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
"""Tests for cli_utils.py."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from bsv.cli_utils import format_human_byte_size
|
||||
|
||||
|
||||
def test_format_human_byte_size():
    """Check `format_human_byte_size` against a table of (byte count, expected text)."""
    expectations = [
        (0, "0B"),
        (1, "1B"),
        (9, "9B"),
        (10, "10B"),
        (99, "99B"),
        (100, "100B"),
        (999, "999B"),
        (1000, "1000B"),
        (1023, "1023B"),
        (2**10, "1KiB"),
        (int(1.23456 * 2**10), "1.23KiB"),
        (9 * 2**10, "9KiB"),
        (10 * 2**10 - 1, "10KiB"),
        (int(98.76543 * 2**10), "98.8KiB"),
        (99 * 2**10 - 1, "99KiB"),
        (100 * 2**10 - 1, "100KiB"),
        (int(192.8374 * 2**10), "193KiB"),
        (999 * 2**10 - 1, "999KiB"),
        (1000 * 2**10 - 1, "1000KiB"),
        (2**20 - 1, "1MiB"),
        (2**20, "1MiB"),
        (2**30, "1GiB"),
        (2**40, "1TiB"),
        (2**50, "1PiB"),
        (2**60, "1EiB"),
        (2**70, "1ZiB"),
        (2**80, "1YiB"),
        (2**90, "1RiB"),
        (2**100 - 2**80, "1QiB"),
        (2**100, "1QiB"),
        (2**110 - 2**90, "1024QiB"),
        (2**110, "1024QiB"),
        (2**120, "1048576QiB"),
    ]
    for byte_count, expected_text in expectations:
        assert format_human_byte_size(byte_count) == expected_text
|
||||
394
tests/test_bsv/test_vfs.py
Normal file
394
tests/test_bsv/test_vfs.py
Normal file
@@ -0,0 +1,394 @@
|
||||
# pybsv - Backup, Synchronization, Versioning.
|
||||
# Copyright (C) 2025 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
"""Tests for the `VirtualFileSystem` class and related stuff."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
from io import BytesIO
|
||||
from pathlib import Path, PurePosixPath
|
||||
|
||||
import pytest
|
||||
|
||||
from bsv.vfs import FileMetadata, FsError, Permissions, VirtualFileSystem
|
||||
|
||||
|
||||
@pytest.fixture
def fs(tmp_path: Path) -> VirtualFileSystem:
    """Provide a `VirtualFileSystem` constructed from pytest's `tmp_path`."""
    vfs = VirtualFileSystem(tmp_path)
    return vfs
|
||||
|
||||
|
||||
########################################################################################
|
||||
# Permissions
|
||||
|
||||
|
||||
def test_permissions():
    """`Permissions` accepts an int or an octal string; check `==`, `repr` and `str`."""
    from_int = Permissions(0o1234)
    assert from_int.unix_perms == 0o1234

    from_text = Permissions("752")
    assert from_text.unix_perms == 0o752

    # Equality: reflexive, and distinct modes compare unequal.
    assert from_int == from_int
    assert from_int != from_text

    expected_forms = [
        (from_int, "Permissions(0o1234)", "-w--wxr-T"),
        (from_text, "Permissions(0o0752)", "rwxr-x-w-"),
    ]
    for perms, expected_repr, _ in expected_forms:
        assert repr(perms) == expected_repr
    for perms, _, expected_str in expected_forms:
        assert str(perms) == expected_str
|
||||
|
||||
|
||||
########################################################################################
|
||||
# FileMetadata
|
||||
|
||||
|
||||
def test_file_metadata():
    """Check `FileMetadata` attributes and the `is_*` helpers for every file type."""
    path = PurePosixPath("/some_dir/some_file")
    permissions = Permissions(0o1234)
    mod_time = datetime(2025, 7, 12, 12, 34, 56, tzinfo=UTC)

    def make(kind, where=path):
        # All instances share permissions, time and size; only type/path vary.
        return FileMetadata(
            where,
            type=kind,
            permissions=permissions,
            modification_time=mod_time,
            byte_size=123,
        )

    file_md = make("file")
    assert file_md.path == path
    assert file_md.type == "file"
    assert file_md.permissions == permissions
    assert file_md.modification_time == mod_time
    assert file_md.byte_size == 123
    assert file_md.unix_mode == "--w--wxr-T"
    assert not file_md.is_hidden
    assert file_md.is_file
    assert not file_md.is_dir
    assert not file_md.is_symlink
    assert not file_md.is_other

    # For every other type, exactly the matching `is_*` flag must be truthy.
    for kind in ("dir", "symlink", "other"):
        md = make(kind)
        assert md.type == kind
        assert bool(md.is_file) == (kind == "file")
        assert bool(md.is_dir) == (kind == "dir")
        assert bool(md.is_symlink) == (kind == "symlink")
        assert bool(md.is_other) == (kind == "other")

    # A leading dot in the file name makes the entry hidden.
    assert make("file", PurePosixPath("/some_dir/.some_file")).is_hidden
|
||||
|
||||
|
||||
def test_file_metadata_eq():
    """Two `FileMetadata` are equal only when every field matches."""
    path = PurePosixPath("/some_dir/some_file")
    permissions = Permissions(0o1234)
    mod_time = datetime(2025, 7, 12, 12, 34, 56, tzinfo=UTC)

    def build(where=path, **overrides):
        # Start from the reference field values and replace the ones under test.
        fields = {
            "type": "file",
            "permissions": permissions,
            "modification_time": mod_time,
            "byte_size": 123,
        }
        fields.update(overrides)
        return FileMetadata(where, **fields)

    md = build()

    assert build() == md
    assert build(PurePosixPath("/some_dir/some_other_file")) != md
    assert build(type="dir") != md
    assert build(permissions=Permissions(0o0752)) != md
    assert build(modification_time=datetime(2025, 1, 2, 3, 4, 5)) != md
    assert build(byte_size=124) != md
|
||||
|
||||
|
||||
########################################################################################
|
||||
# mkdir
|
||||
|
||||
|
||||
def test_mkdir_fails_with_relative_path(fs: VirtualFileSystem):
    """`mkdir` raises `FsError` when given a relative path."""
    relative_path = "test"
    with pytest.raises(FsError):
        fs.mkdir(relative_path)
|
||||
|
||||
|
||||
def test_mkdir_default(fs: VirtualFileSystem):
    """`mkdir` creates a directory that did not exist before."""
    target = "/test"
    assert not fs.exists(target)

    fs.mkdir(target)

    assert fs.is_dir(target)
|
||||
|
||||
|
||||
def test_mkdir_nested_fails_without_parents(fs: VirtualFileSystem):
    """`mkdir` raises `FsError` when the parent directory is missing."""
    assert not fs.exists("/foo")

    with pytest.raises(FsError):
        fs.mkdir("/foo/bar")
|
||||
|
||||
|
||||
def test_mkdir_nested(fs: VirtualFileSystem):
    """`mkdir(..., parents=True)` creates the missing parent directories too."""
    assert not fs.exists("/test")

    nested = "/test/foobar"
    fs.mkdir(nested, parents=True)

    assert fs.is_dir(nested)
|
||||
|
||||
|
||||
def test_mkdir_fails_if_exists(fs: VirtualFileSystem):
    """Calling `mkdir` a second time on the same path raises `FsError`."""
    target = "/foo"
    assert not fs.exists(target)

    fs.mkdir(target)
    assert fs.is_dir(target)

    with pytest.raises(FsError):
        fs.mkdir(target)
|
||||
|
||||
|
||||
def test_mkdir_exists_ok(fs: VirtualFileSystem):
    """`mkdir(..., exist_ok=True)` does not raise for an existing directory."""
    target = "/test"
    assert not fs.exists(target)

    fs.mkdir(target)
    assert fs.is_dir(target)

    # Must not raise.
    fs.mkdir(target, exist_ok=True)
|
||||
|
||||
|
||||
def test_mkdir_exists_ok_fail_if_file(fs: VirtualFileSystem):
    """`exist_ok=True` still raises `FsError` when the path is a regular file."""
    target = "/test"
    fs.write_bytes(target, b"test")
    assert fs.is_file(target)

    with pytest.raises(FsError):
        fs.mkdir(target, exist_ok=True)
|
||||
|
||||
|
||||
def test_mkdir_mode(fs: VirtualFileSystem):
    """`mkdir(..., mode=...)` creates the directory with the requested permissions."""
    target = "/test"
    assert not fs.exists(target)

    requested = Permissions(0o741)
    fs.mkdir(target, mode=requested)

    assert fs.is_dir(target)
    assert fs.metadata(target).permissions == requested
|
||||
|
||||
|
||||
########################################################################################
|
||||
# read_bytes / write_bytes
|
||||
|
||||
|
||||
def test_read_write_bytes(fs: VirtualFileSystem):
    """`write_bytes` accepts bytes or a stream; `read_bytes` returns what was written."""
    target = "/test"
    assert not fs.exists(target)

    # Plain bytes payload.
    fs.write_bytes(target, b"This is a test.")
    assert fs.read_bytes(target) == b"This is a test."

    # Stream payload, overwriting the previous content.
    source = BytesIO(b"Another test.")
    fs.write_bytes(target, source)
    assert fs.read_bytes(target) == b"Another test."

    # Reading a missing file fails.
    with pytest.raises(FsError):
        fs.read_bytes("/does_not_exist")

    # Writing below a missing directory fails.
    with pytest.raises(FsError):
        fs.write_bytes("/does_not_exist/foobar", b"")
|
||||
|
||||
|
||||
def test_open_read_write(fs: VirtualFileSystem):
    """`open_write` / `open_read` round-trip data, overwrite, and fail on bad paths."""
    target = "/test"
    assert not fs.exists(target)

    with fs.open_write(target) as sink:
        for piece in (b"foo", b"bar"):
            sink.write(piece)

    assert fs.exists(target)
    with fs.open_read(target) as source:
        head = source.read(3)
        tail = source.read(3)
        rest = source.read()
        assert head == b"foo"
        assert tail == b"bar"
        assert rest == b""

    # Opening for write again replaces the previous content.
    with fs.open_write(target) as sink:
        sink.write(b"baz")

    with fs.open_read(target) as source:
        assert source.read() == b"baz"

    with pytest.raises(FsError):
        fs.open_read("/does_not_exist")

    with pytest.raises(FsError):
        fs.open_write("/does_not_exist/foobar")
|
||||
|
||||
|
||||
########################################################################################
|
||||
# metadata
|
||||
|
||||
|
||||
def test_metadata(fs: VirtualFileSystem):
    """Check `metadata` and the `is_*` helpers for a file, a directory and a symlink."""

    def assert_kind(path: str, kind: str) -> None:
        # Exactly the predicate matching `kind` must be truthy.
        assert bool(fs.is_file(path)) == (kind == "file")
        assert bool(fs.is_dir(path)) == (kind == "dir")
        assert bool(fs.is_symlink(path)) == (kind == "symlink")
        assert bool(fs.is_other(path)) == (kind == "other")

    # --- regular file ---
    file_path = "/test_file"
    file_permissions = Permissions(0o754)
    file_time = datetime(2025, 5, 17, 13, 57, 32, tzinfo=UTC)
    file_content = b"This is a test\n"

    fs.write_bytes(file_path, file_content)
    fs.set_permissions(file_path, file_permissions)
    fs.set_modification_time(file_path, file_time)

    md = fs.metadata(file_path)
    assert md.path == PurePosixPath("/test_file")
    assert md.permissions == file_permissions
    assert md.type == "file"
    assert md.modification_time == file_time
    assert md.byte_size == len(file_content)
    assert not md.is_hidden
    assert fs.metadata(file_path) == md
    assert_kind(file_path, "file")

    # Changing the permissions makes the fresh metadata differ from the old one.
    fs.set_permissions(file_path, Permissions(0o644))
    assert fs.metadata(file_path) != md

    # --- hidden directory ---
    dir_path = "/.test_dir"
    fs.mkdir(dir_path)
    md = fs.metadata(dir_path)
    assert md.type == "dir"
    assert fs.metadata(dir_path).is_hidden
    assert_kind(dir_path, "dir")

    # --- symlink ---
    link_path = "/test_link"
    fs.make_link(link_path, "/link_target")
    md = fs.metadata(link_path)
    assert md.type == "symlink"
    assert_kind(link_path, "symlink")

    # --- missing path ---
    assert fs.metadata_or_none("/does_not_exist") is None
    with pytest.raises(FsError):
        fs.metadata("/does_not_exist")
|
||||
|
||||
|
||||
########################################################################################
|
||||
# iter_dir
|
||||
|
||||
|
||||
def test_iter_dir(fs: VirtualFileSystem):
    """`iter_dir("/")` yields metadata for every entry, with the right path and type."""
    expected = [
        (PurePosixPath("/dir"), "dir"),
        (PurePosixPath("/file"), "file"),
        (PurePosixPath("/link"), "symlink"),
    ]

    # One creation routine per entry type.
    creators = {
        "dir": lambda p: fs.mkdir(p),
        "file": lambda p: fs.write_bytes(p, b""),
        "symlink": lambda p: fs.make_link(p, "/foobar"),
    }
    for entry_path, entry_type in expected:
        create = creators.get(entry_type)
        if create is not None:
            create(entry_path)

    listed = sorted(fs.iter_dir("/"))
    for md, (entry_path, entry_type) in zip(listed, expected, strict=True):
        assert md.path == entry_path
        assert md.type == entry_type
|
||||
|
||||
|
||||
def test_iter_dir_failure(fs: VirtualFileSystem):
    """`iter_dir` raises `FsError` for a missing path and for a regular file."""
    target = "/test"

    # Path does not exist yet.
    with pytest.raises(FsError):
        list(fs.iter_dir(target))

    # Path exists but is a file, not a directory.
    fs.write_bytes(target, b"")
    with pytest.raises(FsError):
        list(fs.iter_dir(target))
|
||||
@@ -1,111 +0,0 @@
|
||||
# bsv - Backup, Synchronization, Versioning
|
||||
# Copyright (C) 2023 Simon Boyé
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from __future__ import annotations
|
||||
from datetime import UTC, datetime
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from random import randbytes
|
||||
from typing import Iterator
|
||||
|
||||
import pytest
|
||||
from tempfile import TemporaryDirectory
|
||||
|
||||
from bsv.repository import Repository, Snapshot, Tree, TreeItem, create_repository, timestamp_from_time
|
||||
from bsv.simple_cas.cas import Digest
|
||||
|
||||
|
||||
@pytest.fixture
def tmp_dir():
    """Yield the path of a temporary directory that is removed after the test."""
    scratch = TemporaryDirectory(prefix="simple_cas_")
    with scratch as dir_name:
        yield Path(dir_name)
|
||||
|
||||
|
||||
@pytest.fixture
def repo(tmp_dir):
    """Create a repository named ``test_repo`` under ``tmp_dir / "bsv"``."""
    repo_path = tmp_dir / "bsv"
    return create_repository(repo_path, "test_repo")
|
||||
|
||||
|
||||
def test_read_write_blob(tmp_dir: Path, repo: Repository):
    """A blob added from a 1 MiB random file reads back byte-for-byte identical."""
    source_path = tmp_dir / "test.dat"
    make_random_file(source_path, 1 << 20)

    with source_path.open("rb") as source:
        digest = repo.add_blob(source)

    stored = repo.get_blob(digest).read()

    assert stored == source_path.read_bytes()
|
||||
|
||||
|
||||
def test_read_write_tree(repo: Repository):
    """A `Tree` survives both a bytes round-trip and a repository round-trip."""
    now = datetime.now(UTC)

    def item(name: str, fill: int, mode: int) -> TreeItem:
        # The digest is `fill` repeated to the digest size of the repository CAS.
        return TreeItem(
            name,
            Digest(bytes([fill]) * repo._cas._digest_size),
            mode,
            creation_timestamp=timestamp_from_time(now),
            modification_timestamp=timestamp_from_time(now),
        )

    tree = Tree(repo, [item("xyz", 42, 0o744), item("foobar", 123, 0o777)])

    # Serialization round-trip.
    assert Tree.from_bytes(repo, tree.to_bytes()) == tree

    # Storage round-trip.
    digest = repo.add_tree(tree)
    assert repo.get_tree(digest) == tree
|
||||
|
||||
|
||||
def test_read_write_snapshot(repo: Repository):
    """A `Snapshot` survives both a bytes round-trip and a repository round-trip."""
    tree_digest = Digest(bytes([42]) * repo._cas._digest_size)
    taken_at = timestamp_from_time(datetime.now())
    snapshot = Snapshot(
        repo=repo,
        tree_digest=tree_digest,
        repo_name="test_repo",
        timestamp=taken_at,
    )

    # Serialization round-trip.
    assert Snapshot.from_bytes(repo, snapshot.to_bytes()) == snapshot

    # Storage round-trip.
    digest = repo.add_snapshot(snapshot)
    assert repo.get_snapshot(digest) == snapshot
|
||||
|
||||
|
||||
def make_random_file(path: Path, size: int):
    """Write random bytes to ``path``, one write per chunk from ``iter_chunks(size)``.

    The file is opened in binary write mode, so existing content is replaced.
    """
    with path.open("wb") as out:
        out.writelines(randbytes(length) for length in iter_chunks(size))
|
||||
|
||||
def iter_chunks(size: int, chunk_size: int = 1 << 16) -> Iterator[int]:
    """Split ``size`` into consecutive chunk lengths of at most ``chunk_size``.

    Yields ``chunk_size`` for every full chunk, followed by the remainder (if
    any), so the yielded values always sum to ``size``.  A ``size`` of zero (or
    less) yields nothing.

    Raises:
        ValueError: if ``chunk_size`` is not strictly positive.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    # Clamp to zero so an empty request produces no chunk at all instead of a
    # spurious full-sized one.
    full_chunks, remainder = divmod(max(size, 0), chunk_size)
    for _ in range(full_chunks):
        yield chunk_size
    if remainder:
        yield remainder
|
||||
Reference in New Issue
Block a user