Compare commits
12 Commits
67d15f989a
...
dev
| Author | SHA1 | Date | |
|---|---|---|---|
| f95c68ee41 | |||
| 2b961baa5b | |||
| 69bb85af01 | |||
| a97395370a | |||
| b1d2fe7717 | |||
| e74eaf0408 | |||
| d058cd0631 | |||
| 52a553d72b | |||
| eb6ae85698 | |||
| 073fd5e567 | |||
| 17bef2e63a | |||
| 7420d891d4 |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1,4 +1,6 @@
|
|||||||
__pycache__
|
__pycache__
|
||||||
|
*.egg-info
|
||||||
|
/.coverage
|
||||||
|
/.hypothesis/
|
||||||
/src/bsv/_version.py
|
/src/bsv/_version.py
|
||||||
/venv
|
/venv
|
||||||
*.egg-info
|
|
||||||
|
|||||||
82
README.md
82
README.md
@@ -1,3 +1,83 @@
|
|||||||
# BSV - Backup, Synchronization, Versioning
|
# BSV - Backup, Synchronization, Versioning
|
||||||
|
|
||||||
Readme to be done.
|
Bsv is a tool to perform backups, file/directory synchronization between devices and light versioning. Its architecture is inspired by git, but it focuses on backup first, synchronization second and, as a byproduct, also allows versioning.
|
||||||
|
|
||||||
|
Bsv is in a very early stage of development and is not production ready yet.
|
||||||
|
|
||||||
|
|
||||||
|
## Development setup
|
||||||
|
|
||||||
|
Bsv currently only supports Python >= 3.11. Once you have a compatible Python version, just fetch the code and install bsv in editable mode in a virtual env:
|
||||||
|
|
||||||
|
```
|
||||||
|
git clone https://git.draklia.net/draklaw/pybsv.git
|
||||||
|
cd pybsv
|
||||||
|
python -m venv venv
|
||||||
|
. venv/bin/activate
|
||||||
|
pip install -e .[dev]
|
||||||
|
```
|
||||||
|
|
||||||
|
This will automatically install the dependencies (including `pytest`). Happy hacking !
|
||||||
|
|
||||||
|
|
||||||
|
## Roadmap
|
||||||
|
|
||||||
|
### v0.0.1 - Simple CAS + insert/remove files manually
|
||||||
|
|
||||||
|
Basic features. Naïve CAS implementation that stores everything in a single big file with no option for removing objects. Supports a single "local" repository.
|
||||||
|
|
||||||
|
- [x] Simple CAS implementation (it's OK if it's naïve).
|
||||||
|
- [x] Content-based chunking to store files.
|
||||||
|
- [x] `bsv init` command to initialize bsv.
|
||||||
|
- [x] `bsv info` print useful information about the bsv configuration.
|
||||||
|
- [ ] `bsv log` show the history of snapshots.
|
||||||
|
- [ ] `bsv show <digest>` show the object `digest`.
|
||||||
|
- [ ] `bsv ls <bsv-path>` list files in a bsv directory.
|
||||||
|
- [ ] `bsv mkdir <bsv-path>` create a directory in bsv directly.
|
||||||
|
- [ ] `bsv add [-r] <fs-path> <bsv-path>` copy files from the filesystem to bsv.
|
||||||
|
- [ ] `bsv get [-r] <bsv-path> <fs-path>` copy files from bsv to the filesystem.
|
||||||
|
- [ ] `bsv rm [-r] <bsv-path>` remove files or directories from bsv.
|
||||||
|
|
||||||
|
### v0.0.2 - File map + snapshots
|
||||||
|
|
||||||
|
Add support for mapping files from BSV virtual file system to the actual file system. Add snapshot and restore commands.
|
||||||
|
|
||||||
|
- [ ] `bsv map` list mappings between bsv paths and filesystem paths.
|
||||||
|
- [ ] `bsv map add <bsv-path> <fs-path>` add a mapping.
|
||||||
|
- [ ] `bsv map remove <bsv-path> <fs-path>` remove a mapping.
|
||||||
|
- [ ] `bsv snapshot` capture a snapshot, i.e. ensure that mapped files in the repositories match what is on the filesystem.
|
||||||
|
- [ ] `bsv restore <snapshot> <fs-path>` update files on the filesystem to the version captured by `snapshot`.
|
||||||
|
|
||||||
|
### v0.0.3 - Multiple repositories
|
||||||
|
|
||||||
|
Support multiple repositories. A repository can be configured to store only metadata (typically for the local repository) or everything.
|
||||||
|
|
||||||
|
- [ ] Support repositories that store only metadata.
|
||||||
|
- [ ] `bsv repo [-v]` list all known repositories.
|
||||||
|
- [ ] `bsv repo create <name> <destination>` create a new repository.
|
||||||
|
- [ ] `bsv repo add [<name>] <destination>` add an already existing repository.
|
||||||
|
- [ ] `bsv repo remove <name>|<destination>` remove a repository.
|
||||||
|
- [ ] `bsv fetch [<name>]` fetch latest metadata from known repositories.
|
||||||
|
- [ ] `bsv sync` similar to `snapshot` + `fetch` + `restore`: Fetch latest changes from the repositories and update the filesystem to match. In case of conflict (file changed both in the repositories and locally), perform a snapshot first to ensure all conflicting versions are backed up, then use some conflict-resolution strategy and warn the user.
|
||||||
|
|
||||||
|
### v0.0.4 - Proper CAS
|
||||||
|
|
||||||
|
- [ ] Safe concurrent access (e.g. when several devices use a shared repository).
|
||||||
|
- [ ] Support removing objects.
|
||||||
|
- [ ] Garbage collection (remove unreferenced objects).
|
||||||
|
- [ ] Use garbage collection to keep metadata-only repository clean.
|
||||||
|
|
||||||
|
### v0.0.5 - Some extra features
|
||||||
|
|
||||||
|
- [ ] `bsv tag <name> [<snapshot>] [-m <message>]` set/update a tag (an alias to a specific snapshot).
|
||||||
|
- [ ] Support for symlinks.
|
||||||
|
|
||||||
|
### Later
|
||||||
|
|
||||||
|
- [ ] `bsv watch` starts a daemon that watches changes in mapped directories and automatically creates snapshots.
|
||||||
|
- [ ] `bsv http` starts an http server that exposes an API + an interface to manipulate BSV. Allows listing files, exploring history, downloading and uploading files...
|
||||||
|
- [ ] Bsv protocol + client/server
|
||||||
|
- [ ] Custom rules for repository to select what must be stored or not.
|
||||||
|
- [ ] Create sensible rules for backup (keep a lot of recent versions, less for older versions).
|
||||||
|
- [ ] Add object set support (a kind of object that simply stores a collection of objects). Can be used as tag.
|
||||||
|
- [ ] Add mail object ?
|
||||||
|
|||||||
@@ -12,22 +12,78 @@ classifiers = [
|
|||||||
]
|
]
|
||||||
dynamic = ["version"]
|
dynamic = ["version"]
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"click",
|
||||||
"fastcdc",
|
"fastcdc",
|
||||||
|
"rich",
|
||||||
"tomlkit",
|
"tomlkit",
|
||||||
|
"typing-extensions"
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
test = [
|
dev = [
|
||||||
|
"hypothesis",
|
||||||
"pytest",
|
"pytest",
|
||||||
|
"pytest-cov"
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
# TODO
|
"Homepage" = "https://git.draklia.net/draklaw/pybsv"
|
||||||
# "Homepage" = "https://github.com/pypa/sampleproject"
|
"Bug Tracker" = "https://git.draklia.net/draklaw/pybsv/issues"
|
||||||
# "Bug Tracker" = "https://github.com/pypa/sampleproject/issues"
|
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
bsv = "bsv.main:main"
|
bsv = "bsv.cli:cli"
|
||||||
|
|
||||||
|
|
||||||
|
[tool.ruff]
|
||||||
|
target-version = "py311"
|
||||||
|
|
||||||
|
[tool.ruff.lint]
|
||||||
|
select = [
|
||||||
|
"B", # flake8-bugbear
|
||||||
|
"D", # pydocstyle
|
||||||
|
"DOC", # pydoclint
|
||||||
|
"E", # pycodestyle
|
||||||
|
"EM", # flake8-errmsg
|
||||||
|
"F", # Pyflakes
|
||||||
|
"FURB", # refurb
|
||||||
|
"G", # flake8-logging-format
|
||||||
|
"I", # isort
|
||||||
|
"ISC", # flake8-implicit-str-concat
|
||||||
|
"LOG", # flake8-logging
|
||||||
|
"N", # pep8-naming
|
||||||
|
"PERF", # Perflint
|
||||||
|
"PT", # flake8-pytest-style
|
||||||
|
"PTH", # flake8-use-pathlib
|
||||||
|
"S", # flake8-bandit
|
||||||
|
"SIM", # flake8-simplify
|
||||||
|
"TC", # flake8-type-checking
|
||||||
|
"UP", # pyupgrade
|
||||||
|
"W", # pycodestyle
|
||||||
|
]
|
||||||
|
ignore = [
|
||||||
|
"UP038", # Deprecated rule; bad idea.
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.ruff.lint.per-file-ignores]
|
||||||
|
"**/tests/*" = [
|
||||||
|
"D103", # Missing docstring in public function
|
||||||
|
"S101", # Use of assert detected
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.ruff.lint.isort]
|
||||||
|
force-sort-within-sections = true
|
||||||
|
lines-after-imports = 2
|
||||||
|
required-imports = ["from __future__ import annotations"]
|
||||||
|
|
||||||
|
[tool.ruff.lint.pydocstyle]
|
||||||
|
convention = "google"
|
||||||
|
|
||||||
|
|
||||||
|
[tool.coverage.report]
|
||||||
|
exclude_also = [
|
||||||
|
"if TYPE_CHECKING:",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["setuptools", "setuptools-scm"]
|
requires = ["setuptools", "setuptools-scm"]
|
||||||
|
|||||||
18
src/bsv.bak/__init__.py
Normal file
18
src/bsv.bak/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from bsv._version import __version__, __version_tuple__
|
||||||
21
src/bsv.bak/__main__.py
Normal file
21
src/bsv.bak/__main__.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from bsv.main import main
|
||||||
|
|
||||||
|
|
||||||
|
# Propagate main()'s return value as the process exit status. SystemExit is
# raised directly because the `exit` helper is injected by the `site` module
# for interactive use and is not guaranteed to exist (e.g. `python -S`).
raise SystemExit(main())
|
||||||
16
src/bsv.bak/_version.py
Normal file
16
src/bsv.bak/_version.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
# file generated by setuptools_scm
|
||||||
|
# don't change, don't track in version control
|
||||||
|
TYPE_CHECKING = False
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from typing import Tuple, Union
|
||||||
|
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
||||||
|
else:
|
||||||
|
VERSION_TUPLE = object
|
||||||
|
|
||||||
|
version: str
|
||||||
|
__version__: str
|
||||||
|
__version_tuple__: VERSION_TUPLE
|
||||||
|
version_tuple: VERSION_TUPLE
|
||||||
|
|
||||||
|
__version__ = version = '0.0.1.dev8+g52a553d.d20231127'
|
||||||
|
__version_tuple__ = version_tuple = (0, 0, 1, 'dev8', 'g52a553d.d20231127')
|
||||||
126
src/bsv.bak/cli.py
Normal file
126
src/bsv.bak/cli.py
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Callable, TypeVar
|
||||||
|
|
||||||
|
from rich.console import Console
|
||||||
|
from rich.text import Text
|
||||||
|
|
||||||
|
|
||||||
|
# Shared stdout console; None until init_consoles() assigns it.
_console: Console | None = None


def get_console() -> Console:
    """Return the shared stdout console.

    Raises:
        AssertionError: If the console has not been set yet.
    """
    console = _console
    assert console is not None
    return console
|
||||||
|
|
||||||
|
# Shared stderr console; None until init_consoles() assigns it.
_error_console: Console | None = None


def get_error_console() -> Console:
    """Return the shared stderr console.

    Raises:
        AssertionError: If the console has not been set yet.
    """
    console = _error_console
    assert console is not None
    return console
|
||||||
|
|
||||||
|
|
||||||
|
def init_consoles(color: str="auto"):
    """Create the shared stdout and stderr consoles.

    Both consoles are built with the same options; the stderr one
    additionally gets `stderr=True`.

    Args:
        color: "always" adds `force_terminal=True`, "never" adds
            `no_color=True`; "auto" (or any other value) adds nothing.

    Raises:
        AssertionError: If either console has already been created.
    """
    global _console, _error_console

    assert _console is None
    assert _error_console is None

    options: dict[str, Any] = {"tab_size": 4}
    if color == "always":
        options["force_terminal"] = True
    elif color == "never":
        options["no_color"] = True

    _console = Console(**options)
    _error_console = Console(stderr=True, **options)
|
||||||
|
|
||||||
|
|
||||||
|
# Type of the value produced by a prompt's factory callable.
PromptType = TypeVar("PromptType")


class NoDefaultType:
    """Type of the `NoDefault` sentinel; its repr is "NoDefault"."""

    def __repr__(self) -> str:
        return "NoDefault"


# Sentinel used as the `default` of prompt() when no default value is given.
NoDefault = NoDefaultType()
|
||||||
|
|
||||||
|
def prompt(
    prompt: str,
    factory: Callable[[str], PromptType],
    *,
    console: Console | None = None,
    default: PromptType | NoDefaultType = NoDefault,
    show_default: bool = True,
) -> PromptType:
    """Ask a question on the console until the answer can be converted.

    Args:
        prompt: Text displayed to the user.
        factory: Converts the raw input string. A ValueError it raises is
            printed and the question is asked again.
        console: Console to use; the shared stdout console when None.
        default: Value returned on empty input, unless it is `NoDefault`.
        show_default: Whether the default is displayed after the prompt text.

    Returns:
        `default` on empty input (when one is set), otherwise the value
        returned by `factory`.

    Raises:
        KeyboardInterrupt: Re-raised after an empty line has been printed.
    """
    if console is None:
        console = get_console()

    label = Text(prompt, style="prompt")
    label.end = ""
    if show_default and default is not NoDefault:
        label.append(" ")
        label.append(f"({default})", style="prompt.default")
    label.append(": ")

    while True:
        try:
            answer = console.input(label)
        except KeyboardInterrupt:
            console.print("")
            raise

        if answer or isinstance(default, NoDefaultType):
            # Non-empty answer, or no default to fall back on: convert it.
            try:
                return factory(answer)
            except ValueError as err:
                console.print(err)
        else:
            return default
|
||||||
|
|
||||||
|
def prompt_confirmation(prompt: str, *, console: Console | None=None, default: bool=True) -> bool:
    """Ask a yes/no question on the console until a valid answer is given.

    Args:
        prompt: Text displayed to the user.
        console: Console to use; the shared stdout console when None.
        default: Answer returned on empty input. It also selects the hint
            shown after the prompt: "(Y/n)" when true, "(y/N)" otherwise.

    Returns:
        True for "y", False for "n" (case-insensitive, surrounding
        whitespace ignored), or `default` on empty input.

    Raises:
        KeyboardInterrupt: Re-raised after an empty line has been printed.
    """
    if console is None:
        console = get_console()

    prompt_text = Text(prompt, style="prompt")
    prompt_text.end = ""
    prompt_text.append(" ")
    prompt_text.append("(Y/n)" if default else "(y/N)", style="prompt.default")
    prompt_text.append(": ")

    while True:
        try:
            value = console.input(prompt_text).strip().lower()
        except KeyboardInterrupt:
            console.print("")
            raise

        if not value and not isinstance(default, NoDefaultType):
            return default
        # Compare against whole answers: a substring test against "yn"
        # would also accept "yn" (and the empty string).
        if value in ("y", "n"):
            return value == "y"
        console.print("Please answer 'y' or 'n'.")
|
||||||
@@ -19,6 +19,7 @@ from argparse import ArgumentParser
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from bsv import __version__
|
from bsv import __version__
|
||||||
|
from bsv.cli import get_console
|
||||||
from bsv.command import command
|
from bsv.command import command
|
||||||
from bsv.repository import Repository
|
from bsv.repository import Repository
|
||||||
|
|
||||||
@@ -33,27 +34,29 @@ def init_parser(parser: ArgumentParser):
|
|||||||
)
|
)
|
||||||
|
|
||||||
@command(init_parser)
|
@command(init_parser)
|
||||||
def info(repository_path: Path | None, verbosity: int=0) -> int:
|
def info(config_path: Path, verbosity: int=0) -> int:
|
||||||
"""Print informations about bsv: config file used, known repository, file mapping...
|
"""Print informations about bsv: config file used, known repository, file mapping...
|
||||||
"""
|
"""
|
||||||
|
|
||||||
print(f"bsv v{__version__}")
|
print = get_console().print
|
||||||
|
|
||||||
if repository_path is None:
|
print(f"bsv [green]v{__version__}")
|
||||||
print("Repository path not found. Bsv is likely not setup on this device.")
|
|
||||||
|
if not config_path.exists():
|
||||||
|
print("bsv configuration not found. Bsv is likely not setup on this device.", style="red")
|
||||||
return 0
|
return 0
|
||||||
else:
|
|
||||||
print(f"Repository path: {repository_path}")
|
|
||||||
|
|
||||||
repo = Repository(repository_path)
|
repo = Repository(config_path)
|
||||||
|
|
||||||
print(f"Repository name: {repo.name}")
|
print(f"[blue]Config path: [bold yellow]{repo.config_path}")
|
||||||
|
print(f"[blue]Device name: [bold yellow]{repo.device_name}")
|
||||||
|
print(f"[blue]Local repository: [bold yellow]{repo._local_repository_path}")
|
||||||
|
|
||||||
if repo.path_map:
|
print("[blue]Path map:[/blue] (bsv path <-> filesystem path)")
|
||||||
print("Path map: (bsv path <-> filesystem path)")
|
if repo.path_map.pairs:
|
||||||
for pair in sorted(repo.path_map):
|
for pair in sorted(repo.path_map.pairs):
|
||||||
print(f" {pair.bsv} <-> {pair.fs}")
|
print(f" {pair.bsv} <-> {pair.fs}")
|
||||||
else:
|
else:
|
||||||
print("Path map is empty.")
|
print(" [bold yellow]No path mapped.")
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
118
src/bsv.bak/command/init.py
Normal file
118
src/bsv.bak/command/init.py
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from argparse import ArgumentParser
|
||||||
|
from pathlib import Path
|
||||||
|
import platform
|
||||||
|
|
||||||
|
from bsv.command import command
|
||||||
|
from bsv.repository import check_config_path, check_device_name, check_local_repository_path, create_repository
|
||||||
|
from bsv.util import default_local_repository_path
|
||||||
|
|
||||||
|
|
||||||
|
def init_parser(parser: ArgumentParser):
    """Register the command-line options of the `init` command on `parser`.

    Options added: `--interactive/-i`, `--local-repository/-l` (stored as
    `local_repository_path`) and `--device-name/-n`.
    """
    local_repository_default = default_local_repository_path()

    parser.add_argument(
        "--interactive", "-i",
        default = False,
        action = "store_true",
        help = "Prompt the user for configuration choices.",
    )
    parser.add_argument(
        "--local-repository", "-l",
        type = Path,
        default = local_repository_default,
        # With nargs="?", a bare "-l" produces `const`, which is None unless
        # set; fall back to the default path instead of handing None on.
        const = local_repository_default,
        nargs = "?",
        dest = "local_repository_path",
        help = "Path to a non-existing or empty folder where bsv data will be stored.",
    )
    parser.add_argument(
        "--device-name", "-n",
        default = platform.node(),
        help = "Name of the device. Default to system hostname.",
    )
|
||||||
|
|
||||||
|
@command(init_parser)
def init(
    config_path: Path,
    device_name: str,
    local_repository_path: Path,
    interactive: bool = False,
) -> int:
    """Initialize a new bsv repository.

    Args:
        config_path: Location of the bsv configuration file.
        device_name: Name of this device.
        local_repository_path: Folder of the local repository.
        interactive: When true, prompt for each setting (the given values
            are offered as defaults) and ask for confirmation before
            creating the repository.

    Returns:
        0 once the repository has been created, 1 if a setting is rejected
        by its check or the user declines the confirmation.
    """
    from bsv.cli import get_console, get_error_console, prompt, prompt_confirmation

    # Named `echo` so the builtin `print` is not shadowed.
    echo = get_console().print

    def absolute(path: Path) -> Path:
        """Resolve `path` unless it is already absolute."""
        return path if path.is_absolute() else path.resolve()

    # Factories handed to prompt(): a ValueError raised by a check makes
    # prompt() print the error and ask again.
    def make_config_path(value: str) -> Path:
        path = absolute(Path(value.strip()))
        check_config_path(path)
        return path

    def make_device_name(value: str) -> str:
        name = value.strip()
        check_device_name(name)
        return name

    def make_local_repository_path(value: str) -> Path:
        path = absolute(Path(value))
        check_local_repository_path(path)
        return path

    if interactive:
        config_path = prompt("Bsv configuration file", make_config_path, default=config_path)
        device_name = prompt("Device name", make_device_name, default=device_name)
        local_repository_path = prompt("Destination", make_local_repository_path, default=local_repository_path)

    config_path = absolute(config_path)
    local_repository_path = absolute(local_repository_path)

    # Validate again: values may come straight from the command line, or be
    # prompt defaults that were never passed through a factory.
    try:
        check_config_path(config_path)
        check_device_name(device_name)
        check_local_repository_path(local_repository_path)
    except ValueError as err:
        get_error_console().print(err, style="bold red")
        return 1

    echo("Bsv repository will be created with the following settings:", style="green")
    echo("")
    echo(f"\t[blue]:page_facing_up: Config path[/blue]: [bold yellow]{config_path}")
    echo(f"\t[blue]:computer: Device name[/blue]: [bold yellow]{device_name}")
    echo(f"\t[blue]:floppy_disk: Local repository[/blue]: [bold yellow]{local_repository_path}")
    echo("")

    if interactive and not prompt_confirmation("Create repository ?"):
        return 1

    create_repository(
        config_path = config_path,
        device_name = device_name,
        local_repository_path = local_repository_path,
    )

    return 0
|
||||||
32
src/bsv.bak/exception.py
Normal file
32
src/bsv.bak/exception.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
|
class BsvError(RuntimeError):
    """Base class of the bsv-specific exceptions defined in this module."""


class NotFound(BsvError):
    """Presumably raised when a requested item cannot be found (raise sites not visible here)."""


class UnmappedPath(BsvError):
    """Raised when a filesystem path is not covered by any path mapping."""


class UnexpectedObjectType(BsvError):
    """Presumably raised when an object has an unexpected type (raise sites not visible here)."""


class ConfigError(BsvError):
    """Presumably raised for configuration problems (raise sites not visible here)."""
|
||||||
@@ -22,7 +22,9 @@ import sys
|
|||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
|
|
||||||
from bsv import __version__
|
from bsv import __version__
|
||||||
|
from bsv.cli import get_error_console, init_consoles
|
||||||
from bsv.command import init_commands
|
from bsv.command import init_commands
|
||||||
|
from bsv.util import default_bsv_config_path
|
||||||
|
|
||||||
|
|
||||||
def make_parser(
|
def make_parser(
|
||||||
@@ -31,10 +33,20 @@ def make_parser(
|
|||||||
) -> ArgumentParser:
|
) -> ArgumentParser:
|
||||||
parent_parser = ArgumentParser(add_help=False)
|
parent_parser = ArgumentParser(add_help=False)
|
||||||
parent_parser.add_argument(
|
parent_parser.add_argument(
|
||||||
"--repository",
|
"--color",
|
||||||
type = Path,
|
default = "auto",
|
||||||
|
choices = ("always", "auto", "never"),
|
||||||
help = dedent("""
|
help = dedent("""
|
||||||
Bsv repository path. Overides default paths and BSV_REPOSITORY environment variable.
|
Force or disable colors, or auto-detect terminal support.
|
||||||
|
""").strip(),
|
||||||
|
)
|
||||||
|
parent_parser.add_argument(
|
||||||
|
"--config",
|
||||||
|
default = default_bsv_config_path(),
|
||||||
|
type = Path,
|
||||||
|
dest = "config_path",
|
||||||
|
help = dedent("""
|
||||||
|
Bsv config path. Overrides default paths and BSV_CONFIG environment variable.
|
||||||
""").strip(),
|
""").strip(),
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -68,16 +80,16 @@ def main(
|
|||||||
)
|
)
|
||||||
arg_dict = vars(parser.parse_args(args or sys.argv[1:]))
|
arg_dict = vars(parser.parse_args(args or sys.argv[1:]))
|
||||||
|
|
||||||
repository_path: Path | None = arg_dict.pop("repository")
|
color = arg_dict.pop("color")
|
||||||
if repository_path is None and "BSV_REPOSITORY" in os.environ:
|
init_consoles(color=color)
|
||||||
repository_path = Path(os.environ["BSV_REPOSITORY"])
|
|
||||||
# else:
|
|
||||||
# for path in get_config_dirs():
|
|
||||||
# maybe_config_path = path / "config.toml"
|
|
||||||
# if maybe_config_path.is_file():
|
|
||||||
# config_path = maybe_config_path
|
|
||||||
# break
|
|
||||||
|
|
||||||
command = arg_dict.pop("command")
|
command = arg_dict.pop("command")
|
||||||
|
|
||||||
return command(repository_path=repository_path, **arg_dict)
|
try:
|
||||||
|
return command(**arg_dict)
|
||||||
|
except Exception as err:
|
||||||
|
get_error_console().print_exception()
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
return 130
|
||||||
|
|
||||||
|
return 0
|
||||||
@@ -15,33 +15,30 @@
|
|||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from dataclasses import dataclass
|
||||||
from typing import BinaryIO
|
|
||||||
|
|
||||||
|
|
||||||
def read_exact(stream: BinaryIO, num_bytes: int) -> bytes:
|
@dataclass(frozen=True, order=True, slots=True)
|
||||||
data = stream.read(num_bytes)
|
class Digest:
|
||||||
if len(data) != num_bytes:
|
digest: bytes = b""
|
||||||
raise IOError(f"expected {num_bytes} bytes, got {len(data)}")
|
|
||||||
return data
|
|
||||||
|
|
||||||
def read_exact_or_eof(stream: BinaryIO, num_bytes: int) -> bytes | None:
|
def __bool__(self) -> bool:
|
||||||
data = stream.read(num_bytes)
|
return bool(self.digest)
|
||||||
if not data:
|
|
||||||
return None
|
def __repr__(self) -> str:
|
||||||
if len(data) != num_bytes:
|
return self.digest.hex()
|
||||||
raise IOError(f"expected {num_bytes} bytes, got {len(data)}")
|
|
||||||
return data
|
|
||||||
|
|
||||||
|
|
||||||
class Hash(ABC):
|
@dataclass(frozen=True, order=True, slots=True)
|
||||||
name: str
|
class ObjectInfo:
|
||||||
digest_size: int
|
digest: Digest
|
||||||
|
object_type: bytes
|
||||||
|
size: int
|
||||||
|
|
||||||
@abstractmethod
|
def __repr__(self) -> str:
|
||||||
def update(self, *data: bytes | bytearray | memoryview):
|
return f"<{self.__class__.__name__} {self.digest} {self.object_type.decode()} {self.size}B>"
|
||||||
...
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def digest(self) -> bytes:
|
@dataclass(frozen=True, order=True, slots=True)
|
||||||
...
|
class Object(ObjectInfo):
|
||||||
|
data: bytes
|
||||||
89
src/bsv.bak/path_map.py
Normal file
89
src/bsv.bak/path_map.py
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from itertools import pairwise
|
||||||
|
|
||||||
|
from pathlib import Path, PurePosixPath
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from bsv.exception import UnmappedPath
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(order=True, frozen=True, slots=True)
|
||||||
|
class PathPair:
|
||||||
|
bsv: PurePosixPath
|
||||||
|
fs: Path
|
||||||
|
|
||||||
|
def __post_init__(self):
|
||||||
|
if not self.bsv.is_absolute() or not self.fs.is_absolute():
|
||||||
|
raise ValueError("paths in path_map must be absolute")
|
||||||
|
super().__setattr__("fs", self.fs.resolve())
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_obj(cls, obj: dict[str, str]) -> PathPair:
|
||||||
|
return cls(
|
||||||
|
bsv = PurePosixPath(obj["bsv"]),
|
||||||
|
fs = Path(obj["fs"]),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PathMap:
|
||||||
|
pairs: list[PathPair]
|
||||||
|
|
||||||
|
def __init__(self, pairs: list[PathPair]=[]):
|
||||||
|
self.pairs = sorted(pairs)
|
||||||
|
for path0, path1 in pairwise(self.pairs):
|
||||||
|
if path0 == path1 or path1.bsv.relative_to(path0.bsv):
|
||||||
|
raise ValueError("bsv paths must be unique and independent")
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_obj(cls, obj: list[dict[str, str]]) -> PathMap:
|
||||||
|
return cls([
|
||||||
|
PathPair.from_obj(item)
|
||||||
|
for item in obj
|
||||||
|
])
|
||||||
|
|
||||||
|
def mount_point(self, fs_path: Path) -> PathPair:
|
||||||
|
fs_path = fs_path.resolve()
|
||||||
|
best_pair = None
|
||||||
|
best_relative = None
|
||||||
|
for pair in self.pairs:
|
||||||
|
try:
|
||||||
|
relative = fs_path.relative_to(pair.fs)
|
||||||
|
if not best_pair or not best_relative or len(relative.parts) < len(best_relative.parts):
|
||||||
|
best_pair = pair
|
||||||
|
best_relative = relative
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
if best_pair is None:
|
||||||
|
raise UnmappedPath(f"unmapped fs path {fs_path}")
|
||||||
|
return best_pair
|
||||||
|
|
||||||
|
def relative_bsv_path(self, fs_target: Path, relative_to: Path) -> PurePosixPath:
|
||||||
|
if not relative_to.is_absolute():
|
||||||
|
relative_to = fs_target / relative_to
|
||||||
|
|
||||||
|
fs_target = fs_target.resolve()
|
||||||
|
relative_to = relative_to.resolve()
|
||||||
|
|
||||||
|
target_mount_point = self.mount_point(fs_target)
|
||||||
|
base_mount_point = self.mount_point(relative_to)
|
||||||
|
|
||||||
|
raise NotImplemented("not implemented yet")
|
||||||
|
|
||||||
|
def clone(self) -> PathMap:
|
||||||
|
return PathMap(self.pairs)
|
||||||
679
src/bsv.bak/repository.py
Normal file
679
src/bsv.bak/repository.py
Normal file
@@ -0,0 +1,679 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime as DateTime
|
||||||
|
import hashlib
|
||||||
|
from io import BytesIO
|
||||||
|
from pathlib import Path, PurePosixPath
|
||||||
|
import tomllib
|
||||||
|
from typing import TYPE_CHECKING, Any, BinaryIO, Self
|
||||||
|
|
||||||
|
from fastcdc import fastcdc
|
||||||
|
|
||||||
|
from bsv import __version__
|
||||||
|
from bsv.exception import ConfigError
|
||||||
|
from bsv.object import ObjectInfo
|
||||||
|
from bsv.path_map import PathMap
|
||||||
|
from bsv.simple_cas import SimpleCas
|
||||||
|
from bsv.simple_cas.cas import Digest, SimpleCas
|
||||||
|
from bsv.util import default_bsv_config_path, default_local_repository_path, read_exact, read_exact_or_eof, time_from_timestamp_us, timestamp_us_from_time
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from bsv.tree_walker import TreeWalker
|
||||||
|
|
||||||
|
|
||||||
|
DEFAULT_MIN_CHUNK_SIZE = 1 << 12
|
||||||
|
DEFAULT_AVG_CHUNK_SIZE = 1 << 16
|
||||||
|
DEFAULT_MAX_CHUNK_SIZE = 1 << 20
|
||||||
|
|
||||||
|
|
||||||
|
class Repository:
    """Device-side handle on a bsv repository.

    The constructor reads the device configuration (a TOML file with a
    ``[bsv]`` table) and opens the content-addressable store (CAS) described
    in the configured local repository directory. The class then offers typed
    accessors for blob, symlink, tree and snapshot objects, and for named
    references.

    The instance is a re-entrant context manager: the underlying CAS is only
    entered by the outermost ``with`` block and exited when that block ends.
    """

    _config_path: Path
    _device_name: str
    _local_repository_path: Path

    _cas: SimpleCas
    _min_chunk_size: int = DEFAULT_MIN_CHUNK_SIZE
    _avg_chunk_size: int = DEFAULT_AVG_CHUNK_SIZE
    _max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE

    _path_map: PathMap
    # _remotes: list[object]

    # Number of nested `with` blocks currently active on this instance.
    _context_depth: int = 0

    def __init__(self, config_path: Path):
        """Load the configuration stored at ``config_path`` and open the CAS.

        Raises:
            ConfigError: if a required item of the ``[bsv]`` table is missing.
        """
        self._config_path = config_path.resolve()

        with self._config_path.open("rb") as stream:
            config = tomllib.load(stream)

        bsv = config.get("bsv", {})

        def get(key: str) -> Any:
            value = bsv.get(key)
            if value is None:
                raise ConfigError(f"invalid bsv configuration: missing bsv.{key} item")
            return value

        self._device_name = get("device_name")
        self._local_repository_path = Path(get("local_repository"))
        self._min_chunk_size = get("min_chunk_size")
        self._avg_chunk_size = get("avg_chunk_size")
        self._max_chunk_size = get("max_chunk_size")
        self._path_map = PathMap.from_obj(get("path_map"))

        self._cas = make_cas(self._local_repository_path)

    @property
    def config_path(self) -> Path:
        """Resolved path of the device configuration file."""
        return self._config_path

    @property
    def device_name(self) -> str:
        """Value of the ``device_name`` configuration item."""
        return self._device_name

    @property
    def path_map(self) -> PathMap:
        """A clone of the configured path map."""
        return self._path_map.clone()

    def get_blob(self, digest: Digest) -> BlobObject:
        """Read the ``blob`` object ``digest`` and parse its chunk list."""
        with self:
            obj, blob = self._read(digest, object_type=b"blob")
            return BlobObject(
                digest = obj.digest,
                object_type = obj.object_type,
                size = obj.size,
                blob = blob,
            )

    def add_blob(self, stream: BinaryIO, *, dry_run: bool=False) -> BlobObject:
        """Chunk the content of ``stream`` and store it as a ``blob`` object.

        ``dry_run`` is forwarded to every CAS write.
        """
        with self:
            return self._write(b"blob", stream, dry_run=dry_run)

    def get_symlink(self, digest: Digest) -> SymlinkObject:
        """Read and decode the ``slnk`` object ``digest``."""
        with self:
            obj = self._cas.read(digest, object_type=b"slnk")
            return SymlinkObject(
                digest = obj.digest,
                object_type = obj.object_type,
                size = obj.size,
                symlink = Symlink.from_bytes(self, obj.data),
            )

    def add_symlink(self, symlink: Symlink, *, dry_run: bool=False) -> SymlinkObject:
        """Serialize ``symlink`` and store it as a ``slnk`` object."""
        with self:
            data = symlink.to_bytes()
            return SymlinkObject(
                digest = self._cas.write(b"slnk", data, dry_run=dry_run),
                object_type = b"slnk",
                size = len(data),
                symlink = symlink,
            )

    def add_symlink_from_fs_target(self, fs_symlink: Path, fs_target: Path, *, dry_run: bool=False) -> SymlinkObject:
        """Store a symlink located at ``fs_symlink`` and pointing to ``fs_target``.

        The target is translated to a bsv path through the path map.
        ``fs_symlink`` must be absolute.
        """
        assert fs_symlink.is_absolute()
        return self.add_symlink(
            Symlink(
                repo = self,
                is_absolute = fs_target.is_absolute(),
                target = self._path_map.relative_bsv_path(fs_target, relative_to=fs_symlink),
            ),
            dry_run = dry_run,
        )

    def get_tree(self, digest: Digest) -> TreeObject:
        """Read and decode the ``tree`` object ``digest``."""
        with self:
            obj = self._cas.read(digest, object_type=b"tree")
            return TreeObject(
                digest = obj.digest,
                object_type = obj.object_type,
                size = obj.size,
                tree = Tree.from_bytes(self, obj.data),
            )

    def add_tree(self, tree: Tree, *, dry_run: bool=False) -> TreeObject:
        """Serialize ``tree`` and store it as a ``tree`` object."""
        with self:
            data = tree.to_bytes()
            return TreeObject(
                digest = self._cas.write(b"tree", data, dry_run=dry_run),
                object_type = b"tree",
                size = len(data),
                tree = tree,
            )

    def add_tree_from_path(self, path: Path, *, dry_run: bool=False) -> TreeObject:
        """Add the filesystem directory ``path`` through a ``TreeWalker``."""
        # Imported here: bsv.tree_walker itself imports this module.
        from bsv.tree_walker import TreeWalker
        walker = TreeWalker(self, dry_run=dry_run)
        return walker.add_tree(path)

    def get_snapshot(self, digest: Digest) -> SnapshotObject:
        """Read and decode the ``snap`` object ``digest``."""
        with self:
            obj = self._cas.read(digest, object_type=b"snap")
            return SnapshotObject(
                digest = obj.digest,
                object_type = obj.object_type,
                size = obj.size,
                snapshot = Snapshot.from_bytes(self, obj.data),
            )

    def add_snapshot(self, snapshot: Snapshot, *, dry_run: bool=False) -> SnapshotObject:
        """Serialize ``snapshot`` and store it as a ``snap`` object."""
        with self:
            data = snapshot.to_bytes()
            return SnapshotObject(
                digest = self._cas.write(b"snap", data, dry_run=dry_run),
                object_type = b"snap",
                size = len(data),
                snapshot = snapshot,
            )

    # def take_snapshot(
    #     self,
    #     parent_digests: list[Digest] = [],
    #     *,
    #     walker_type: Type[TreeWalker] | None = None,
    #     dry_run: bool = False,
    # ):
    #     from bsv.tree_walker import TreeWalker

    #     walker = (walker_type or TreeWalker)(self, dry_run=dry_run)

    #     # parents = [
    #     #     self.get_snapshot(digest)
    #     #     for digest in parent_digests
    #     # ]
    #     parent = self.get_snapshot(parent_digests[0]) if parent_digests else None

    #     snapshot = Snapshot(
    #         repo = self,
    #         tree_digest = walker.add_virtual_tree(self._path_map, parent=),
    #         parents = parent_digests,
    #         repo_name = self._name,
    #         timestamp = timestamp_us_from_time(DateTime.now()),
    #     )
    #     return self.add_snapshot(snapshot, dry_run=dry_run)

    def get_ref(self, key: str) -> Digest | None:
        """Return the digest stored under the reference ``key`` in the CAS."""
        return self._cas.get_ref(key)

    def set_ref(self, key: str, digest: Digest):
        """Store ``digest`` under the reference ``key`` in the CAS."""
        self._cas.set_ref(key, digest)

    def get_head_snapshot(self) -> Digest | None:
        """Return the digest stored under the ``"HEAD"`` reference."""
        return self.get_ref("HEAD")

    def _read(self, digest: Digest, object_type: bytes) -> tuple[ObjectInfo, Blob]:
        """Read a chunked object and parse its payload into a ``Blob``."""
        obj = self._cas.read(digest, object_type=object_type)
        stream = BytesIO(obj.data)
        return obj, Blob.from_stream(self, stream, digest_size=self._cas._digest_size)

    def _write(self, object_type: bytes, stream: BinaryIO, *, dry_run: bool=False) -> BlobObject:
        """Split ``stream`` into chunks and store them with their index object.

        Each chunk is written as a ``chnk`` object. The index object, of type
        ``object_type``, holds the total size on 8 bytes followed, for each
        chunk, by its digest and its length on 4 bytes.

        With ``dry_run`` set, the flag is forwarded to every CAS write,
        including the index object.
        """
        out = BytesIO()
        size = 0
        chunks = []
        for chunk in fastcdc(
            stream,
            min_size = self._min_chunk_size,
            avg_size = self._avg_chunk_size,
            max_size = self._max_chunk_size,
            fat = True,
        ):
            size += chunk.length
            digest = self._cas.write(b"chnk", chunk.data, dry_run=dry_run)
            chunks.append(Chunk(digest, chunk.length))
            out.write(digest.digest)
            out.write(chunk.length.to_bytes(4))

        index = out.getvalue()
        return BlobObject(
            # dry_run was previously dropped here, so a dry run still stored
            # the index object.
            digest = self._cas.write(object_type, size.to_bytes(8) + index, dry_run=dry_run),
            object_type = object_type,
            size = 8 + len(index),
            blob = Blob(
                repo = self,
                size = size,
                chunks = chunks,
            )
        )

    def __enter__(self):
        """Enter the CAS on the outermost ``with`` block and count the nesting."""
        if self._context_depth == 0:
            self._cas.__enter__()
        self._context_depth += 1
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Exit the CAS when the outermost ``with`` block ends.

        Returns the result of the CAS ``__exit__`` in that case, ``None``
        for inner blocks.
        """
        self._context_depth -= 1
        if self._context_depth == 0:
            return self._cas.__exit__(exc_type, exc_value, traceback)
|
||||||
|
|
||||||
|
|
||||||
|
def check_config_path(path: Path):
    """Validate ``path`` as the destination of a new bsv configuration file.

    Raises:
        ValueError: if ``path`` already exists, or if its parent is not a
            directory while ``path`` is not the default configuration path.
    """
    if path.exists():
        raise ValueError(f"{path} already exists.")

    is_default_location = path == default_bsv_config_path()
    if is_default_location:
        return

    parent_dir = path.parent
    if not parent_dir.is_dir():
        raise ValueError(f"{path.parent} does not exist or is not a directory.")
|
||||||
|
|
||||||
|
def check_device_name(device_name: str):
    """Validate a device name.

    Raises:
        ValueError: if the name is empty or is not a valid identifier
            according to ``str.isidentifier``.
    """
    problem = None
    if not device_name:
        problem = "Device name cannot be empty."
    elif not device_name.isidentifier():
        problem = f"{device_name} is not a valid device name."

    if problem is not None:
        raise ValueError(problem)
|
||||||
|
|
||||||
|
def check_local_repository_path(path: Path):
    """Validate ``path`` as the location of a new local repository.

    Raises:
        ValueError: if the parent directory is missing (unless ``path`` is
            the default local repository path), if ``path`` exists but is
            not a directory, or if it is a non-empty directory.
    """
    parent_dir = path.parent
    if path != default_local_repository_path() and not parent_dir.exists():
        raise ValueError(f"Directory {path.parent} does not exists.")

    # Nothing more to verify when the directory is yet to be created.
    if not path.exists():
        return

    if not path.is_dir():
        raise ValueError(f"{path} exists but is not a directory.")

    if any(path.iterdir()):
        raise ValueError(f"Local repository directory {path} is not empty.")
|
||||||
|
|
||||||
|
def create_repository(
    config_path: Path,
    device_name: str,
    local_repository_path: Path,
    cas: str = "simple",
    hash: str = "sha256",
    min_chunk_size: int = DEFAULT_MIN_CHUNK_SIZE,
    avg_chunk_size: int = DEFAULT_AVG_CHUNK_SIZE,
    max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE,
):
    """Create the configuration files of a new repository and open it.

    Two TOML files are written: the device configuration at ``config_path``
    (``[bsv]`` table) and ``bsv_repository.config`` inside
    ``local_repository_path`` (``[cas]`` table holding ``type`` and ``hash``).

    Returns:
        The ``Repository`` opened from ``config_path``.

    Raises:
        ValueError: if one of the argument checks fails.
        RuntimeError: if a directory or a configuration file cannot be
            created; the original error is chained as ``__cause__``.
    """
    from datetime import datetime as DateTime
    from getpass import getuser
    from os import getlogin

    import tomlkit

    check_config_path(config_path)
    check_device_name(device_name)
    check_local_repository_path(local_repository_path)

    if config_path == default_bsv_config_path():
        try:
            config_path.parent.mkdir(parents=True, exist_ok=True)
        except Exception as err:
            raise RuntimeError(f"failed to create bsv config destination directory {config_path.parent}") from err

    try:
        local_repository_path.mkdir(exist_ok=True)
    except Exception as err:
        raise RuntimeError(f"failed to create local repository directory {local_repository_path}") from err

    # os.getlogin() raises OSError when the process has no controlling
    # terminal (services, some containers); fall back to getpass.getuser().
    try:
        user_name = getlogin()
    except OSError:
        user_name = getuser()
    created_comment = f"Created by {user_name} on {DateTime.now().isoformat()}."

    bsv_table = tomlkit.table()

    bsv_table.add(tomlkit.comment("Name of the instance."))
    bsv_table.add(tomlkit.comment("Ideally, this should be unique among all connected devices."))
    bsv_table.add("device_name", device_name)

    bsv_table.add(tomlkit.nl())
    bsv_table.add(tomlkit.comment("Path to the local repository."))
    bsv_table.add("local_repository", str(local_repository_path))

    bsv_table.add(tomlkit.nl())
    bsv_table.add(tomlkit.comment("Properties of the content-based chunking algorithm."))
    bsv_table.add("min_chunk_size", min_chunk_size)
    bsv_table.add("avg_chunk_size", avg_chunk_size)
    bsv_table.add("max_chunk_size", max_chunk_size)

    bsv_table.add(tomlkit.nl())
    bsv_table.add(tomlkit.comment("Mapping between bsv tree and the local filesystem."))
    bsv_table.add("path_map", tomlkit.array())

    bsv_config = tomlkit.document()
    bsv_config.add(tomlkit.comment("bsv device configuration"))
    bsv_config.add(tomlkit.comment(created_comment))
    bsv_config.add(tomlkit.nl())
    bsv_config.add("bsv", bsv_table)

    cas_table = tomlkit.table()
    cas_table.add("type", cas)
    cas_table.add("hash", hash)

    cas_config = tomlkit.document()
    cas_config.add(tomlkit.comment(f"bsv local repository configuration for instance {config_path}."))
    cas_config.add(tomlkit.comment(created_comment))
    cas_config.add(tomlkit.nl())
    cas_config.add("cas", cas_table)

    cas_config_path = local_repository_path / "bsv_repository.config"

    try:
        bsv_stream = config_path.open("w", encoding="utf-8")
    except Exception as err:
        raise RuntimeError(f"failed to open bsv configuration file {config_path}") from err

    # Both files are opened before anything is written, as before, but the
    # first stream is now closed if opening the second one fails.
    with bsv_stream:
        try:
            cas_stream = cas_config_path.open("w", encoding="utf-8")
        except Exception as err:
            raise RuntimeError(f"failed to open local repository configuration file {cas_config_path}") from err

        with cas_stream:
            tomlkit.dump(bsv_config, bsv_stream)
            tomlkit.dump(cas_config, cas_stream)

    repo = Repository(config_path)

    return repo
|
||||||
|
|
||||||
|
|
||||||
|
def make_cas(cas_config_path: Path) -> SimpleCas:
    """Open the content-addressable store located in ``cas_config_path``.

    The directory must contain a ``bsv_repository.config`` TOML file whose
    ``[cas]`` table provides the ``type`` and ``hash`` items.

    Raises:
        ConfigError: if an item is missing, if the cas type is unknown or if
            the hash algorithm is rejected by ``hashlib.new``.
    """
    with (cas_config_path / "bsv_repository.config").open("rb") as stream:
        config = tomllib.load(stream)

    cas = config.get("cas", {})

    def get(key: str) -> Any:
        value = cas.get(key)
        if value is None:
            raise ConfigError(f"invalid repository configuration: missing {key} item")
        return value

    cas_type = get("type")
    if cas_type != "simple":
        raise ConfigError(f"unknown cas type {cas_type}")

    # Resolve and validate the hash now: a missing or unsupported algorithm
    # is reported when the repository is opened instead of on first use.
    hash_name = get("hash")
    try:
        hashlib.new(hash_name)
    except (TypeError, ValueError) as err:
        raise ConfigError(f"invalid repository configuration: unsupported hash {hash_name}") from err

    def hash_factory():
        return hashlib.new(hash_name)

    return SimpleCas(cas_config_path, hash_factory) # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
class ChunkedObject:
    """Content described as a total size and an ordered list of chunks."""
    repo: Repository
    size: int
    chunks: list[Chunk]

    @classmethod
    def from_stream(cls, repo: Repository, stream: BinaryIO, digest_size: int) -> Self:
        """Parse a chunk index: an 8-byte size, then chunk entries until EOF."""
        total_size = int.from_bytes(read_exact(stream, 8))
        parsed = cls(repo=repo, size=total_size, chunks=[])
        while True:
            entry = Chunk.from_stream(stream, digest_size)
            if entry is None:
                break
            parsed.chunks.append(entry)
        return parsed

    def reader(self) -> ChunkedObjectReader:
        """Return a new reader over this object."""
        return ChunkedObjectReader(self)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
class Chunk:
    """Entry of a chunk index: the digest of a chunk and its size in bytes."""
    digest: Digest
    size: int

    @classmethod
    def from_stream(cls, stream: BinaryIO, digest_size: int) -> Self | None:
        """Read one entry (digest, then a 4-byte size) from ``stream``.

        Returns ``None`` when ``read_exact_or_eof`` reports the end of the
        stream instead of a digest.
        """
        raw_digest = read_exact_or_eof(stream, digest_size)
        if raw_digest is None:
            return None

        chunk_digest = Digest(raw_digest)
        chunk_size = int.from_bytes(read_exact(stream, 4))
        return cls(digest=chunk_digest, size=chunk_size)
|
||||||
|
|
||||||
|
|
||||||
|
class ChunkedObjectReader:
    """Sequential reader over the content of a ``ChunkedObject``.

    Chunks are fetched one at a time from the CAS of the object's
    repository. Bytes fetched but not yet returned are kept in
    ``_chunk_data``.
    """
    _chunked_object: ChunkedObject
    # Index of the next chunk to fetch.
    _chunk_index: int = 0
    # Bytes already fetched but not yet handed to the caller.
    _chunk_data: bytes = b""

    def __init__(self, chunked_object: ChunkedObject):
        self._chunked_object = chunked_object

    def read(self, num_bytes: int = -1) -> bytes:
        """Return up to ``num_bytes`` bytes, or everything left if negative.

        Fewer bytes than requested are returned only when the end of the
        object is reached; ``b""`` means nothing is left.
        """
        chunk_count = len(self._chunked_object.chunks)
        parts = [self._chunk_data]
        size = len(self._chunk_data)
        while (num_bytes < 0 or size < num_bytes) and self._chunk_index < chunk_count:
            parts.append(self._read_next_chunk())
            size += len(parts[-1])

        data = b"".join(parts)
        if num_bytes < 0:
            self._chunk_data = b""
            return data

        # Hand out exactly the requested prefix and keep the rest for the
        # next call. The previous code returned the whole buffer and kept a
        # mis-sliced tail, which duplicated or corrupted data.
        self._chunk_data = data[num_bytes:]
        return data[:num_bytes]

    def read1(self) -> bytes:
        """Return pending buffered bytes if any, else the next chunk.

        Returns ``b""`` once every chunk has been consumed.
        """
        if self._chunk_data:
            # Serve leftovers first so that mixing read() and read1() never
            # skips data.
            pending, self._chunk_data = self._chunk_data, b""
            return pending
        return self._read_next_chunk()

    def _read_next_chunk(self) -> bytes:
        """Fetch the next chunk from the CAS, or ``b""`` past the last one."""
        cas = self._chunked_object.repo._cas
        chunks = self._chunked_object.chunks
        if self._chunk_index == len(chunks):
            return b""
        chunk_object = cas.read(chunks[self._chunk_index].digest, object_type=b"chnk")
        self._chunk_index += 1
        return chunk_object.data
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
class Blob(ChunkedObject):
    """Chunked object used for ``blob`` content; adds nothing to ``ChunkedObject``."""
    pass
|
||||||
|
|
||||||
|
@dataclass(frozen=True, order=True, slots=True)
class BlobObject(ObjectInfo):
    """``ObjectInfo`` of a stored blob together with its chunk index."""
    # Chunk index (total size and chunk list) of the blob.
    blob: Blob
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class Symlink:
|
||||||
|
repo: Repository
|
||||||
|
is_absolute: bool
|
||||||
|
target: PurePosixPath
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_stream(cls, repo: Repository, stream: BinaryIO) -> Self:
|
||||||
|
return cls(
|
||||||
|
repo = repo,
|
||||||
|
is_absolute = bool(read_exact(stream, 1)),
|
||||||
|
target = PurePosixPath(stream.read().decode("utf-8")),
|
||||||
|
)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_bytes(cls, repo: Repository, bytes: bytes) -> Self:
|
||||||
|
stream = BytesIO(bytes)
|
||||||
|
return cls.from_stream(repo, stream)
|
||||||
|
|
||||||
|
def write(self, stream: BinaryIO):
|
||||||
|
stream.write(self.is_absolute.to_bytes(1))
|
||||||
|
stream.write(self.target.as_posix().encode("utf-8"))
|
||||||
|
|
||||||
|
def to_bytes(self) -> bytes:
|
||||||
|
stream = BytesIO()
|
||||||
|
self.write(stream)
|
||||||
|
return stream.getvalue()
|
||||||
|
|
||||||
|
@dataclass(frozen=True, order=True, slots=True)
class SymlinkObject(ObjectInfo):
    """``ObjectInfo`` of a stored symlink together with its decoded value."""
    # Decoded symlink (absolute flag and target path).
    symlink: Symlink
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Tree:
|
||||||
|
repo: Repository
|
||||||
|
items: list[TreeItem]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def total_size(self) -> int:
|
||||||
|
return sum(
|
||||||
|
item.size
|
||||||
|
for item in self.items
|
||||||
|
)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_stream(cls, repo: Repository, stream: BinaryIO) -> Self:
|
||||||
|
tree = Tree(repo, [])
|
||||||
|
while (item := TreeItem.from_stream(stream, repo._cas._digest_size)) is not None:
|
||||||
|
tree.items.append(item)
|
||||||
|
return tree
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_bytes(cls, repo: Repository, data: bytes) -> Self:
|
||||||
|
stream = BytesIO(data)
|
||||||
|
return cls.from_stream(repo, stream)
|
||||||
|
|
||||||
|
def write(self, stream: BinaryIO):
|
||||||
|
self.items.sort(key=lambda i: i.name)
|
||||||
|
for item in self.items:
|
||||||
|
item.write(stream)
|
||||||
|
|
||||||
|
def to_bytes(self) -> bytes:
|
||||||
|
stream = BytesIO()
|
||||||
|
self.write(stream)
|
||||||
|
return stream.getvalue()
|
||||||
|
|
||||||
|
def __len__(self) -> int:
|
||||||
|
return len(self.items)
|
||||||
|
|
||||||
|
def get(self, key: str) -> TreeItem | None:
|
||||||
|
for item in self.items:
|
||||||
|
if item.name == key:
|
||||||
|
return item
|
||||||
|
return None
|
||||||
|
|
||||||
|
def __getitem__(self, key: str) -> TreeItem:
|
||||||
|
item = self.get(key)
|
||||||
|
if item is None:
|
||||||
|
raise KeyError(f"{key} not found")
|
||||||
|
return item
|
||||||
|
|
||||||
|
@dataclass(frozen=True, order=True, slots=True)
class TreeObject(ObjectInfo):
    """``ObjectInfo`` of a stored tree together with its decoded value."""
    # Decoded directory listing.
    tree: Tree

    @property
    def total_size(self) -> int:
        """Size of the tree object itself plus the total size of its items."""
        return self.size + self.tree.total_size
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class TreeItem:
|
||||||
|
digest: Digest
|
||||||
|
object_type: bytes
|
||||||
|
size: int
|
||||||
|
permissions: int
|
||||||
|
modification_timestamp_us: int
|
||||||
|
name: str
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
digest: Digest,
|
||||||
|
object_type: bytes,
|
||||||
|
size: int,
|
||||||
|
permissions: int,
|
||||||
|
modification_timestamp_us: int,
|
||||||
|
name: str,
|
||||||
|
):
|
||||||
|
if "/\\" in name:
|
||||||
|
raise ValueError(f"invalid tree item name {name}")
|
||||||
|
self.digest = digest
|
||||||
|
self.object_type = object_type
|
||||||
|
self.size = size
|
||||||
|
self.permissions = permissions
|
||||||
|
self.modification_timestamp_us = modification_timestamp_us
|
||||||
|
self.name = name
|
||||||
|
|
||||||
|
@property
|
||||||
|
def modification_time(self) -> DateTime:
|
||||||
|
return time_from_timestamp_us(self.modification_timestamp_us)
|
||||||
|
@modification_time.setter
|
||||||
|
def modification_time(self, time: DateTime):
|
||||||
|
self.modification_timestamp_us = timestamp_us_from_time(time)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_stream(cls, stream: BinaryIO, digest_size: int) -> Self | None:
|
||||||
|
digest_bytes = read_exact_or_eof(stream, digest_size)
|
||||||
|
if digest_bytes is None:
|
||||||
|
return None
|
||||||
|
return TreeItem(
|
||||||
|
digest = Digest(digest_bytes),
|
||||||
|
object_type = read_exact(stream, 4),
|
||||||
|
size = int.from_bytes(read_exact(stream, 8)),
|
||||||
|
permissions = int.from_bytes(read_exact(stream, 2)),
|
||||||
|
modification_timestamp_us = int.from_bytes(read_exact(stream, 8), signed=True),
|
||||||
|
name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
|
||||||
|
)
|
||||||
|
|
||||||
|
def write(self, stream: BinaryIO):
|
||||||
|
stream.write(self.digest.digest)
|
||||||
|
stream.write(self.object_type)
|
||||||
|
stream.write(self.size.to_bytes(8))
|
||||||
|
stream.write(self.permissions.to_bytes(2))
|
||||||
|
stream.write(self.modification_timestamp_us.to_bytes(8, signed=True))
|
||||||
|
name_bytes = self.name.encode("utf-8")
|
||||||
|
stream.write(len(name_bytes).to_bytes(2))
|
||||||
|
stream.write(name_bytes)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Snapshot:
|
||||||
|
repo: Repository
|
||||||
|
tree_digest: Digest
|
||||||
|
parents: list[Digest]
|
||||||
|
repo_name: str
|
||||||
|
timestamp_us: int
|
||||||
|
|
||||||
|
def __post_init__(self):
|
||||||
|
assert len(self.parents) < 256
|
||||||
|
|
||||||
|
@property
|
||||||
|
def time(self) -> DateTime:
|
||||||
|
return time_from_timestamp_us(self.timestamp_us)
|
||||||
|
@time.setter
|
||||||
|
def time(self, time: DateTime):
|
||||||
|
self.timestamp_us = timestamp_us_from_time(time)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_stream(cls, repo: Repository, stream: BinaryIO) -> Self:
|
||||||
|
return Snapshot(
|
||||||
|
repo = repo,
|
||||||
|
tree_digest = Digest(read_exact(stream, repo._cas._digest_size)),
|
||||||
|
parents = [
|
||||||
|
Digest(read_exact(stream, repo._cas._digest_size))
|
||||||
|
for _ in range(int.from_bytes(read_exact(stream, 1)))
|
||||||
|
],
|
||||||
|
repo_name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
|
||||||
|
timestamp_us = int.from_bytes(read_exact(stream, 8), signed=True),
|
||||||
|
)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_bytes(cls, repo: Repository, data: bytes) -> Self:
|
||||||
|
stream = BytesIO(data)
|
||||||
|
return cls.from_stream(repo, stream)
|
||||||
|
|
||||||
|
def write(self, stream: BinaryIO):
|
||||||
|
assert len(self.parents) < 256
|
||||||
|
stream.write(self.tree_digest.digest)
|
||||||
|
stream.write(len(self.parents).to_bytes(1))
|
||||||
|
for parent in self.parents:
|
||||||
|
stream.write(parent.digest)
|
||||||
|
repo_name_bytes = self.repo_name.encode("utf-8")
|
||||||
|
stream.write(len(repo_name_bytes).to_bytes(2))
|
||||||
|
stream.write(repo_name_bytes)
|
||||||
|
stream.write(self.timestamp_us.to_bytes(8, signed=True))
|
||||||
|
|
||||||
|
def to_bytes(self) -> bytes:
|
||||||
|
stream = BytesIO()
|
||||||
|
self.write(stream)
|
||||||
|
return stream.getvalue()
|
||||||
|
|
||||||
|
@dataclass(frozen=True, order=True, slots=True)
class SnapshotObject(ObjectInfo):
    """``ObjectInfo`` of a stored snapshot together with its decoded value."""
    # Decoded snapshot record.
    snapshot: Snapshot
|
||||||
@@ -19,21 +19,10 @@ from dataclasses import dataclass
|
|||||||
import hashlib
|
import hashlib
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, BinaryIO, Callable, Iterator
|
from typing import Any, BinaryIO, Callable, Iterator
|
||||||
|
from bsv.exception import NotFound, UnexpectedObjectType
|
||||||
|
from bsv.object import Digest, Object, ObjectInfo
|
||||||
|
|
||||||
from bsv.simple_cas.util import Hash, read_exact_or_eof
|
from bsv.util import Hash, read_exact_or_eof
|
||||||
|
|
||||||
|
|
||||||
class BsvError(RuntimeError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class NotFound(BsvError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class UnexpectedObjectType(BsvError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class ConfigError(BsvError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class SimpleCas:
|
class SimpleCas:
|
||||||
@@ -105,21 +94,19 @@ class SimpleCas:
|
|||||||
assert size == item.size
|
assert size == item.size
|
||||||
data = stream.read(size)
|
data = stream.read(size)
|
||||||
|
|
||||||
return Object(object_type, data)
|
return Object(digest, object_type, size, data)
|
||||||
|
|
||||||
def write(self, object_type: bytes, data: bytes) -> Digest:
|
def write(self, object_type: bytes, data: bytes, dry_run: bool=False) -> Digest:
|
||||||
assert len(object_type) == 4
|
assert len(object_type) == 4
|
||||||
assert len(data) < 2**32
|
assert len(data) < 2**32
|
||||||
|
|
||||||
hash = self._hash_factory()
|
hash = self._hash_factory()
|
||||||
hash.update(object_type)
|
hash.update(object_type)
|
||||||
hash.update(b"\0")
|
|
||||||
hash.update(len(data).to_bytes(4))
|
hash.update(len(data).to_bytes(4))
|
||||||
hash.update(b"\0")
|
|
||||||
hash.update(data)
|
hash.update(data)
|
||||||
digest = Digest(hash.digest())
|
digest = Digest(hash.digest())
|
||||||
|
|
||||||
if digest not in self:
|
if not dry_run and digest not in self:
|
||||||
with self._open_writer(digest, object_type, len(data)) as out:
|
with self._open_writer(digest, object_type, len(data)) as out:
|
||||||
out.write(digest.digest)
|
out.write(digest.digest)
|
||||||
out.write(object_type)
|
out.write(object_type)
|
||||||
@@ -164,23 +151,7 @@ class SimpleCas:
|
|||||||
return self._root_dir / "refs" / key_path
|
return self._root_dir / "refs" / key_path
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True, order=True, slots=True)
|
@dataclass(frozen=True)
|
||||||
class Digest:
|
|
||||||
digest: bytes
|
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
|
||||||
return self.digest.hex()
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Object:
|
|
||||||
object_type: bytes
|
|
||||||
data: bytes
|
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
|
||||||
return f"<Object {self.object_type.decode()}: {len(self.data)}B>"
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class IndexItem:
|
class IndexItem:
|
||||||
object_type: bytes
|
object_type: bytes
|
||||||
offset: int
|
offset: int
|
||||||
@@ -188,12 +159,3 @@ class IndexItem:
|
|||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return f"<IndexItem {self.object_type.decode()}: {self.offset}B +{self.size}B>"
|
return f"<IndexItem {self.object_type.decode()}: {self.offset}B +{self.size}B>"
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class ObjectInfo:
|
|
||||||
digest: Digest
|
|
||||||
object_type: bytes
|
|
||||||
size: int
|
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
|
||||||
return f"<ObjectInfo {self.digest} {self.object_type.decode()} {self.size}B>"
|
|
||||||
268
src/bsv.bak/tree_walker.py
Normal file
268
src/bsv.bak/tree_walker.py
Normal file
@@ -0,0 +1,268 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime as DateTime, timedelta as TimeDelta
|
||||||
|
from enum import Enum
|
||||||
|
from os import stat_result
|
||||||
|
from pathlib import Path
|
||||||
|
import stat
|
||||||
|
|
||||||
|
from bsv.object import Digest, ObjectInfo
|
||||||
|
from bsv.repository import BlobObject, Repository, SymlinkObject, Tree, TreeItem, TreeObject
|
||||||
|
from bsv.util import is_bsv_repository, object_type_from_mode
|
||||||
|
|
||||||
|
|
||||||
|
class Action(Enum):
    """What is done with an item while walking a tree."""
    ADD = "add"
    UPDATE = "update"
    REMOVE = "remove"
    IGNORE = "ignore"
    ERROR = "error"

    @classmethod
    def from_digests(cls, digest: Digest, source_digest: Digest | None) -> tuple[Action, IgnoreCause | None]:
        """Derive the action from a digest and the digest it is compared to.

        Returns ``ADD`` when ``source_digest`` is falsy, ``IGNORE`` with the
        ``UNCHANGED`` cause when both digests are equal, and ``UPDATE``
        otherwise. The second element is ``None`` unless the action is
        ``IGNORE``.
        """
        assert digest
        if not source_digest:
            return Action.ADD, None
        if source_digest != digest:
            return Action.UPDATE, None
        return Action.IGNORE, IgnoreCause.UNCHANGED
|
||||||
|
|
||||||
|
class IgnoreCause(Enum):
    """Reason attached to an `Action.IGNORE` report."""

    # The path was rejected by `TreeWalker.ignore()`.
    IGNORE_RULE = "ignore_rule"
    # The entry is identical to the one previously stored.
    UNCHANGED = "unchanged"
    # The entry is neither a symlink, a directory nor a regular file.
    UNSUPPORTED_TYPE = "unsupported_type"
|
||||||
|
|
||||||
|
|
||||||
|
class TreeWalker:
|
||||||
|
_repo: Repository
|
||||||
|
_time_rounding_us: int = 2000000
|
||||||
|
_force_hash: bool = False
|
||||||
|
_dry_run: bool = False
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
repo: Repository,
|
||||||
|
*,
|
||||||
|
time_rounding_us: int = 2000000,
|
||||||
|
force_hash: bool = False,
|
||||||
|
dry_run: bool = False,
|
||||||
|
):
|
||||||
|
self._repo = repo
|
||||||
|
self._time_rounding_us = time_rounding_us
|
||||||
|
self._force_hash = force_hash
|
||||||
|
self._dry_run = dry_run
|
||||||
|
|
||||||
|
# def add_virtual_tree(self, paths: PathMap) -> Digest:
|
||||||
|
# assert paths
|
||||||
|
|
||||||
|
# fs_paths = sorted([
|
||||||
|
# pair.fs
|
||||||
|
# for pair in paths.pairs
|
||||||
|
# ])
|
||||||
|
|
||||||
|
# tree_map = {
|
||||||
|
# fs_paths[0]: self.add_tree()
|
||||||
|
# }
|
||||||
|
# root = {}
|
||||||
|
# for pair in paths.paths:
|
||||||
|
# vdir = root
|
||||||
|
# for part in pair.bsv.parts[:-1]:
|
||||||
|
# vdir = vdir.setdefault(part, {})
|
||||||
|
# vdir[pair.bsv.parts[-1]] = pair.fs
|
||||||
|
|
||||||
|
# return self._add_virtual_tree(root)
|
||||||
|
|
||||||
|
# def _add_virtual_tree(self, vtree: dict[str, dict | Path]) -> Digest:
|
||||||
|
# tree = Tree(self._repo, [])
|
||||||
|
# for name, value in vtree.items():
|
||||||
|
# if isinstance(value, dict):
|
||||||
|
# digest = self._add_virtual_tree(value)
|
||||||
|
# elif isinstance(value, Path):
|
||||||
|
# digest = self.add_tree(value)
|
||||||
|
# else:
|
||||||
|
# raise TypeError(f"unexpected type {type(vtree).__name__} for vtree")
|
||||||
|
# tree.items.append(TreeItem(
|
||||||
|
# digest = digest,
|
||||||
|
# object_type = b"tree",
|
||||||
|
# size = 0,
|
||||||
|
# permissions = 0o766,
|
||||||
|
# modification_timestamp = timestamp_us_from_time(DateTime.now()),
|
||||||
|
# name = name,
|
||||||
|
# ))
|
||||||
|
# return self._repo.add_tree(tree, dry_run=self._dry_run)
|
||||||
|
|
||||||
|
def add_tree(self, path: Path, *, source_digest: Digest | None=None) -> TreeObject:
|
||||||
|
pstat = path.stat(follow_symlinks=False)
|
||||||
|
if self.ignore(path, pstat):
|
||||||
|
raise ValueError(f"path {path} is ignored")
|
||||||
|
# self.report(Action.IGNORE, path, pstat, IgnoreCause.IGNORE_RULE)
|
||||||
|
# return Digest()
|
||||||
|
return self._add_tree(path, pstat, source_digest=source_digest)
|
||||||
|
|
||||||
|
def _add_tree(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> TreeObject:
|
||||||
|
source = self._repo.get_tree(source_digest).tree if source_digest else None
|
||||||
|
|
||||||
|
tree = Tree(self._repo, [])
|
||||||
|
subpaths = sorted(path.iterdir())
|
||||||
|
|
||||||
|
subpath_index = 0
|
||||||
|
source_item_index = 0
|
||||||
|
|
||||||
|
while subpath_index < len(subpaths) or (source and source_item_index < len(source.items)):
|
||||||
|
subpath = subpaths[subpath_index] if subpath_index < len(subpaths) else None
|
||||||
|
source_item = source.items[source_item_index] if source and source_item_index < len(source.items) else None
|
||||||
|
|
||||||
|
if subpath and source_item:
|
||||||
|
if subpath.name < source_item.name:
|
||||||
|
source_item = None
|
||||||
|
elif subpath.name > source_item.name:
|
||||||
|
subpath = None
|
||||||
|
|
||||||
|
if subpath is not None:
|
||||||
|
subpath_index += 1
|
||||||
|
if source_item is not None:
|
||||||
|
source_item_index += 1
|
||||||
|
|
||||||
|
if subpath is not None:
|
||||||
|
obj: ObjectInfo | None = None
|
||||||
|
try:
|
||||||
|
istat = subpath.lstat()
|
||||||
|
|
||||||
|
if self.ignore(subpath, istat, source=source_item):
|
||||||
|
self.report(Action.IGNORE, subpath, istat, IgnoreCause.IGNORE_RULE)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if (source_item is not None and
|
||||||
|
not self._force_hash and
|
||||||
|
not stat.S_ISDIR(istat.st_mode) and
|
||||||
|
pstat.st_size == source_item.size and
|
||||||
|
pstat.st_mtime_ns // (1000 * self._time_rounding_us) == source_item.modification_timestamp_us // self._time_rounding_us
|
||||||
|
):
|
||||||
|
self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNCHANGED)
|
||||||
|
tree.items.append(source_item)
|
||||||
|
continue
|
||||||
|
|
||||||
|
sub_source_digest = source_item and source_item.digest
|
||||||
|
object_type = object_type_from_mode(istat.st_mode)
|
||||||
|
if object_type == b"slnk":
|
||||||
|
obj = self._add_symlink(subpath, istat, source_digest=sub_source_digest)
|
||||||
|
size = obj.size
|
||||||
|
elif object_type == b"tree":
|
||||||
|
obj = self._add_tree(subpath, istat, source_digest=sub_source_digest)
|
||||||
|
size = obj.total_size
|
||||||
|
elif object_type == b"blob":
|
||||||
|
obj = self._add_blob(subpath, istat, source_digest=sub_source_digest)
|
||||||
|
size = istat.st_size
|
||||||
|
else:
|
||||||
|
self.report(Action.IGNORE, subpath, istat, IgnoreCause.UNSUPPORTED_TYPE)
|
||||||
|
continue
|
||||||
|
except Exception as err:
|
||||||
|
self.report(Action.ERROR, subpath, None, err)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if obj:
|
||||||
|
tree.items.append(TreeItem(
|
||||||
|
digest = obj.digest,
|
||||||
|
object_type = object_type,
|
||||||
|
size = size,
|
||||||
|
permissions = stat.S_IMODE(istat.st_mode),
|
||||||
|
modification_timestamp_us = istat.st_mtime_ns // 1000,
|
||||||
|
name = subpath.name,
|
||||||
|
))
|
||||||
|
elif source_item:
|
||||||
|
self.report(Action.REMOVE, path / source_item.name, None, source_item)
|
||||||
|
|
||||||
|
tree_object = self._repo.add_tree(tree, dry_run=self._dry_run)
|
||||||
|
|
||||||
|
action, info = Action.from_digests(tree_object.digest, source_digest)
|
||||||
|
self.report(action, path, pstat, info)
|
||||||
|
return tree_object
|
||||||
|
|
||||||
|
|
||||||
|
def _add_symlink(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> SymlinkObject:
|
||||||
|
# TODO: Store symlink relative to current dir ?
|
||||||
|
# * What about symlink that points outside of the backup dirs
|
||||||
|
# * Should symlinks that points inside the backup dirs but in another
|
||||||
|
# mount-point adjusted ?
|
||||||
|
# * Should absolute symlink be restored as absolute ?
|
||||||
|
obj = self._repo.add_symlink_from_fs_target(path, path.readlink())
|
||||||
|
|
||||||
|
action, info = Action.from_digests(obj.digest, source_digest)
|
||||||
|
self.report(action, path, pstat, info)
|
||||||
|
return obj
|
||||||
|
|
||||||
|
def _add_blob(self, path: Path, pstat: stat_result, *, source_digest: Digest | None) -> BlobObject:
|
||||||
|
with path.open("rb") as stream:
|
||||||
|
obj = self._repo.add_blob(stream, dry_run=self._dry_run)
|
||||||
|
|
||||||
|
action, info = Action.from_digests(obj.digest, source_digest)
|
||||||
|
self.report(action, path, pstat, info)
|
||||||
|
return obj
|
||||||
|
|
||||||
|
|
||||||
|
def ignore(self, path: Path, pstat: stat_result, *, source: TreeItem | None=None) -> bool:
|
||||||
|
return is_bsv_repository(path)
|
||||||
|
|
||||||
|
def report(self, action: Action, path: Path, pstat: stat_result | None, info: IgnoreCause | Exception | TreeItem | None=None):
|
||||||
|
match action, info:
|
||||||
|
case (Action.ADD, None):
|
||||||
|
print(f"Add: {path}")
|
||||||
|
case (Action.UPDATE, None):
|
||||||
|
print(f"Add: {path}")
|
||||||
|
case (Action.REMOVE, item) if isinstance(item, TreeItem):
|
||||||
|
print(f"Remove: {path / item.name}")
|
||||||
|
case (Action.IGNORE, IgnoreCause.IGNORE_RULE):
|
||||||
|
print(f"Ignore (rule): {path}")
|
||||||
|
case (Action.IGNORE, IgnoreCause.UNCHANGED):
|
||||||
|
print(f"Ignore (unchanged): {path}")
|
||||||
|
case (Action.IGNORE, IgnoreCause.UNSUPPORTED_TYPE) if pstat is not None:
|
||||||
|
print(f"Ignore (unsupported type {path_type_name(pstat)}): {path}")
|
||||||
|
case (Action.ERROR, _) if isinstance(info, Exception):
|
||||||
|
print(f"Error {info}: {path}")
|
||||||
|
case _:
|
||||||
|
raise ValueError("TreeWalker.report(): unsupported parameter combination")
|
||||||
|
|
||||||
|
|
||||||
|
def path_type_name(pstat: stat_result) -> str:
    """Name the file type(s) encoded in ``pstat.st_mode``.

    Every `stat.S_IS*` predicate that matches contributes its label; matching
    labels are joined with ``", "``. Returns ``"unknown"`` when none matches.
    """
    mode = pstat.st_mode
    # Order matters: it is the order of the labels in the returned string.
    predicates = (
        (stat.S_ISBLK, "block_device"),
        (stat.S_ISCHR, "char_device"),
        (stat.S_ISDIR, "dir"),
        (stat.S_ISDOOR, "door"),
        (stat.S_ISFIFO, "fifo"),
        (stat.S_ISLNK, "symlink"),
        (stat.S_ISPORT, "port"),
        (stat.S_ISREG, "file"),
        (stat.S_ISSOCK, "socket"),
        (stat.S_ISWHT, "whiteout"),
    )
    labels = [label for matches, label in predicates if matches(mode)]
    return ", ".join(labels) if labels else "unknown"
|
||||||
118
src/bsv.bak/util.py
Normal file
118
src/bsv.bak/util.py
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from datetime import UTC, datetime as DateTime, timedelta as TimeDelta
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
import platform
|
||||||
|
import stat
|
||||||
|
from typing import BinaryIO
|
||||||
|
|
||||||
|
|
||||||
|
EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
|
||||||
|
|
||||||
|
|
||||||
|
def time_from_timestamp_us(timestamp: int) -> DateTime:
|
||||||
|
return EPOCH + TimeDelta(microseconds=timestamp)
|
||||||
|
|
||||||
|
def timestamp_us_from_time(time: DateTime) -> int:
|
||||||
|
return (time.astimezone(UTC) - EPOCH) // TimeDelta(microseconds=1)
|
||||||
|
|
||||||
|
|
||||||
|
def read_exact(stream: BinaryIO, num_bytes: int) -> bytes:
    """Read exactly `num_bytes` bytes from `stream`.

    Raises:
        IOError: if a single `read()` call returns a different number of bytes.
    """
    chunk = stream.read(num_bytes)
    received = len(chunk)
    if received == num_bytes:
        return chunk
    raise IOError(f"expected {num_bytes} bytes, got {received}")
|
||||||
|
|
||||||
|
def read_exact_or_eof(stream: BinaryIO, num_bytes: int) -> bytes | None:
    """Read exactly `num_bytes` bytes, or return None when nothing is left.

    Returns:
        The bytes read, or None when `read()` returned no data at all.

    Raises:
        IOError: if some data was read but not exactly `num_bytes` bytes.
    """
    chunk = stream.read(num_bytes)
    if not chunk:
        return None

    received = len(chunk)
    if received == num_bytes:
        return chunk
    raise IOError(f"expected {num_bytes} bytes, got {received}")
|
||||||
|
|
||||||
|
|
||||||
|
def is_bsv_repository(path: Path) -> bool:
    """Tell whether `path` contains a ``bsv_repository.config`` file."""
    marker = path.joinpath("bsv_repository.config")
    return marker.is_file()
|
||||||
|
|
||||||
|
|
||||||
|
def object_type_from_path(path: Path) -> bytes:
    """Return the bsv object type of `path` itself, without following symlinks."""
    mode = path.lstat().st_mode
    return object_type_from_mode(mode)
|
||||||
|
|
||||||
|
def object_type_from_mode(mode: int) -> bytes:
    """Map a ``st_mode`` value to a bsv object type tag.

    Returns:
        ``b"slnk"`` for a symlink, ``b"tree"`` for a directory, ``b"blob"``
        for a regular file and ``b""`` for anything else.
    """
    type_checks = (
        (stat.S_ISLNK, b"slnk"),
        (stat.S_ISDIR, b"tree"),
        (stat.S_ISREG, b"blob"),
    )
    for matches, object_type in type_checks:
        if matches(mode):
            return object_type
    return b""
|
||||||
|
|
||||||
|
|
||||||
|
def default_bsv_config_path() -> Path:
    """Locate the bsv configuration file.

    Lookup order:
      1. ``$BSV_CONFIG`` when it is an absolute path to an existing file;
      2. the first existing ``bsv/config`` file under the directories returned
         by `user_config_dirs()`;
      3. ``bsv/config`` under `user_config_home()`, whether or not it exists.
    """
    env_value = os.environ.get("BSV_CONFIG")
    if env_value:
        env_path = Path(env_value)
        if env_path.is_absolute() and env_path.is_file():
            return env_path

    # user_config_dirs() yields directories: probe the config file inside each
    # one rather than testing the directory itself with is_file().
    for config_dir in user_config_dirs():
        candidate = config_dir / "bsv/config"
        if candidate.is_file():
            return candidate

    return user_config_home() / "bsv/config"
|
||||||
|
|
||||||
|
def default_local_repository_path() -> Path:
    """Return the ``bsv`` directory located under `user_data_home()`."""
    data_home = user_data_home()
    return data_home.joinpath("bsv")
|
||||||
|
|
||||||
|
|
||||||
|
def user_data_home() -> Path:
    """Return the base directory for user-specific data files.

    Uses ``$XDG_DATA_HOME`` when it holds an absolute path, ``~/.local/share``
    otherwise.

    Raises:
        NotImplementedError: on Windows, Darwin and Java platforms.
    """
    if platform.system() in ("Windows", "Darwin", "Java"):
        # NotImplemented is a comparison sentinel, not an exception class.
        raise NotImplementedError(f"{platform.system()} support not implemented yet")

    # Assume Unix.
    env_value = os.environ.get("XDG_DATA_HOME", "")
    if env_value:
        path = Path(env_value)
        if path.is_absolute():
            return path
    return Path.home() / ".local/share"
|
||||||
|
|
||||||
|
def user_config_home() -> Path:
    """Return the base directory for user-specific configuration files.

    Uses ``$XDG_CONFIG_HOME`` when it holds an absolute path, ``~/.config``
    otherwise.

    Raises:
        NotImplementedError: on Windows, Darwin and Java platforms.
    """
    if platform.system() in ("Windows", "Darwin", "Java"):
        # NotImplemented is a comparison sentinel, not an exception class.
        raise NotImplementedError(f"{platform.system()} support not implemented yet")

    # Assume Unix.
    env_value = os.environ.get("XDG_CONFIG_HOME", "")
    if env_value:
        path = Path(env_value)
        if path.is_absolute():
            return path
    return Path.home() / ".config"
|
||||||
|
|
||||||
|
def user_config_dirs() -> list[Path]:
    """Return the configuration directories to search, most specific first.

    The list starts with `user_config_home()`, followed by the absolute paths
    listed in ``$XDG_CONFIG_DIRS`` (colon-separated, ``/etc/xdg`` when unset or
    empty). Non-absolute entries are dropped.

    Raises:
        NotImplementedError: on Windows, Darwin and Java platforms.
    """
    if platform.system() in ("Windows", "Darwin", "Java"):
        # NotImplemented is a comparison sentinel, not an exception class.
        raise NotImplementedError(f"{platform.system()} support not implemented yet")

    # Assume Unix.
    raw_dirs = os.environ.get("XDG_CONFIG_DIRS") or "/etc/xdg"
    system_dirs = [
        path
        for path in map(Path, raw_dirs.split(":"))
        if path.is_absolute()
    ]
    return [user_config_home(), *system_dirs]
|
||||||
|
|
||||||
|
|
||||||
|
class Hash(ABC):
    """Abstract interface of an incremental hash object."""

    # presumably the algorithm name and the digest length in bytes -- confirm
    # against the concrete implementations.
    name: str
    digest_size: int

    @abstractmethod
    def update(self, *data: bytes | bytearray | memoryview):
        """Feed one or more byte buffers into the hash."""

    @abstractmethod
    def digest(self) -> bytes:
        """Return the digest as bytes."""
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
# bsv - Backup, Synchronization, Versioning
|
# pybsv - Backup, Synchronization, Versioning.
|
||||||
# Copyright (C) 2023 Simon Boyé
|
# Copyright (C) 2025 Simon Boyé
|
||||||
#
|
#
|
||||||
# This program is free software: you can redistribute it and/or modify
|
# This program is free software: you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU Affero General Public License as published by
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
@@ -13,6 +13,11 @@
|
|||||||
#
|
#
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
"""pybsv - A Backup, Synchronization and Versioning tool."""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from bsv._version import __version__, __version_tuple__
|
from bsv._version import __version__, __version_tuple__
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ["__version__", "__version_tuple__"]
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
# bsv - Backup, Synchronization, Versioning
|
# pybsv - Backup, Synchronization, Versioning.
|
||||||
# Copyright (C) 2023 Simon Boyé
|
# Copyright (C) 2025 Simon Boyé
|
||||||
#
|
#
|
||||||
# This program is free software: you can redistribute it and/or modify
|
# This program is free software: you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU Affero General Public License as published by
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
@@ -13,9 +13,11 @@
|
|||||||
#
|
#
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
"""Main entry-point. Allow to use bsv module as a command."""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from bsv.main import main
|
from bsv.cli import cli
|
||||||
|
|
||||||
|
|
||||||
exit(main())
|
exit(cli())
|
||||||
|
|||||||
315
src/bsv/cli.py
Normal file
315
src/bsv/cli.py
Normal file
@@ -0,0 +1,315 @@
|
|||||||
|
# pybsv - Backup, Synchronization, Versioning.
|
||||||
|
# Copyright (C) 2025 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
"""Command-line interface. This is where all bsv commands are defined."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import math
|
||||||
|
from pathlib import Path, PurePosixPath
|
||||||
|
import platform
|
||||||
|
import sys
|
||||||
|
from typing import Any, ClassVar, Literal
|
||||||
|
|
||||||
|
import click
|
||||||
|
|
||||||
|
from bsv.cli_utils import format_human_byte_size
|
||||||
|
from bsv.repo import default_repository_path
|
||||||
|
from bsv.vfs import (
|
||||||
|
AlreadyExistError,
|
||||||
|
FileMetadata,
|
||||||
|
NotFoundError,
|
||||||
|
Permissions,
|
||||||
|
VirtualFileSystem,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RepositoryParams:
    """Global parameters shared by all commands."""

    # Path of the repository the commands operate on.
    path: Path

    def as_filesystem(self) -> VirtualFileSystem:
        """Build a `VirtualFileSystem` from the repository path."""
        repository_path = self.path
        return VirtualFileSystem(repository_path)
|
||||||
|
|
||||||
|
|
||||||
|
class PermissionsType(click.ParamType):
    """Converter for permissions given on the command line."""

    name: ClassVar[str] = "permissions"

    def convert(
        self, value: Any, param: click.Parameter | None, ctx: click.Context | None
    ) -> Permissions:
        """Convert an argument to a `Permissions` object.

        A value that already is a `Permissions` is returned untouched; a
        `ValueError` raised by the `Permissions` constructor is turned into a
        click usage failure.
        """
        if not isinstance(value, Permissions):
            try:
                value = Permissions(value)
            except ValueError as err:
                self.fail(str(err), param, ctx)
        return value
|
||||||
|
|
||||||
|
|
||||||
|
class BsvPathType(click.ParamType):
    """Converter for bsv paths given on the command line."""

    name: ClassVar[str] = "bsv_path"

    def convert(
        self, value: Any, param: click.Parameter | None, ctx: click.Context | None
    ) -> PurePosixPath:
        """Convert an argument to a bsv path (absolute `PurePosixPath`).

        Fails the conversion when the path cannot be built or is not absolute.
        """
        if isinstance(value, PurePosixPath):
            return value

        try:
            bsv_path = PurePosixPath(value)
        except ValueError as err:
            self.fail(str(err), param, ctx)

        if bsv_path.is_absolute():
            return bsv_path
        self.fail(f"{value} is not an absolute path", param, ctx)
|
||||||
|
|
||||||
|
|
||||||
|
class AnyPathType(click.ParamType):
    """Converter for bsv or fs paths given on the command line.

    A value prefixed with ``bsv:`` is converted to a bsv path, a value prefixed
    with ``fs:`` to a filesystem path; without prefix, `default` decides.
    """

    name: ClassVar[str] = "any_path"

    # Kind of path assumed when the value carries no explicit prefix.
    default: Literal["bsv", "fs"]

    def __init__(self, default: Literal["bsv", "fs"] = "fs"):
        """Create a converter that treats unprefixed values as `default` paths."""
        self.default = default

    def convert(
        self, value: Any, param: click.Parameter | None, ctx: click.Context | None
    ) -> PurePosixPath | Path:
        """Convert an argument to a bsv or fs path.

        Returns:
            An absolute `PurePosixPath` for bsv paths (validated by
            `BsvPathType`), a `Path` for filesystem paths.
        """
        if isinstance(value, (PurePosixPath, Path)):
            return value

        if not isinstance(value, str):
            # Pass param/ctx like the other converters so the failure names
            # the offending parameter.
            self.fail(f"{value} is not a string", param, ctx)

        path_type = self.default
        if value.startswith("bsv:"):
            path_type = "bsv"
            value = value.removeprefix("bsv:")
        elif value.startswith("fs:"):
            path_type = "fs"
            value = value.removeprefix("fs:")

        if path_type == "bsv":
            return BsvPathType().convert(value, param, ctx)
        return Path(value)
|
||||||
|
|
||||||
|
|
||||||
|
PERMISSIONS_TYPE = PermissionsType()
|
||||||
|
ANY_OBJECT_TYPE = BsvPathType() # TODO: accept bsv path and object id.
|
||||||
|
|
||||||
|
|
||||||
|
@click.group()
@click.version_option()
@click.option(
    "--repo", envvar="BSV_REPO", type=click.Path(resolve_path=True, path_type=Path)
)
@click.pass_context
def cli(ctx: click.Context, repo: Path | None):
    """Backup, Synchronization and Versioning (bsv) tool.

    bsv manages synchronization of several "devices" with history. This makes it
    suitable for different tasks:

    * Backup: Synchronize your data with remote devices that serve as backup. The
      remotes should be configured to keep previous versions of the files (using
      configurable rules) so even if a file is deleted/corrupted, a valid version can
      be found in the backup devices.
    * Synchronization: Synchronize your data among several devices you are working with.
      In case of conflict, the conflicting versions of a file are stored in each
      devices so it is possible to inspect and merge them to resolve the conflict.
    * Versioning: A local device can be used to store different versions of the same
      directory structure.
    """
    # Fall back to the default repository when --repo / BSV_REPO is not given.
    repository_path = repo or default_repository_path()
    ctx.obj = RepositoryParams(path=repository_path)
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command()
@click.pass_obj
def info(params: RepositoryParams):
    """Print information on the current repository."""
    repository_path = params.path
    print(f"Repository: {repository_path}")
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command()
@click.option("-d", "--device-name", default=platform.node, prompt=True)
@click.pass_obj
def init(params: RepositoryParams, device_name: str):
    """Initialize a bsv repository."""
    # Only echoes the parameters for now.
    repository_path = params.path
    print(f"Repository path: {repository_path!r}")
    print(f"Device name: {device_name!r}")
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command()
@click.argument("directories", nargs=-1, type=BsvPathType())
@click.option("-m", "--mode", type=PERMISSIONS_TYPE, default=Permissions(0o770))
@click.option("-p", "--parents", is_flag=True)
@click.option("-v", "--verbose", is_flag=True)
@click.pass_obj
def mkdir(
    params: RepositoryParams,
    directories: tuple[PurePosixPath, ...],
    mode: Permissions,
    parents: bool = False,
    verbose: bool = False,
):
    """Make a directory in the current repository."""
    fs = params.as_filesystem()
    exit_status = 0

    for directory in directories:
        try:
            fs.mkdir(directory, mode=mode, parents=parents)
        except AlreadyExistError as error:
            # NOTE(review): the error is printed but the exit status stays 0,
            # unlike NotFoundError below -- confirm this is intended.
            click.echo(error, file=sys.stderr)
        except NotFoundError as error:
            exit_status = 1
            click.echo(error, file=sys.stderr)
        else:
            if verbose:
                click.echo(f"Created {directory}")

    sys.exit(exit_status)
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command()
@click.argument("files", nargs=-1, type=BsvPathType())
@click.option("--filter", flag_value="hidden", default=True, hidden=True)
@click.option("-a", "--all", "filter", flag_value="all")
@click.option("-A", "--almost-all", "filter", flag_value="implied")
@click.option("-h", "--human-readable", is_flag=True)
@click.option("-l", "--list", is_flag=True)
@click.pass_obj
def ls(
    params: RepositoryParams,
    files: tuple[PurePosixPath, ...],
    filter: Literal["hidden", "implied", "all"],
    human_readable: bool,
    list: bool,
):
    """List information about files."""
    fs = params.as_filesystem()

    # Without argument, list the root of the repository.
    targets = files or (PurePosixPath("/"),)
    skip_hidden = filter == "hidden"
    show_headers = len(targets) > 1

    for position, target in enumerate(targets):
        if show_headers:
            # Blank line between consecutive listings, then the listing title.
            if position:
                click.echo()
            click.echo(f"{target}:")

        entries = sorted(
            (md.path.name, md)
            for md in fs.iter_dir(target)
            if not (skip_hidden and FileMetadata.is_hidden(md))
        )

        if filter == "all":
            # -a also shows the directory itself and its parent.
            entries = [
                (".", fs.metadata(target)),
                ("..", fs.metadata(target.parent)),
                *entries,
            ]

        if not list:
            for name, _ in entries:
                click.echo(name)
            continue

        # Long format: mode, size, modification time (local, no tz), name.
        table = []
        for name, md in entries:
            mode = str(md.unix_mode)
            if human_readable:
                size = format_human_byte_size(md.byte_size)
            else:
                size = str(md.byte_size)
            local_time = md.modification_time.astimezone().replace(tzinfo=None)
            table.append((mode, size, local_time.isoformat(" ", "seconds"), name))

        # The name column is last and needs no padding.
        widths = [
            max((len(row[column]) for row in table), default=0)
            for column in range(3)
        ]
        for mode, size, time, name in table:
            columns = [
                mode.ljust(widths[0]),
                size.rjust(widths[1]),
                time.ljust(widths[2]),
                name,
            ]
            click.echo(" ".join(columns))
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command()
@click.argument("object", type=ANY_OBJECT_TYPE)
@click.pass_obj
def show(
    params: RepositoryParams,
    object: PurePosixPath,
):
    """Show a bsv object."""
    # Only echoes the parsed argument for now.
    description = f"object: {object!r}"
    print(description)
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command()
@click.argument("srcs", nargs=-1, type=AnyPathType(default="bsv"))
@click.argument("dst", type=AnyPathType(default="bsv"))
@click.option("-r", "--recursive", is_flag=True)
@click.pass_obj
def cp(
    params: RepositoryParams,
    srcs: list[PurePosixPath | Path],
    dst: PurePosixPath | Path,
    recursive: bool,
):
    """Copy files or directories."""
    # Only echoes the parsed arguments for now.
    for label, value in (("srcs", srcs), ("dst", dst), ("recursive", recursive)):
        print(f"{label}: {value!r}")
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command()
@click.argument("targets", nargs=-1, type=BsvPathType())
@click.option("-r", "--recursive", is_flag=True)
@click.pass_obj
def rm(
    params: RepositoryParams,
    targets: list[PurePosixPath],
    recursive: bool,
):
    """Remove files or directories."""
    # Only echoes the parsed arguments for now.
    for label, value in (("targets", targets), ("recursive", recursive)):
        print(f"{label}: {value}")
|
||||||
48
src/bsv/cli_utils.py
Normal file
48
src/bsv/cli_utils.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
# pybsv - Backup, Synchronization, Versioning.
|
||||||
|
# Copyright (C) 2025 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
"""Tools and utilities to build the command-line interface."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Final
|
||||||
|
|
||||||
|
|
||||||
|
# Binary unit prefixes; the entry at index i stands for a factor of 1024**i.
BINARY_PREFIXES: Final[list[str]] = [
    "",
    "Ki",
    "Mi",
    "Gi",
    "Ti",
    "Pi",
    "Ei",
    "Zi",
    "Yi",
    "Ri",
    "Qi",
]


def format_human_byte_size(byte_size: int) -> str:
    """Format the given `byte_size` as a human-readable string.

    The value is scaled to the largest fitting binary prefix and rounded so
    that about three significant digits are shown (e.g. ``1.5KiB``,
    ``118MiB``).
    """
    last = len(BINARY_PREFIXES) - 1

    # 10 bits per prefix step, clamped to the available prefixes.
    exponent = (byte_size.bit_length() - 1) // 10
    if exponent < 0:
        exponent = 0
    elif exponent > last:
        exponent = last

    scaled = byte_size / 1024**exponent
    integer_digits = len(str(int(scaled)))
    value = round(scaled, max(0, 3 - integer_digits))

    # Rounding may reach 1024: move to the next prefix when there is one.
    if value == 1024 and exponent < last:
        value, exponent = 1, exponent + 1

    return f"{value:.16g}{BINARY_PREFIXES[exponent]}B"
|
||||||
@@ -1,111 +0,0 @@
|
|||||||
# bsv - Backup, Synchronization, Versioning
|
|
||||||
# Copyright (C) 2023 Simon Boyé
|
|
||||||
#
|
|
||||||
# This program is free software: you can redistribute it and/or modify
|
|
||||||
# it under the terms of the GNU Affero General Public License as published by
|
|
||||||
# the Free Software Foundation, either version 3 of the License, or
|
|
||||||
# (at your option) any later version.
|
|
||||||
#
|
|
||||||
# This program is distributed in the hope that it will be useful,
|
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
# GNU Affero General Public License for more details.
|
|
||||||
#
|
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
|
||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from argparse import ArgumentParser
|
|
||||||
from os import getlogin
|
|
||||||
from pathlib import Path
|
|
||||||
import platform
|
|
||||||
|
|
||||||
from bsv.command import command
|
|
||||||
|
|
||||||
|
|
||||||
def init_parser(parser: ArgumentParser):
    """Declare the arguments of the ``init`` command on `parser`."""
    add_argument = parser.add_argument

    add_argument(
        "--name", "-d",
        help="Name of the repository. Default to system hostname.",
    )
    add_argument(
        "--interactive", "-i",
        action="store_true",
        help="Prompt the user for configuration choices.",
    )
    # Optional positional: where the repository data will live.
    add_argument(
        "destination",
        type=Path,
        nargs="?",
        help="Path to a non-existing or empty folder where bsv data will be stored.",
    )
|
|
||||||
|
|
||||||
@command(init_parser)
def init(
    repository_path: Path | None,
    destination: Path | None = None,
    name: str | None = None,
    interactive: bool = False,
) -> int:
    """Initialize a new bsv repository.

    Creates the destination directory if needed and writes a
    `bsv_config.toml` file in it.

    Args:
        repository_path: Not used by this command.
        destination: Directory that will hold the repository. It must not
            exist yet or be empty. Defaults to the current directory.
        name: Name of the repository. Defaults to the hostname.
        interactive: If true, prompt for the name and the destination, using
            the values above as defaults.

    Returns:
        `0` on success.

    Raises:
        RuntimeError: If the name is empty, the destination is unusable or
            the configuration file cannot be created.
    """
    from datetime import datetime as DateTime
    import tomlkit

    if name is None:
        name = platform.node()

    if destination is None:
        # TODO: Choose a sensible system-dependent path.
        destination = Path.cwd()

    if interactive:
        name = input(f"Repository name: (default to {name})\n").strip() or name
        # A `Path` is always truthy (`Path("")` is `Path(".")`), so the
        # fallback has to be decided on the raw answer, not on the `Path`.
        answer = input(f"Destination: (default to {destination})\n").strip()
        if answer:
            destination = Path(answer)
    if not destination.is_absolute():
        destination = Path.cwd() / destination

    if not name:
        raise RuntimeError("repository name cannot be empty")
    if not destination.parent.exists():
        raise RuntimeError(f"destination directory {destination.parent} does not exists")
    if destination.exists() and not destination.is_dir():
        raise RuntimeError(f"destination {destination} exists but is not a directory")
    if destination.exists() and any(destination.iterdir()):
        raise RuntimeError(f"destination directory {destination} is not empty")

    try:
        destination.mkdir(exist_ok=True)
    except OSError as err:
        raise RuntimeError(f"failed to create destination directory {destination}") from err

    try:
        user = getlogin()
    except OSError:
        # `os.getlogin` needs a controlling terminal (fails under cron, CI...).
        from getpass import getuser

        user = getuser()

    bsv_table = tomlkit.table()
    bsv_table.add(tomlkit.comment("Name of the repository."))
    bsv_table.add(tomlkit.comment("Ideally, this should be unique among all connected repositories."))
    bsv_table.add("name", name)
    bsv_table.add(tomlkit.nl())
    bsv_table.add(tomlkit.comment("Mapping between bsv tree and the actual filesystem."))
    bsv_table.add("path_map", tomlkit.array())

    cas_table = tomlkit.table()
    cas_table.add("type", "simple")
    cas_table.add("hash", "sha256")

    doc = tomlkit.document()
    doc.add(tomlkit.comment("bsv repository configuration"))
    doc.add(tomlkit.comment(f"Created by {user} on {DateTime.now().isoformat()}."))
    doc.add(tomlkit.nl())
    doc.add("bsv", bsv_table)
    doc.add(tomlkit.nl())
    doc.add("cas", cas_table)

    config_path = destination / "bsv_config.toml"
    try:
        stream = config_path.open("w", encoding="utf-8")
    except OSError as err:
        raise RuntimeError(f"failed to open configuration file {config_path}") from err

    with stream:
        tomlkit.dump(doc, stream)

    return 0
|
|
||||||
0
src/bsv/py.typed
Normal file
0
src/bsv/py.typed
Normal file
41
src/bsv/repo.py
Normal file
41
src/bsv/repo.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# pybsv - Backup, Synchronization, Versioning.
|
||||||
|
# Copyright (C) 2025 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
import platform
|
||||||
|
|
||||||
|
|
||||||
|
def default_repository_path() -> Path:
    """Return the system-dependent default repository path.

    Only Unix-like systems are handled: the result is `bsv/repo` below
    `$XDG_DATA_HOME`, or below `~/.local/share` when that variable is unset
    or empty.

    Raises:
        NotImplementedError: On Windows, Darwin and Java platforms.
        RuntimeError: If `XDG_DATA_HOME` is set to a relative or non-existing
            path.
    """
    system = platform.system()
    if system in ("Windows", "Darwin", "Java"):
        msg = f"default_repository_path does not support {system} system"
        raise NotImplementedError(msg)

    # Anything else is assumed to be Unix.
    # See https://specifications.freedesktop.org/basedir-spec/latest/
    data_home = os.environ.get("XDG_DATA_HOME", "")
    if not data_home:
        return Path.home() / ".local/share" / "bsv/repo"

    base = Path(data_home)
    if base.is_absolute() and base.exists():
        return base / "bsv/repo"

    msg = (
        f"invalid XDG_DATA_HOME ({base}): path is relative or does not "
        "exists"
    )
    raise RuntimeError(msg)
|
||||||
@@ -1,437 +0,0 @@
|
|||||||
# bsv - Backup, Synchronization, Versioning
|
|
||||||
# Copyright (C) 2023 Simon Boyé
|
|
||||||
#
|
|
||||||
# This program is free software: you can redistribute it and/or modify
|
|
||||||
# it under the terms of the GNU Affero General Public License as published by
|
|
||||||
# the Free Software Foundation, either version 3 of the License, or
|
|
||||||
# (at your option) any later version.
|
|
||||||
#
|
|
||||||
# This program is distributed in the hope that it will be useful,
|
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
# GNU Affero General Public License for more details.
|
|
||||||
#
|
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
|
||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from datetime import UTC, datetime as DateTime, timedelta as TimeDelta
|
|
||||||
import hashlib
|
|
||||||
from io import BytesIO
|
|
||||||
from pathlib import Path, PurePosixPath
|
|
||||||
import platform
|
|
||||||
import tomllib
|
|
||||||
from typing import Any, BinaryIO, Callable, Type
|
|
||||||
|
|
||||||
from fastcdc import fastcdc
|
|
||||||
import tomlkit
|
|
||||||
|
|
||||||
from bsv import __version__
|
|
||||||
from bsv.simple_cas import SimpleCas
|
|
||||||
from bsv.simple_cas.cas import ConfigError, Digest, SimpleCas
|
|
||||||
from bsv.simple_cas.util import Hash, read_exact, read_exact_or_eof
|
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_MIN_CHUNK_SIZE = 1 << 12
|
|
||||||
DEFAULT_AVG_CHUNK_SIZE = 1 << 16
|
|
||||||
DEFAULT_MAX_CHUNK_SIZE = 1 << 20
|
|
||||||
|
|
||||||
|
|
||||||
class Repository:
    """A bsv repository: a configuration file plus a content-addressed store.

    The configuration is read from `bsv_config.toml` in the repository
    directory. The repository is a re-entrant context manager: the underlying
    CAS is entered by the outermost `with` block and exited when that block
    ends.
    """

    _path: Path
    _name: str

    _cas: SimpleCas
    _min_chunk_size: int = DEFAULT_MIN_CHUNK_SIZE
    _avg_chunk_size: int = DEFAULT_AVG_CHUNK_SIZE
    _max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE

    _path_map: list[PathPair]
    # _remotes: list[object]

    # Number of currently active `with` blocks on this repository.
    _context_depth: int = 0

    def __init__(self, path: Path):
        """Open the repository stored in the directory `path`."""
        self._path = path

        with self.config_file.open("rb") as stream:
            config = tomllib.load(stream)

        bsv = config.get("bsv", {})

        self._name = bsv.get("name") or platform.node()

        self._cas = make_cas(
            bsv.get("cas"),
            self._path,
            lambda: hashlib.new(bsv.get("hash")), # type: ignore
        )
        # Fall back to the defaults when the configuration omits a size;
        # a bare `.get()` would store `None` and break chunking later on.
        self._min_chunk_size = bsv.get("min_chunk_size", DEFAULT_MIN_CHUNK_SIZE)
        self._avg_chunk_size = bsv.get("avg_chunk_size", DEFAULT_AVG_CHUNK_SIZE)
        self._max_chunk_size = bsv.get("max_chunk_size", DEFAULT_MAX_CHUNK_SIZE)

        self._path_map = [
            PathPair.from_obj(pair)
            for pair in bsv.get("path_map", [])
        ]

    @property
    def path(self) -> Path:
        """Directory containing the repository."""
        return self._path

    @property
    def config_file(self) -> Path:
        """Path of the repository configuration file."""
        return self.path / "bsv_config.toml"

    @property
    def name(self) -> str:
        """Name of the repository."""
        return self._name

    @property
    def path_map(self) -> list[PathPair]:
        """A copy of the list of bsv/filesystem path pairs."""
        return list(self._path_map)

    def get_blob(self, digest: Digest) -> Blob:
        """Load the blob stored under `digest`."""
        with self:
            return self._read(digest, object_type=b"blob", cls=Blob) # type: ignore

    def add_blob(self, stream: BinaryIO) -> Digest:
        """Chunk `stream`, store it as a blob and return the blob digest."""
        with self:
            return self._write(b"blob", stream)

    def get_tree(self, digest: Digest) -> Tree:
        """Load the tree stored under `digest`."""
        with self:
            return Tree.from_bytes(self, self._cas.read(digest, object_type=b"tree").data)

    def add_tree(self, tree: Tree) -> Digest:
        """Store `tree` and return its digest."""
        with self:
            return self._cas.write(b"tree", tree.to_bytes())

    def get_snapshot(self, digest: Digest) -> Snapshot:
        """Load the snapshot stored under `digest`."""
        with self:
            return Snapshot.from_bytes(self, self._cas.read(digest, object_type=b"snap").data)

    def add_snapshot(self, snapshot: Snapshot) -> Digest:
        """Store `snapshot` and return its digest."""
        with self:
            return self._cas.write(b"snap", snapshot.to_bytes())

    def _read(self, digest: Digest, object_type: bytes, cls: Type[ChunkedObject]) -> ChunkedObject:
        """Read a chunked object of type `object_type` and parse it as `cls`."""
        obj = self._cas.read(digest, object_type=object_type)
        stream = BytesIO(obj.data)
        return cls.from_stream(self, stream, digest_size=self._cas._digest_size)

    def _write(self, object_type: bytes, stream: BinaryIO) -> Digest:
        """Split `stream` into chunks and store it as a chunked object.

        Each chunk is written as a `chnk` object. The stored object is the
        total size on 8 bytes followed, for each chunk, by its digest and its
        length on 4 bytes.
        """
        out = BytesIO()
        size = 0
        for chunk in fastcdc(
            stream,
            min_size = self._min_chunk_size,
            avg_size = self._avg_chunk_size,
            max_size = self._max_chunk_size,
            fat = True,
        ):
            size += chunk.length
            digest = self._cas.write(b"chnk", chunk.data)
            out.write(digest.digest)
            out.write(chunk.length.to_bytes(4))
        return self._cas.write(object_type, size.to_bytes(8) + out.getvalue())

    def __enter__(self):
        """Enter the CAS context on the outermost entry and return `self`."""
        if self._context_depth == 0:
            self._cas.__enter__()
        self._context_depth += 1
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Exit the CAS context when the outermost `with` block ends."""
        self._context_depth -= 1
        if self._context_depth == 0:
            return self._cas.__exit__(exc_type, exc_value, traceback)
|
|
||||||
|
|
||||||
|
|
||||||
def create_repository(
    destination: Path,
    name: str,
    cas: str = "simple",
    hash: str = "sha256",
    min_chunk_size: int = DEFAULT_MIN_CHUNK_SIZE,
    avg_chunk_size: int = DEFAULT_AVG_CHUNK_SIZE,
    max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE,
):
    """Create a new repository in `destination` and return it opened.

    Args:
        destination: Directory that will hold the repository. Its parent must
            exist; the directory itself must not exist yet or be empty.
        name: Name of the repository; must not be empty.
        cas: Name of the CAS implementation written to the configuration.
        hash: Name of the hash algorithm written to the configuration.
        min_chunk_size: Minimum chunk size written to the configuration.
        avg_chunk_size: Average chunk size written to the configuration.
        max_chunk_size: Maximum chunk size written to the configuration.

    Returns:
        A `Repository` opened on `destination`.

    Raises:
        RuntimeError: If the name is empty, the destination is unusable or
            the configuration file cannot be created.
    """
    from datetime import datetime as DateTime
    from os import getlogin

    if not name:
        raise RuntimeError("repository name cannot be empty")
    if not destination.parent.exists():
        raise RuntimeError(f"destination directory {destination.parent} does not exists")
    if destination.exists() and not destination.is_dir():
        raise RuntimeError(f"destination {destination} exists but is not a directory")
    if destination.exists() and any(destination.iterdir()):
        raise RuntimeError(f"destination directory {destination} is not empty")

    try:
        destination.mkdir(exist_ok=True)
    except OSError as err:
        raise RuntimeError(f"failed to create destination directory {destination}") from err

    try:
        user = getlogin()
    except OSError:
        # `os.getlogin` needs a controlling terminal (fails under cron, CI...).
        from getpass import getuser

        user = getuser()

    bsv_table = tomlkit.table()
    bsv_table.add(tomlkit.comment("Name of the repository."))
    bsv_table.add(tomlkit.comment("Ideally, this should be unique among all connected repositories."))
    bsv_table.add("name", name)
    bsv_table.add(tomlkit.nl())
    bsv_table.add(tomlkit.comment("Mapping between bsv tree and the actual filesystem."))
    bsv_table.add("path_map", tomlkit.array())
    bsv_table.add("cas", cas)
    bsv_table.add("hash", hash)
    bsv_table.add("min_chunk_size", min_chunk_size)
    bsv_table.add("avg_chunk_size", avg_chunk_size)
    bsv_table.add("max_chunk_size", max_chunk_size)

    doc = tomlkit.document()
    doc.add(tomlkit.comment("bsv repository configuration"))
    doc.add(tomlkit.comment(f"Created by {user} on {DateTime.now().isoformat()}."))
    doc.add(tomlkit.nl())
    doc.add("bsv", bsv_table)

    config_path = destination / "bsv_config.toml"
    try:
        stream = config_path.open("w", encoding="utf-8")
    except OSError as err:
        raise RuntimeError(f"failed to open configuration file {config_path}") from err

    with stream:
        tomlkit.dump(doc, stream)

    return Repository(destination)
|
|
||||||
|
|
||||||
|
|
||||||
def make_cas(cas_name: str, path: Path, hash_factory: Callable[[], Hash]) -> SimpleCas:
    """Instantiate the CAS implementation called `cas_name`.

    Only `"simple"` is known; any other name raises `ConfigError`.
    """
    if cas_name != "simple":
        raise ConfigError(f"unknown cas name {cas_name}")
    return SimpleCas(path, hash_factory)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class ChunkedObject:
    """An object stored as a total size followed by a list of chunk records."""

    repo: Repository
    size: int
    chunks: list[Chunk]

    @classmethod
    def from_stream(cls, repo: Repository, stream: BinaryIO, digest_size: int) -> ChunkedObject:
        """Parse a chunked object from `stream`.

        The stream starts with the size on 8 bytes; chunk records are then
        read with `Chunk.from_stream` until it returns `None`.
        """
        total_size = int.from_bytes(read_exact(stream, 8))
        parsed = cls(repo=repo, size=total_size, chunks=[])
        while True:
            chunk = Chunk.from_stream(stream, digest_size)
            if chunk is None:
                break
            parsed.chunks.append(chunk)
        return parsed
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class Blob(ChunkedObject):
    """A chunked object whose content can be read sequentially."""

    # Index of the next chunk to fetch from the CAS.
    _chunk_index: int = 0
    # Bytes already fetched from the CAS but not yet returned to the caller.
    _chunk_data: bytes = b""

    def read(self, num_bytes: int = -1) -> bytes:
        """Read up to `num_bytes` bytes, or everything left if negative.

        Fewer bytes are returned only when the end of the blob is reached.
        Bytes fetched beyond `num_bytes` are kept for the next call.
        """
        parts = [self._chunk_data]
        size = len(self._chunk_data)
        while (num_bytes < 0 or size < num_bytes) and self._chunk_index < len(self.chunks):
            parts.append(self._next_chunk())
            size += len(parts[-1])

        data = b"".join(parts)
        if num_bytes < 0 or size <= num_bytes:
            self._chunk_data = b""
            return data

        # More was fetched than requested: return exactly `num_bytes` and
        # buffer the remainder.
        self._chunk_data = data[num_bytes:]
        return data[:num_bytes]

    def read1(self) -> bytes:
        """Return the next available piece of data, `b""` at the end.

        Data buffered by a previous `read` is returned first so that no byte
        is skipped; otherwise the next chunk is fetched.
        """
        if self._chunk_data:
            data = self._chunk_data
            self._chunk_data = b""
            return data
        return self._next_chunk()

    def _next_chunk(self) -> bytes:
        """Fetch the next chunk from the CAS, or return `b""` if none is left."""
        if self._chunk_index == len(self.chunks):
            return b""
        obj = self.repo._cas.read(self.chunks[self._chunk_index].digest, object_type=b"chnk")
        self._chunk_index += 1
        return obj.data
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class Tree:
    """A list of `TreeItem` belonging to a repository."""

    repo: Repository
    items: list[TreeItem]

    @classmethod
    def from_stream(cls, repo: Repository, stream: BinaryIO) -> Tree:
        """Parse items from `stream` until `TreeItem.from_stream` returns `None`."""
        digest_size = repo._cas._digest_size
        parsed = Tree(repo, [])
        while True:
            item = TreeItem.from_stream(stream, digest_size)
            if item is None:
                return parsed
            parsed.items.append(item)

    @classmethod
    def from_bytes(cls, repo: Repository, data: bytes) -> Tree:
        """Parse a tree from its serialized form `data`."""
        return cls.from_stream(repo, BytesIO(data))

    def write(self, stream: BinaryIO):
        """Sort the items by name (in place) and write each one to `stream`."""

        def item_name(item):
            return item.name

        self.items.sort(key=item_name)
        for entry in self.items:
            entry.write(stream)

    def to_bytes(self) -> bytes:
        """Return the serialized form of the tree."""
        buffer = BytesIO()
        self.write(buffer)
        return buffer.getvalue()
|
|
||||||
|
|
||||||
|
|
||||||
EPOCH = DateTime(1970, 1, 1, tzinfo=UTC)
|
|
||||||
|
|
||||||
@dataclass
class TreeItem:
    """One named entry of a `Tree`, with its digest, permissions and times.

    Timestamps are stored as integers; the `creation_time` and
    `modification_time` properties convert them with `time_from_timestamp`
    and `timestamp_from_time`.
    """

    name: str
    digest: Digest
    permissions: int
    creation_timestamp: int
    modification_timestamp: int

    def __init__(
        self,
        name: str,
        digest: Digest,
        permissions: int,
        creation_timestamp: int,
        modification_timestamp: int,
    ):
        """Create a tree item.

        Raises:
            ValueError: If `name` contains a path separator ('/' or '\\').
        """
        # Check each separator on its own: `"/\\" in name` would only match
        # the two characters side by side.
        if "/" in name or "\\" in name:
            raise ValueError(f"invalid tree item name {name}")
        self.name = name
        self.digest = digest
        self.permissions = permissions
        self.creation_timestamp = creation_timestamp
        self.modification_timestamp = modification_timestamp

    @property
    def creation_time(self) -> DateTime:
        """Creation time as a `DateTime`."""
        return time_from_timestamp(self.creation_timestamp)

    @creation_time.setter
    def creation_time(self, time: DateTime):
        self.creation_timestamp = timestamp_from_time(time)

    @property
    def modification_time(self) -> DateTime:
        """Modification time as a `DateTime`."""
        return time_from_timestamp(self.modification_timestamp)

    @modification_time.setter
    def modification_time(self, time: DateTime):
        self.modification_timestamp = timestamp_from_time(time)

    @classmethod
    def from_stream(cls, stream: BinaryIO, digest_size: int) -> TreeItem | None:
        """Read one item from `stream`, or return `None` at end of stream.

        The layout mirrors `write`: digest, permissions (2 bytes), creation
        and modification timestamps (8 signed bytes each), name length
        (2 bytes) and the UTF-8 encoded name.
        """
        digest_bytes = read_exact_or_eof(stream, digest_size)
        if digest_bytes is None:
            return None
        # Fields must be read in the exact order they are written.
        permissions = int.from_bytes(read_exact(stream, 2))
        creation_timestamp = int.from_bytes(read_exact(stream, 8), signed=True)
        modification_timestamp = int.from_bytes(read_exact(stream, 8), signed=True)
        name_length = int.from_bytes(read_exact(stream, 2))
        name = read_exact(stream, name_length).decode("utf-8")
        return cls(
            name = name,
            digest = Digest(digest_bytes),
            permissions = permissions,
            creation_timestamp = creation_timestamp,
            modification_timestamp = modification_timestamp,
        )

    def write(self, stream: BinaryIO):
        """Write the item to `stream` in the layout read by `from_stream`."""
        stream.write(self.digest.digest)
        stream.write(self.permissions.to_bytes(2))
        stream.write(self.creation_timestamp.to_bytes(8, signed=True))
        stream.write(self.modification_timestamp.to_bytes(8, signed=True))
        name_bytes = self.name.encode("utf-8")
        stream.write(len(name_bytes).to_bytes(2))
        stream.write(name_bytes)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Snapshot:
|
|
||||||
repo: Repository
|
|
||||||
tree_digest: Digest
|
|
||||||
repo_name: str
|
|
||||||
timestamp: int
|
|
||||||
|
|
||||||
@property
|
|
||||||
def time(self) -> DateTime:
|
|
||||||
return time_from_timestamp(self.timestamp)
|
|
||||||
@time.setter
|
|
||||||
def time(self, time: DateTime):
|
|
||||||
self.timestamp = timestamp_from_time(time)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def from_stream(cls, repo: Repository, stream: BinaryIO) -> Snapshot:
|
|
||||||
return Snapshot(
|
|
||||||
repo = repo,
|
|
||||||
tree_digest = Digest(read_exact(stream, repo._cas._digest_size)),
|
|
||||||
repo_name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
|
|
||||||
timestamp = int.from_bytes(read_exact(stream, 8), signed=True),
|
|
||||||
)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def from_bytes(cls, repo: Repository, data: bytes) -> Snapshot:
|
|
||||||
stream = BytesIO(data)
|
|
||||||
return cls.from_stream(repo, stream)
|
|
||||||
|
|
||||||
def write(self, stream: BinaryIO):
|
|
||||||
stream.write(self.tree_digest.digest)
|
|
||||||
repo_name_bytes = self.repo_name.encode("utf-8")
|
|
||||||
stream.write(len(repo_name_bytes).to_bytes(2))
|
|
||||||
stream.write(repo_name_bytes)
|
|
||||||
stream.write(self.timestamp.to_bytes(8, signed=True))
|
|
||||||
|
|
||||||
def to_bytes(self) -> bytes:
|
|
||||||
stream = BytesIO()
|
|
||||||
self.write(stream)
|
|
||||||
return stream.getvalue()
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class Chunk:
    """A chunk record: the digest of a chunk and its size."""

    digest: Digest
    size: int

    @classmethod
    def from_stream(cls, stream: BinaryIO, digest_size: int) -> Chunk | None:
        """Read one record (digest, then size on 4 bytes) from `stream`.

        Returns `None` when `read_exact_or_eof` reports the end of the stream.
        """
        raw_digest = read_exact_or_eof(stream, digest_size)
        if raw_digest is None:
            return None

        chunk_digest = Digest(raw_digest)
        chunk_size = int.from_bytes(read_exact(stream, 4))
        return cls(digest = chunk_digest, size = chunk_size)
|
|
||||||
|
|
||||||
|
|
||||||
class PathPair:
    """Associate a path in the bsv tree with a path on the filesystem."""

    bsv: PurePosixPath
    fs: Path

    def __init__(self, bsv: PurePosixPath, fs: Path):
        """Create a pair from a bsv path and a filesystem path."""
        self.bsv = bsv
        self.fs = fs

    @classmethod
    def from_obj(cls, obj: dict[str, Any]) -> PathPair:
        """Build a pair from a mapping with `"bsv"` and `"fs"` entries.

        Raises:
            ValueError: If either path is not absolute.
        """
        bsv = PurePosixPath(obj["bsv"])
        fs = Path(obj["fs"])

        if not bsv.is_absolute() or not fs.is_absolute():
            raise ValueError("paths in path_map must be absolute")

        # Store the parsed paths, not the raw configuration values, so the
        # attributes match their declared types.
        return cls(bsv = bsv, fs = fs)

    def __lt__(self, rhs: PathPair) -> bool:
        """Order pairs by their bsv path."""
        return self.bsv < rhs.bsv
|
|
||||||
|
|
||||||
|
|
||||||
def time_from_timestamp(timestamp: int) -> DateTime:
    """Convert a count of microseconds since `EPOCH` into a `DateTime`."""
    offset = TimeDelta(microseconds=timestamp)
    return EPOCH + offset
|
|
||||||
|
|
||||||
def timestamp_from_time(time: DateTime) -> int:
    """Convert `time` into a whole number of microseconds since `EPOCH`."""
    elapsed = time.astimezone(UTC) - EPOCH
    one_microsecond = TimeDelta(microseconds=1)
    return elapsed // one_microsecond
|
|
||||||
348
src/bsv/vfs.py
Normal file
348
src/bsv/vfs.py
Normal file
@@ -0,0 +1,348 @@
|
|||||||
|
# pybsv - Backup, Synchronization, Versioning.
|
||||||
|
# Copyright (C) 2025 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
"""Provide a virtual file system interface alongside associated tools."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
from functools import total_ordering
|
||||||
|
import os
|
||||||
|
from pathlib import Path, PurePosixPath
|
||||||
|
from stat import S_IFDIR, S_IFLNK, S_IFMT, S_IFREG, S_IMODE, filemode
|
||||||
|
from typing import TYPE_CHECKING, Any, BinaryIO, Literal, Self
|
||||||
|
|
||||||
|
from typing_extensions import Buffer
|
||||||
|
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from collections.abc import Iterator
|
||||||
|
from os import stat_result
|
||||||
|
|
||||||
|
|
||||||
|
AnyBsvPath = PurePosixPath | str
|
||||||
|
|
||||||
|
|
||||||
|
class FsError(RuntimeError):
    """Base error type raised by `VirtualFileSystem` operations."""
|
||||||
|
|
||||||
|
|
||||||
|
class AlreadyExistError(FsError):
    """Raised when trying to create an item that already exists."""
|
||||||
|
|
||||||
|
|
||||||
|
class NotFoundError(FsError):
    """Raised when trying to access an item that does not exist."""
|
||||||
|
|
||||||
|
|
||||||
|
class Permissions:
    """Unix-style permission bits of an object in a filesystem."""

    unix_perms: int

    def __init__(self, unix_perms: int | str = 0o640):
        """Create a `Permissions` from an integer or an octal string."""
        self.unix_perms = (
            int(unix_perms, 8) if isinstance(unix_perms, str) else unix_perms
        )

    def __eq__(self, rhs: Any) -> bool:
        """Compare the permission bits of two `Permissions`."""
        if not isinstance(rhs, Permissions):
            return NotImplemented
        return rhs.unix_perms == self.unix_perms

    def __repr__(self) -> str:
        """Return valid python code that recreates these permissions."""
        octal_digits = oct(self.unix_perms)[2:]
        return f"Permissions(0o{octal_digits:0>4})"

    def __str__(self) -> str:
        """Return the permissions in the 'rwxrwxrwx' form."""
        # `filemode` prefixes the result with a file type character; drop it.
        mode_string = filemode(self.unix_perms)
        return mode_string[1:]
|
||||||
|
|
||||||
|
|
||||||
|
DEFAULT_DIR_PERMS = Permissions(0o770)
|
||||||
|
DEFAULT_FILE_PERMS = Permissions(0o640)
|
||||||
|
|
||||||
|
|
||||||
|
FileType = Literal["dir", "file", "symlink", "other"]
|
||||||
|
|
||||||
|
_IFMT_MAP: dict[int, FileType] = {
|
||||||
|
S_IFDIR: "dir",
|
||||||
|
S_IFREG: "file",
|
||||||
|
S_IFLNK: "symlink",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@total_ordering
|
||||||
|
class FileMetadata:
|
||||||
|
"""Metadata associated with vfs files: file type, permissions, etc."""
|
||||||
|
|
||||||
|
path: PurePosixPath
|
||||||
|
type: FileType
|
||||||
|
permissions: Permissions
|
||||||
|
modification_time: datetime
|
||||||
|
byte_size: int
|
||||||
|
_stat: stat_result
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
path: PurePosixPath,
|
||||||
|
*,
|
||||||
|
type: FileType,
|
||||||
|
permissions: Permissions,
|
||||||
|
modification_time: datetime,
|
||||||
|
byte_size: int,
|
||||||
|
):
|
||||||
|
"""Create a `FileMetadata`."""
|
||||||
|
self.path = path
|
||||||
|
self.type = type
|
||||||
|
self.permissions = permissions
|
||||||
|
self.modification_time = modification_time
|
||||||
|
self.byte_size = byte_size
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_stat(
|
||||||
|
cls,
|
||||||
|
path: PurePosixPath,
|
||||||
|
stat: stat_result,
|
||||||
|
) -> Self:
|
||||||
|
"""Create a `FileMetadata` from a `stat_result`."""
|
||||||
|
return cls(
|
||||||
|
path,
|
||||||
|
type=_IFMT_MAP.get(S_IFMT(stat.st_mode), "other"),
|
||||||
|
permissions=Permissions(S_IMODE(stat.st_mode)),
|
||||||
|
modification_time=datetime.fromtimestamp(stat.st_mtime, UTC),
|
||||||
|
byte_size=stat.st_size,
|
||||||
|
)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def unix_mode(self) -> str:
|
||||||
|
"""Return unix-like mode in the form '-rwxrwxrwx'."""
|
||||||
|
return UNIX_MODE_FILE_TYPE[self.type] + str(self.permissions)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_hidden(self) -> bool:
|
||||||
|
"""Return true if the file starts with a '.'."""
|
||||||
|
return self.path.name.startswith(".")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_file(self) -> bool:
|
||||||
|
"""Test if this is a file."""
|
||||||
|
return self.type == "file"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_dir(self) -> bool:
|
||||||
|
"""Test if this is a directory."""
|
||||||
|
return self.type == "dir"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_symlink(self) -> bool:
|
||||||
|
"""Test if this is a symbolic link."""
|
||||||
|
return self.type == "symlink"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_other(self) -> bool:
|
||||||
|
"""Test if this is a symbolic link."""
|
||||||
|
return self.type == "other"
|
||||||
|
|
||||||
|
def _as_tuple(
|
||||||
|
self,
|
||||||
|
) -> tuple[PurePosixPath, FileType, Permissions, datetime, int]:
|
||||||
|
return (
|
||||||
|
self.path,
|
||||||
|
self.type,
|
||||||
|
self.permissions,
|
||||||
|
self.modification_time,
|
||||||
|
self.byte_size,
|
||||||
|
)
|
||||||
|
|
||||||
|
def __eq__(self, rhs: Any) -> bool:
|
||||||
|
"""Test if two `Metadata` are the same."""
|
||||||
|
return (
|
||||||
|
self._as_tuple() == rhs._as_tuple()
|
||||||
|
if isinstance(rhs, FileMetadata)
|
||||||
|
else NotImplemented
|
||||||
|
)
|
||||||
|
|
||||||
|
def __lt__(self, rhs: Any) -> bool:
|
||||||
|
"""Compare `rhs.path` with `self.path`."""
|
||||||
|
return self.path < rhs.path if isinstance(rhs, FileMetadata) else NotImplemented
|
||||||
|
|
||||||
|
|
||||||
|
# First character of the unix mode string for each file type.
# `FileType` names symbolic links "symlink"; the historical "link" key is kept
# so existing lookups keep working.
UNIX_MODE_FILE_TYPE = {
    "dir": "d",
    "file": "-",
    "other": "o",
    "symlink": "l",
    "link": "l",
}
|
||||||
|
|
||||||
|
|
||||||
|
class VirtualFileSystem:
|
||||||
|
"""Represent a file system, with common file system operations."""
|
||||||
|
|
||||||
|
path: Path
|
||||||
|
|
||||||
|
def __init__(self, path: Path):
|
||||||
|
"""Initialize the file system to point to `path`."""
|
||||||
|
self.path = path
|
||||||
|
|
||||||
|
def exists(self, path: AnyBsvPath) -> bool:
    """Test if `path` points to an existing item."""
    real_path = self._real_path(self._make_path(path))
    return real_path.exists()
|
||||||
|
|
||||||
|
def is_file(self, path: AnyBsvPath) -> bool:
    """Test if `path` is a file (see `metadata` for the errors raised)."""
    info = self.metadata(path)
    return info.is_file
|
||||||
|
|
||||||
|
def is_dir(self, path: AnyBsvPath) -> bool:
    """Test if `path` is a directory (see `metadata` for the errors raised)."""
    info = self.metadata(path)
    return info.is_dir
|
||||||
|
|
||||||
|
def is_symlink(self, path: AnyBsvPath) -> bool:
    """Test if `path` is a symbolic link (see `metadata` for the errors raised)."""
    info = self.metadata(path)
    return info.is_symlink
|
||||||
|
|
||||||
|
def is_other(self, path: AnyBsvPath) -> bool:
    """Test if `path` is neither a file, a directory nor a symbolic link."""
    info = self.metadata(path)
    return info.is_other
|
||||||
|
|
||||||
|
def metadata(self, path: AnyBsvPath) -> FileMetadata:
    """Return the metadata of the item at `path`.

    Raises:
        NotFoundError: If `metadata_or_none` finds nothing at `path`.
    """
    found = self.metadata_or_none(path)
    if found is not None:
        return found
    msg = f"file '{path}' not found"
    raise NotFoundError(msg)
|
||||||
|
|
||||||
|
def metadata_or_none(self, path: AnyBsvPath) -> FileMetadata | None:
    """Return the metadata of the item at `path`, or `None` if it is missing.

    Symbolic links are not followed: the metadata describes the link itself.

    Raises:
        FsError: If the item cannot be stat'ed for any reason other than
            not existing.
    """
    bsv_path = self._make_path(path)
    try:
        stat = self._real_path(bsv_path).stat(follow_symlinks=False)
    except FileNotFoundError:
        return None
    except OSError as err:
        msg = f"failed to read '{bsv_path}' metadata"
        raise FsError(msg) from err
    else:
        return FileMetadata.from_stat(bsv_path, stat)
|
||||||
|
|
||||||
|
def iter_dir(self, path: AnyBsvPath) -> Iterator[FileMetadata]:
    """Yield the metadata of every item in the directory `path`.

    Symbolic links are not followed: entries describe the links themselves.

    Raises:
        FsError: If the directory or one of its entries cannot be read.
    """
    path = self._make_path(path)
    real_path = self._real_path(path)
    try:
        # The context manager closes the scandir handle even when the
        # consumer stops iterating early or an error occurs.
        with os.scandir(real_path) as entries:
            for entry in entries:
                yield FileMetadata.from_stat(
                    path / entry.name, entry.stat(follow_symlinks=False)
                )
    except OSError as err:
        msg = f"failed to read directory {path}"
        raise FsError(msg) from err
|
||||||
|
|
||||||
|
def read_bytes(self, path: AnyBsvPath) -> bytes:
    """Return the whole content of the file at `path` as `bytes`."""
    stream = self.open_read(path)
    with stream:
        content = stream.read()
    return content
|
||||||
|
|
||||||
|
def write_bytes(self, path: AnyBsvPath, data: Buffer | BinaryIO) -> int:
    """Create or replace the file at `path` with the content of `data`.

    `data` is either a buffer written in one call, or a binary stream that
    is copied by blocks of 65536 bytes. Returns the sum of the values
    returned by the `write` calls.
    """
    total = 0
    with self.open_write(path) as sout:
        if not isinstance(data, Buffer):
            while block := data.read(65536):
                total += sout.write(block)
        else:
            total += sout.write(data)
    return total
|
||||||
|
|
||||||
|
def open_read(self, path: AnyBsvPath) -> BinaryIO:
    """Open the file at `path` for reading and return the binary stream.

    Raises:
        FsError: If the file cannot be opened.
    """
    vpath = self._make_path(path)
    try:
        stream = self._real_path(vpath).open("rb")
    except OSError as err:
        msg = f"failed to read {vpath}"
        raise FsError(msg) from err
    return stream
|
||||||
|
|
||||||
|
def open_write(self, path: AnyBsvPath) -> BinaryIO:
    """Open the file at `path` for writing and return the binary stream.

    The file is opened in `"wb"` mode, so it is created if missing and
    truncated otherwise.

    Raises:
        FsError: If the file cannot be opened for writing.
    """
    path = self._make_path(path)
    try:
        return self._real_path(path).open("wb")
    except OSError as err:
        # This is a write failure; the message used to say "read".
        msg = f"failed to write {path}"
        raise FsError(msg) from err
|
||||||
|
|
||||||
|
def mkdir(
    self,
    path: AnyBsvPath,
    mode: Permissions = DEFAULT_DIR_PERMS,
    parents: bool = False,
    exist_ok: bool = False,
) -> None:
    """Create a directory at `path`.

    Args:
        path: The directory to create.
        mode: The permissions of the new directory.
        parents: If `True`, create parent directories if they don't exist.
        exist_ok: If `False` and `path` already exists, raise an error.

    Raises:
        AlreadyExistError: If the underlying `mkdir` reports that `path`
            already exists.
        NotFoundError: If the underlying `mkdir` reports a missing parent.
        FsError: If the directory cannot be created for any other reason.
    """
    path = self._make_path(path)
    try:
        self._real_path(path).mkdir(
            mode=mode.unix_perms, parents=parents, exist_ok=exist_ok
        )
    except FileExistsError as err:
        msg = f"{path} already exists"
        raise AlreadyExistError(msg) from err
    except FileNotFoundError as err:
        msg = f"{path.parent} does not exist"
        raise NotFoundError(msg) from err
    except OSError as err:
        # Wrap remaining failures (permissions, not-a-directory, ...) the
        # same way the other methods do, as the docstring promises.
        msg = f"failed to create directory {path}"
        raise FsError(msg) from err
|
||||||
|
|
||||||
|
def make_link(self, path: AnyBsvPath, target: AnyBsvPath) -> None:
    """Create a symbolic link at `path` that points to `target`.

    Both arguments are virtual paths; the link is created on the real file
    system and points to the real location of `target`.
    """
    path = self._make_path(path)
    # Normalize `target` itself. Passing `path` here made the link point
    # at its own location.
    target = self._make_path(target)
    self._real_path(path).symlink_to(self._real_path(target))
|
||||||
|
|
||||||
|
def set_permissions(self, path: AnyBsvPath, permissions: Permissions) -> None:
    """Apply `permissions` to the entry at `path` using `chmod`."""
    real = self._real_path(self._make_path(path))
    real.chmod(permissions.unix_perms)
|
||||||
|
|
||||||
|
def set_modification_time(self, path: AnyBsvPath, mod_time: datetime) -> None:
    """Set both the access and modification times of `path` to `mod_time`."""
    vpath = self._make_path(path)
    stamp = mod_time.timestamp()
    real = self._real_path(vpath)
    # os.utime takes (atime, mtime); the same timestamp is used for both.
    os.utime(real, (stamp, stamp))
|
||||||
|
|
||||||
|
def _make_path(self, path: AnyBsvPath) -> PurePosixPath:
|
||||||
|
if not isinstance(path, PurePosixPath):
|
||||||
|
path = PurePosixPath(path)
|
||||||
|
if not path.is_absolute():
|
||||||
|
msg = f"{path} is not absolute"
|
||||||
|
raise FsError(msg)
|
||||||
|
return path
|
||||||
|
|
||||||
|
def _real_path(self, path: PurePosixPath) -> Path:
|
||||||
|
return self.path / path.relative_to("/")
|
||||||
286
tests.bak/test_repository.py
Normal file
286
tests.bak/test_repository.py
Normal file
@@ -0,0 +1,286 @@
|
|||||||
|
# bsv - Backup, Synchronization, Versioning
|
||||||
|
# Copyright (C) 2023 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
from os import stat_result
|
||||||
|
from pathlib import Path
|
||||||
|
from random import randbytes
|
||||||
|
from shutil import rmtree
|
||||||
|
from typing import Iterator
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from tempfile import TemporaryDirectory
|
||||||
|
|
||||||
|
from bsv.repository import Repository, Snapshot, Tree, TreeItem, create_repository, timestamp_us_from_time
|
||||||
|
from bsv.simple_cas.cas import Digest
|
||||||
|
from bsv.tree_walker import Action, IgnoreCause, TreeWalker
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def tmp_dir():
    """Yield a fresh temporary directory that is deleted after the test."""
    with TemporaryDirectory(prefix="simple_cas_") as dir_name:
        yield Path(dir_name)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def repo(tmp_dir):
    """Create a repository named "test_repo" inside the temporary directory."""
    config_path = tmp_dir / "bsv.config"
    repo_path = tmp_dir / "bsv_repo"
    return create_repository(config_path, "test_repo", repo_path)
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_write_blob(tmp_dir: Path, repo: Repository):
    """A blob added from a file can be read back unchanged, directly and by digest."""
    path = tmp_dir / "test.dat"
    make_random_file(path, 1 << 20)

    with path.open("rb") as stream:
        added = repo.add_blob(stream)
    assert added.object_type == b"blob"
    expected = path.read_bytes()
    assert added.blob.reader().read() == expected

    fetched = repo.get_blob(added.digest)
    assert fetched.digest == added.digest
    assert fetched.object_type == added.object_type
    assert fetched.size == added.size
    assert fetched.blob.reader().read() == path.read_bytes()
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_write_tree(repo: Repository):
    """A tree survives a bytes round trip and a store/fetch round trip."""
    stamp_us = timestamp_us_from_time(datetime.now(UTC))
    digest_size = repo._cas._digest_size

    blob_item = TreeItem(
        digest=Digest(bytes([42]) * digest_size),
        object_type=b"blob",
        size=123,
        permissions=0o744,
        modification_timestamp_us=stamp_us,
        name="xyz",
    )
    link_item = TreeItem(
        digest=Digest(bytes([123]) * digest_size),
        object_type=b"slnk",
        size=42,
        permissions=0o777,
        modification_timestamp_us=stamp_us,
        name="foobar",
    )
    tree = Tree(repo, [blob_item, link_item])

    assert Tree.from_bytes(repo, tree.to_bytes()) == tree

    added = repo.add_tree(tree)
    assert added.object_type == b"tree"
    assert added.tree == tree

    fetched = repo.get_tree(added.digest)
    assert fetched.digest == added.digest
    assert fetched.object_type == added.object_type
    assert fetched.size == added.size
    assert fetched.tree == added.tree
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_write_snapshot(repo: Repository):
    """A snapshot survives a bytes round trip and a store/fetch round trip."""
    snapshot = Snapshot(
        repo = repo,
        tree_digest = Digest(bytes([42]) * repo._cas._digest_size),
        parents = [
            Digest(bytes([123]) * repo._cas._digest_size),
            Digest(bytes([124]) * repo._cas._digest_size),
        ],
        repo_name = "test_repo",
        # Timezone-aware, like test_read_write_tree, so the timestamp does
        # not depend on the local timezone of the test machine.
        timestamp_us = timestamp_us_from_time(datetime.now(UTC)),
    )

    assert Snapshot.from_bytes(repo, snapshot.to_bytes()) == snapshot

    obj0 = repo.add_snapshot(snapshot)
    assert obj0.object_type == b"snap"
    assert obj0.snapshot == snapshot

    obj1 = repo.get_snapshot(obj0.digest)
    assert obj1.digest == obj0.digest
    assert obj1.object_type == obj0.object_type
    assert obj1.size == obj0.size
    assert obj1.snapshot == obj0.snapshot
|
||||||
|
|
||||||
|
|
||||||
|
class TestTreeWalker(TreeWalker):
    """`TreeWalker` that records every reported action for later assertions."""

    # The name starts with "Test" but this is a helper, not a test case.
    # Opt out of pytest collection to avoid the "cannot collect test class
    # ... because it has a __init__ constructor" warning.
    __test__ = False

    # Recorded (action, path, info) tuples, in reporting order.
    reports: list

    def __init__(self, repo: Repository, dry_run: bool=False):
        super().__init__(repo, dry_run=dry_run)
        self.reports = []

    def report(
        self,
        action: Action,
        path: Path,
        pstat: stat_result | None,
        info: IgnoreCause | Exception | None = None
    ):
        """Forward the report to the base class, then record it."""
        super().report(action, path, pstat, info)
        # `info` is not recorded for removals, so expectations can always
        # use `None` there.
        self.reports.append((action, path, info if action != Action.REMOVE else None))
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_tree(tmp_dir: Path, repo: Repository):
    """Walk a directory twice and check the reported actions and stored trees.

    The first pass adds everything except the ignored `bsv_repo` folder. The
    second pass, run against the first tree, must report only the changes.
    """
    root = tmp_dir / "test0"
    structure0 = {
        "folder": {
            "sub_folder": {
                "empty_folder": {},
                "foo.txt": b"Hello World!\n",
            },
            "test.py": b"print(\"Hello World!\")\n",
            "bar.dat": bytes(range(256)),
        },
        "Another test with long name and spaces and a bang !": b"Should works.\n",
        "bsv_repo": {
            "bsv_repository.config": b"[bsv]\n",
        },
    }
    structure1 = {
        "folder": {
            "sub_folder": {
                "empty_folder": {},
                "foo.txt": b"Hello World!\n",
            },
            "bar.dat": bytes(range(256)) * 2,
        },
        "new_file": b"whatever",
        "Another test with long name and spaces and a bang !": b"Should works.\n",
        "bsv_repo": {
            "bsv_repository.config": b"[bsv]\n",
        },
    }

    # The walker reports `bsv_repo` as ignored, so it must not be stored.
    expected0 = dict(structure0)
    del expected0["bsv_repo"]

    expected1 = dict(structure1)
    del expected1["bsv_repo"]

    create_file_structure(root, structure0)

    def check(digest: Digest, value: dict | bytes):
        """Recursively compare the stored object `digest` with `value`."""
        if isinstance(value, dict):
            tree = repo.get_tree(digest).tree
            assert tree
            assert [item.name for item in tree.items] == sorted(value.keys())
            for item in tree.items:
                check(item.digest, value[item.name])
        elif isinstance(value, bytes):
            blob_obj = repo.get_blob(digest)
            data = blob_obj.blob.reader().read()
            assert data == value

    walker = TestTreeWalker(repo)
    obj0 = walker.add_tree(root)
    assert obj0.object_type == b"tree"
    assert walker.reports == [
        (Action.ADD, root / "Another test with long name and spaces and a bang !", None),
        (Action.IGNORE, root / "bsv_repo", IgnoreCause.IGNORE_RULE),
        (Action.ADD, root / "folder/bar.dat", None),
        (Action.ADD, root / "folder/sub_folder/empty_folder", None),
        (Action.ADD, root / "folder/sub_folder/foo.txt", None),
        (Action.ADD, root / "folder/sub_folder", None),
        (Action.ADD, root / "folder/test.py", None),
        (Action.ADD, root / "folder", None),
        (Action.ADD, root, None),
    ]
    check(obj0.digest, expected0)

    create_file_structure(root, structure1)

    walker.reports.clear()
    obj1 = walker.add_tree(root, source_digest=obj0.digest)
    # Check the object produced by the second pass; this used to re-check obj0.
    assert obj1.object_type == b"tree"
    assert walker.reports == [
        (Action.IGNORE, root / "Another test with long name and spaces and a bang !", IgnoreCause.UNCHANGED),
        (Action.IGNORE, root / "bsv_repo", IgnoreCause.IGNORE_RULE),
        (Action.UPDATE, root / "folder/bar.dat", None),
        (Action.IGNORE, root / "folder/sub_folder/empty_folder", IgnoreCause.UNCHANGED),
        (Action.IGNORE, root / "folder/sub_folder/foo.txt", IgnoreCause.UNCHANGED),
        (Action.IGNORE, root / "folder/sub_folder", IgnoreCause.UNCHANGED),
        (Action.REMOVE, root / "folder/test.py", None),
        (Action.UPDATE, root / "folder", None),
        (Action.ADD, root / "new_file", None),
        (Action.UPDATE, root, None),
    ]
    check(obj1.digest, expected1)
|
||||||
|
|
||||||
|
|
||||||
|
def create_file_structure(dst: Path, value: dict | bytes):
    """Make `dst` on disk match `value`.

    A `bytes` value describes a file with that content; a `dict` describes a
    directory whose keys are entry names. Existing entries that `value` does
    not mention are removed, and files whose content already matches are not
    rewritten.

    Raises:
        TypeError: If `value` is neither `bytes` nor `dict`.
    """
    if isinstance(value, bytes):
        if dst.is_dir():
            rmtree(str(dst))
        if not (dst.is_file() and dst.read_bytes() == value):
            dst.write_bytes(value)
        return

    if not isinstance(value, dict):
        raise TypeError(f"invalid type {type(value).__name__} for parameter value")

    if dst.is_file():
        dst.unlink()
    if not dst.is_dir():
        dst.mkdir()

    wanted = sorted(value.items())
    present = sorted(dst.iterdir())

    # Walk both sorted sequences in lockstep (a merge): an entry found only
    # in `wanted` is created, one found only on disk is deleted, and one
    # found in both is updated recursively.
    wanted_pos = 0
    present_pos = 0
    while wanted_pos < len(wanted) or present_pos < len(present):
        if wanted_pos < len(wanted):
            name, child = wanted[wanted_pos]
        else:
            name, child = None, None
        existing = present[present_pos] if present_pos < len(present) else None

        if name and existing:
            if name < existing.name:
                existing = None
            elif name > existing.name:
                name = None

        if name:
            wanted_pos += 1
        if existing:
            present_pos += 1

        if name:
            create_file_structure(dst / name, child)  # type: ignore
        elif existing and existing.is_dir():
            rmtree(existing)
        elif existing:
            existing.unlink()
|
||||||
|
|
||||||
|
|
||||||
|
def make_random_file(path: Path, size: int):
    """Create or overwrite `path` with `size` random bytes, written in chunks."""
    with path.open("wb") as out:
        out.writelines(randbytes(count) for count in iter_chunks(size))
|
||||||
|
|
||||||
|
def iter_chunks(size: int, chunk_size: int=1 << 16) -> Iterator[int]:
    """Yield chunk lengths that add up to `size`.

    Every chunk is `chunk_size` long except possibly the last one, which
    holds the remainder. Nothing is yielded when `size` is zero or negative;
    the previous arithmetic produced one bogus `chunk_size` chunk for
    `size == 0`.
    """
    full_chunks, remainder = divmod(max(size, 0), chunk_size)
    for _ in range(full_chunks):
        yield chunk_size
    if remainder:
        yield remainder
|
||||||
@@ -55,7 +55,9 @@ def test_simple_cas(tmp_dir: Path):
|
|||||||
|
|
||||||
obj = cas.read(digest)
|
obj = cas.read(digest)
|
||||||
assert obj is not None
|
assert obj is not None
|
||||||
|
assert obj.digest == digest
|
||||||
assert obj.object_type == b"blob"
|
assert obj.object_type == b"blob"
|
||||||
|
assert obj.size == len(data)
|
||||||
assert obj.data == data
|
assert obj.data == data
|
||||||
|
|
||||||
cas = SimpleCas(
|
cas = SimpleCas(
|
||||||
@@ -68,7 +70,9 @@ def test_simple_cas(tmp_dir: Path):
|
|||||||
|
|
||||||
obj = cas.read(digest)
|
obj = cas.read(digest)
|
||||||
assert obj is not None
|
assert obj is not None
|
||||||
|
assert obj.digest == digest
|
||||||
assert obj.object_type == b"blob"
|
assert obj.object_type == b"blob"
|
||||||
|
assert obj.size == len(data)
|
||||||
assert obj.data == data
|
assert obj.data == data
|
||||||
|
|
||||||
digest2 = cas.write(b"blob", data)
|
digest2 = cas.write(b"blob", data)
|
||||||
18
tests/test_bsv/__init__.py
Normal file
18
tests/test_bsv/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# pybsv - Backup, Synchronization, Versioning.
|
||||||
|
# Copyright (C) 2025 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
"""pybsv test module."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
270
tests/test_bsv/test_cli.py
Normal file
270
tests/test_bsv/test_cli.py
Normal file
@@ -0,0 +1,270 @@
|
|||||||
|
# pybsv - Backup, Synchronization, Versioning.
|
||||||
|
# Copyright (C) 2025 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
import re
|
||||||
|
from tempfile import TemporaryDirectory
|
||||||
|
from typing import TYPE_CHECKING, Literal, NamedTuple
|
||||||
|
|
||||||
|
from click.testing import CliRunner
|
||||||
|
from hypothesis import given
|
||||||
|
import hypothesis.strategies as st
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from bsv import cli
|
||||||
|
from bsv.vfs import Permissions
|
||||||
|
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from collections.abc import Generator
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def runner(tmp_path: Path) -> CliRunner:
    """Return a `CliRunner` whose `BSV_REPO` variable points at `tmp_path`."""
    return CliRunner(env={"BSV_REPO": str(tmp_path)})
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
def make_runner() -> Generator[CliRunner, None, None]:
    """Yield a `CliRunner` whose `BSV_REPO` is a fresh temporary directory.

    The directory is deleted when the context exits.
    """
    with TemporaryDirectory(prefix="test_vfs_") as repo_dir:
        yield CliRunner(env={"BSV_REPO": repo_dir})
|
||||||
|
|
||||||
|
|
||||||
|
########################################################################################
|
||||||
|
# mkdir
|
||||||
|
|
||||||
|
|
||||||
|
def test_mkdir_fails_with_relative_path(tmp_path: Path, runner: CliRunner):
    """`mkdir` with a relative path exits with code 2 and an explanatory message."""
    target = tmp_path / "test"
    assert not target.exists()
    outcome = runner.invoke(cli.cli, ["mkdir", "test"])
    assert outcome.exit_code == 2
    assert "test is not an absolute path" in outcome.stderr
|
||||||
|
|
||||||
|
|
||||||
|
def test_mkdir_default(tmp_path: Path, runner: CliRunner):
    """`mkdir /test` silently creates the directory in the repository root."""
    target = tmp_path / "test"
    assert not target.exists()
    outcome = runner.invoke(cli.cli, ["mkdir", "/test"])
    assert outcome.exit_code == 0
    assert outcome.stderr == ""
    assert outcome.stdout == ""
    assert target.is_dir()
|
||||||
|
|
||||||
|
|
||||||
|
def test_mkdir_multiple_dirs(tmp_path: Path, runner: CliRunner):
    """`mkdir` creates every directory passed on the command line."""
    foo = tmp_path / "foo"
    bar = tmp_path / "bar"
    assert not foo.exists()
    assert not bar.exists()
    outcome = runner.invoke(cli.cli, ["mkdir", "/foo", "/bar"])
    assert outcome.exit_code == 0
    assert outcome.stderr == ""
    assert outcome.stdout == ""
    assert foo.is_dir()
    assert bar.is_dir()
|
||||||
|
|
||||||
|
|
||||||
|
def test_mkdir_nested_fails_without_parents(tmp_path: Path, runner: CliRunner):
    """Without `--parents`, a missing parent is reported and the exit code is 1."""
    missing_parent = tmp_path / "foo"
    assert not missing_parent.exists()
    outcome = runner.invoke(cli.cli, ["mkdir", "/foo/bar"])
    assert outcome.exit_code == 1
    assert outcome.stderr == "/foo does not exist\n"
    assert outcome.stdout == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_mkdir_nested(tmp_path: Path, runner: CliRunner):
    """With `--parents`, intermediate directories are created as needed."""
    nested = tmp_path / "foo/bar"
    assert not nested.exists()
    outcome = runner.invoke(cli.cli, ["mkdir", "--parents", "/foo/bar"])
    assert outcome.exit_code == 0
    assert outcome.stderr == ""
    assert outcome.stdout == ""
    assert nested.is_dir()
|
||||||
|
|
||||||
|
|
||||||
|
def test_mkdir_message_if_exists(tmp_path: Path, runner: CliRunner):
    """Creating an existing directory prints a message but still exits with 0."""
    target = tmp_path / "test"

    first = runner.invoke(cli.cli, ["mkdir", "/test"])
    assert first.exit_code == 0
    assert first.stderr == ""
    assert first.stdout == ""
    assert target.is_dir()

    second = runner.invoke(cli.cli, ["mkdir", "/test"])
    assert second.exit_code == 0
    assert second.stderr == "/test already exists\n"
    assert second.stdout == ""
    assert target.is_dir()
|
||||||
|
|
||||||
|
|
||||||
|
def test_mkdir_mode(tmp_path: Path, runner: CliRunner):
    """`--mode=741` creates the directory with exactly those permission bits."""
    target = tmp_path / "test"
    assert not target.exists()
    outcome = runner.invoke(cli.cli, ["mkdir", "/test", "--mode=741"])
    assert outcome.exit_code == 0
    assert outcome.stderr == ""
    assert outcome.stdout == ""
    assert target.is_dir()
    permission_bits = target.stat().st_mode & 0o7777
    assert permission_bits == 0o741
|
||||||
|
|
||||||
|
|
||||||
|
def test_mkdir_verbose(tmp_path: Path, runner: CliRunner):
    """`--verbose` prints each created directory; existing ones go to stderr."""
    foo = tmp_path / "foo"
    bar = tmp_path / "bar"

    setup = runner.invoke(cli.cli, ["mkdir", "/foo"])
    assert setup.exit_code == 0
    assert setup.stderr == ""
    assert setup.stdout == ""
    assert foo.is_dir()
    assert not bar.exists()

    outcome = runner.invoke(cli.cli, ["mkdir", "--verbose", "/foo", "/bar"])
    assert outcome.exit_code == 0
    assert outcome.stderr == "/foo already exists\n"
    assert outcome.stdout == "Created /bar\n"
    assert foo.is_dir()
    assert bar.is_dir()
|
||||||
|
|
||||||
|
|
||||||
|
########################################################################################
|
||||||
|
# ls
|
||||||
|
|
||||||
|
|
||||||
|
def permissions(target: Literal["file", "dir"] = "file"):
    """Return a strategy that builds `Permissions` from a fixed list of modes.

    The list of candidate modes depends on `target`: one list for `"file"`,
    another for any other value.
    """
    file_modes = [
        0o0400,
        0o0440,
        0o0444,
        0o0600,
        0o0640,
        0o0644,
        0o0664,
        0o0750,
        0o0755,
        0o0777,
    ]
    other_modes = [
        0o0400,
        0o0600,
        0o0640,
    ]
    modes = file_modes if target == "file" else other_modes
    return st.builds(Permissions, st.sampled_from(modes))
|
||||||
|
|
||||||
|
|
||||||
|
class Tree(NamedTuple):
    """Description of a file or directory that a test materializes on disk."""

    # Kind of entry.
    type: Literal["file", "dir"]
    # Entry name, relative to the parent passed to `build`.
    name: str
    # Permissions applied to the entry.
    perms: Permissions
    # Access/modification time applied to the entry.
    time: datetime
    # File content, or the children when this is a directory.
    content: bytes | list[Tree]

    @property
    def type_prefix(self) -> str:
        """Return the leading character of the mode column: "d" or "-"."""
        return "d" if self.type == "dir" else "-"

    def build(self, parent: Path) -> None:
        """Create this entry, and its children if any, inside `parent`."""
        path = parent / self.name
        if not isinstance(self.content, list):
            path.write_bytes(self.content)
            path.chmod(self.perms.unix_perms)
        else:
            path.mkdir(mode=self.perms.unix_perms)
            for child in self.content:
                child.build(path)
        # Set the times last so that creating children does not alter them.
        stamp = self.time.timestamp()
        os.utime(path, (stamp, stamp))
|
||||||
|
|
||||||
|
|
||||||
|
def filenames() -> st.SearchStrategy:
    """Return a strategy that generates names usable as file names.

    Names hold 1 to 255 characters, encode to fewer than 256 bytes, exclude
    the "Cc" and "Cs" Unicode categories and the characters `<>:"/\\|!*`,
    and are never "." or "..".
    """
    alphabet = st.characters(
        exclude_categories=["Cc", "Cs"], exclude_characters='<>:"/\\|!*'
    )

    def is_usable(text: str) -> bool:
        return len(text.encode()) < 256 and text not in (".", "..")

    return st.text(alphabet, min_size=1, max_size=255).filter(is_usable)
|
||||||
|
|
||||||
|
|
||||||
|
@st.composite
def trees(draw: st.DrawFn, max_depth: int = 3) -> Tree:
    """Draw a random `Tree`: a file with binary content or a directory.

    A directory holds up to 10 children with unique names, each drawn with
    `max_depth - 1`. When `max_depth` is 0, directories are always empty.
    """
    file_type = draw(st.sampled_from(["file", "dir"]))
    if file_type == "file":
        content = st.binary()
    else:
        child_limit = 0 if max_depth == 0 else 10
        content = st.lists(
            trees(max_depth - 1),
            unique_by=lambda t: t.name,
            max_size=child_limit,
        )

    # Values are drawn in the same order as the fields of `Tree`.
    name = draw(filenames())
    perms = draw(permissions(file_type))  # type: ignore
    when = draw(
        st.datetimes(
            min_value=datetime(1902, 1, 1),
            max_value=datetime(2100, 1, 1),
            timezones=st.just(UTC),
        )
    )
    payload = draw(content)
    return Tree(
        file_type,  # type: ignore
        name,
        perms,
        when,
        payload,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def trees_lists(max_depth: int = 3) -> st.SearchStrategy:
    """Return a strategy for lists of `Tree` whose names are all distinct."""
    element = trees(max_depth=max_depth)
    return st.lists(element, unique_by=lambda t: t.name)
|
||||||
|
|
||||||
|
|
||||||
|
@given(trees=trees_lists(max_depth=0))
def test_ls(trees: list[Tree]):
    """`ls -lA` prints one line per entry with type, mode, size, time and name."""
    with make_runner() as runner:
        path = Path(runner.env["BSV_REPO"] or "")
        for tree in trees:
            tree.build(path)

        result = runner.invoke(cli.cli, ["ls", "-lA"])

        assert result.exit_code == 0
        assert result.stderr == ""

        trees.sort(key=lambda t: t.name)
        # Split on "\n" only: str.splitlines() also breaks on separators
        # such as U+2028/U+2029, which generated file names may contain.
        # Empty strings (e.g. after the final newline) are dropped.
        lines = [line for line in result.stdout.split("\n") if line]

        for line, tree in zip(lines, trees, strict=True):
            match = re.fullmatch(
                r"""
                ([dl-])([r-][w-][x-][r-][w-][x-][r-][w-][x-])
                \ +(\d+)
                \ (\d{4}-\d{2}-\d{2}\ \d{2}:\d{2}:\d{2})
                \ ([^\n]+)
                """,
                line,
                re.VERBOSE,
            )
            assert match
            assert match[1] == tree.type_prefix
            assert match[2] == str(tree.perms)
            # The size column is only checked for non-directory entries.
            if tree.type_prefix != "d":
                assert match[3] == str(len(tree.content))
            assert match[4] == tree.time.astimezone().replace(tzinfo=None).isoformat(
                " ", "seconds"
            )
            assert match[5] == tree.name
|
||||||
56
tests/test_bsv/test_cli_utils.py
Normal file
56
tests/test_bsv/test_cli_utils.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
# pybsv - Backup, Synchronization, Versioning.
|
||||||
|
# Copyright (C) 2025 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
"""Tests for cli_utils.py."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from bsv.cli_utils import format_human_byte_size
|
||||||
|
|
||||||
|
|
||||||
|
def test_format_human_byte_size():
    """Check `format_human_byte_size` against a table of sizes and expected texts."""
    cases = [
        (0, "0B"),
        (1, "1B"),
        (9, "9B"),
        (10, "10B"),
        (99, "99B"),
        (100, "100B"),
        (999, "999B"),
        (1000, "1000B"),
        (1023, "1023B"),
        (2**10, "1KiB"),
        (int(1.23456 * 2**10), "1.23KiB"),
        (9 * 2**10, "9KiB"),
        (10 * 2**10 - 1, "10KiB"),
        (int(98.76543 * 2**10), "98.8KiB"),
        (99 * 2**10 - 1, "99KiB"),
        (100 * 2**10 - 1, "100KiB"),
        (int(192.8374 * 2**10), "193KiB"),
        (999 * 2**10 - 1, "999KiB"),
        (1000 * 2**10 - 1, "1000KiB"),
        (2**20 - 1, "1MiB"),
        (2**20, "1MiB"),
        (2**30, "1GiB"),
        (2**40, "1TiB"),
        (2**50, "1PiB"),
        (2**60, "1EiB"),
        (2**70, "1ZiB"),
        (2**80, "1YiB"),
        (2**90, "1RiB"),
        (2**100 - 2**80, "1QiB"),
        (2**100, "1QiB"),
        (2**110 - 2**90, "1024QiB"),
        (2**110, "1024QiB"),
        (2**120, "1048576QiB"),
    ]
    for byte_size, expected in cases:
        assert format_human_byte_size(byte_size) == expected
|
||||||
394
tests/test_bsv/test_vfs.py
Normal file
394
tests/test_bsv/test_vfs.py
Normal file
@@ -0,0 +1,394 @@
|
|||||||
|
# pybsv - Backup, Synchronization, Versioning.
|
||||||
|
# Copyright (C) 2025 Simon Boyé
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
"""Tests for the `VirtualFileSystem` class and related stuff."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
from io import BytesIO
|
||||||
|
from pathlib import Path, PurePosixPath
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from bsv.vfs import FileMetadata, FsError, Permissions, VirtualFileSystem
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def fs(tmp_path: Path) -> VirtualFileSystem:
    """Provide a `VirtualFileSystem` constructed from pytest's `tmp_path`."""
    vfs = VirtualFileSystem(tmp_path)
    return vfs
|
||||||
|
|
||||||
|
|
||||||
|
########################################################################################
|
||||||
|
# Permissions
|
||||||
|
|
||||||
|
|
||||||
|
def test_permissions():
    """Check construction, equality, `repr` and `str` of `Permissions`."""
    from_int = Permissions(0o1234)
    from_text = Permissions("752")

    assert from_int.unix_perms == 0o1234
    assert from_text.unix_perms == 0o752

    assert from_int == from_int
    assert from_int != from_text

    assert repr(from_int) == "Permissions(0o1234)"
    assert repr(from_text) == "Permissions(0o0752)"

    assert str(from_int) == "-w--wxr-T"
    assert str(from_text) == "rwxr-x-w-"
|
||||||
|
|
||||||
|
|
||||||
|
########################################################################################
|
||||||
|
# FileMetadata
|
||||||
|
|
||||||
|
|
||||||
|
def test_file_metadata():
    """Check `FileMetadata` attributes and its type / hidden predicates."""
    default_path = PurePosixPath("/some_dir/some_file")
    perms = Permissions(0o1234)
    mtime = datetime(2025, 7, 12, 12, 34, 56, tzinfo=UTC)

    def build(file_type, path=default_path):
        # Every metadata object in this test only differs by type and path.
        return FileMetadata(
            path,
            type=file_type,
            permissions=perms,
            modification_time=mtime,
            byte_size=123,
        )

    # A regular file: every stored attribute is exposed unchanged.
    file_md = build("file")
    assert file_md.path == default_path
    assert file_md.type == "file"
    assert file_md.permissions == perms
    assert file_md.modification_time == mtime
    assert file_md.byte_size == 123
    assert file_md.unix_mode == "--w--wxr-T"
    assert not file_md.is_hidden
    assert file_md.is_file
    assert not file_md.is_dir
    assert not file_md.is_symlink
    assert not file_md.is_other

    # For the other types, exactly one of the predicates must be truthy.
    predicates = ("is_file", "is_dir", "is_symlink", "is_other")
    cases = (
        ("dir", "is_dir"),
        ("symlink", "is_symlink"),
        ("other", "is_other"),
    )
    for file_type, truthy_predicate in cases:
        md = build(file_type)
        assert md.type == file_type
        for predicate in predicates:
            assert bool(getattr(md, predicate)) == (predicate == truthy_predicate)

    # A file name starting with a dot is reported as hidden.
    assert build("file", PurePosixPath("/some_dir/.some_file")).is_hidden
|
||||||
|
|
||||||
|
|
||||||
|
def test_file_metadata_eq():
    """Check that `FileMetadata` equality takes every field into account.

    A reference object is compared with an identical copy (must be equal) and
    with copies that differ by exactly one field (must all be different).
    """
    path = PurePosixPath("/some_dir/some_file")
    permissions = Permissions(0o1234)
    mod_time = datetime(2025, 7, 12, 12, 34, 56, tzinfo=UTC)

    def build(**overrides):
        # Start from the reference values and replace only the requested fields.
        fields = {
            "path": path,
            "type": "file",
            "permissions": permissions,
            "modification_time": mod_time,
            "byte_size": 123,
        }
        fields.update(overrides)
        file_path = fields.pop("path")
        return FileMetadata(file_path, **fields)

    md = build()

    assert build() == md
    assert build(path=PurePosixPath("/some_dir/some_other_file")) != md
    assert build(type="dir") != md
    assert build(permissions=Permissions(0o0752)) != md
    # The other time must be timezone-aware like `mod_time`: a naive datetime
    # never compares equal to an aware one, so a naive value here would pass
    # without really checking that a different time is detected.
    assert build(modification_time=datetime(2025, 1, 2, 3, 4, 5, tzinfo=UTC)) != md
    assert build(byte_size=124) != md
|
||||||
|
|
||||||
|
|
||||||
|
########################################################################################
|
||||||
|
# mkdir
|
||||||
|
|
||||||
|
|
||||||
|
def test_mkdir_fails_with_relative_path(fs: VirtualFileSystem):
    """`mkdir` raises `FsError` when given a path without a leading slash."""
    relative_path = "test"
    with pytest.raises(FsError):
        fs.mkdir(relative_path)
|
||||||
|
|
||||||
|
|
||||||
|
def test_mkdir_default(fs: VirtualFileSystem):
    """`mkdir` with default arguments creates a directory that did not exist."""
    target = "/test"
    assert not fs.exists(target)

    fs.mkdir(target)

    assert fs.is_dir(target)
|
||||||
|
|
||||||
|
|
||||||
|
def test_mkdir_nested_fails_without_parents(fs: VirtualFileSystem):
    """`mkdir` raises `FsError` when the parent is missing and `parents` is not set."""
    missing_parent = "/foo"
    assert not fs.exists(missing_parent)

    with pytest.raises(FsError):
        fs.mkdir("/foo/bar")
|
||||||
|
|
||||||
|
|
||||||
|
def test_mkdir_nested(fs: VirtualFileSystem):
    """`mkdir(parents=True)` creates the directory along with its missing parent."""
    nested = "/test/foobar"
    assert not fs.exists("/test")

    fs.mkdir(nested, parents=True)

    assert fs.is_dir(nested)
|
||||||
|
|
||||||
|
|
||||||
|
def test_mkdir_fails_if_exists(fs: VirtualFileSystem):
    """A second `mkdir` on an existing directory raises `FsError` by default."""
    target = "/foo"
    assert not fs.exists(target)

    # First creation succeeds.
    fs.mkdir(target)
    assert fs.is_dir(target)

    # Creating it again without `exist_ok` is an error.
    with pytest.raises(FsError):
        fs.mkdir(target)
|
||||||
|
|
||||||
|
|
||||||
|
def test_mkdir_exists_ok(fs: VirtualFileSystem):
    """`mkdir(exist_ok=True)` does not raise when the directory already exists."""
    target = "/test"
    assert not fs.exists(target)

    fs.mkdir(target)
    assert fs.is_dir(target)

    # Must complete without raising.
    fs.mkdir(target, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
def test_mkdir_exists_ok_fail_if_file(fs: VirtualFileSystem):
    """`mkdir(exist_ok=True)` still raises `FsError` if the path is a regular file."""
    target = "/test"
    fs.write_bytes(target, b"test")
    assert fs.is_file(target)

    with pytest.raises(FsError):
        fs.mkdir(target, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
def test_mkdir_mode(fs: VirtualFileSystem):
    """`mkdir(mode=...)` creates a directory reporting the requested permissions."""
    target = "/test"
    assert not fs.exists(target)

    requested = Permissions(0o741)
    fs.mkdir(target, mode=requested)

    assert fs.is_dir(target)
    created_md = fs.metadata(target)
    assert created_md.permissions == requested
|
||||||
|
|
||||||
|
|
||||||
|
########################################################################################
|
||||||
|
# read_bytes / write_bytes
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_write_bytes(fs: VirtualFileSystem):
    """Round-trip data through `write_bytes` / `read_bytes` and check error cases."""
    target = "/test"
    assert not fs.exists(target)

    # Writing from a bytes object.
    fs.write_bytes(target, b"This is a test.")
    assert fs.read_bytes(target) == b"This is a test."

    # Writing from a binary stream replaces the previous content.
    source = BytesIO(b"Another test.")
    fs.write_bytes(target, source)
    assert fs.read_bytes(target) == b"Another test."

    # Reading a missing file fails.
    with pytest.raises(FsError):
        fs.read_bytes("/does_not_exist")

    # Writing inside a missing directory fails.
    with pytest.raises(FsError):
        fs.write_bytes("/does_not_exist/foobar", b"")
|
||||||
|
|
||||||
|
|
||||||
|
def test_open_read_write(fs: VirtualFileSystem):
    """Exercise the stream API: `open_write`, `open_read`, overwrite and errors."""
    target = "/test"
    assert not fs.exists(target)

    # Write the content in two separate chunks.
    with fs.open_write(target) as writer:
        for chunk in (b"foo", b"bar"):
            writer.write(chunk)

    assert fs.exists(target)

    # Read it back chunk by chunk, then check the stream is exhausted.
    with fs.open_read(target) as reader:
        for expected_chunk in (b"foo", b"bar"):
            assert reader.read(3) == expected_chunk
        assert reader.read() == b""

    # Opening an existing file for writing replaces its content.
    with fs.open_write(target) as writer:
        writer.write(b"baz")

    with fs.open_read(target) as reader:
        assert reader.read() == b"baz"

    # Opening a missing file for reading fails.
    with pytest.raises(FsError):
        fs.open_read("/does_not_exist")

    # Opening a file for writing inside a missing directory fails.
    with pytest.raises(FsError):
        fs.open_write("/does_not_exist/foobar")
|
||||||
|
|
||||||
|
|
||||||
|
########################################################################################
|
||||||
|
# metadata
|
||||||
|
|
||||||
|
|
||||||
|
def test_metadata(fs: VirtualFileSystem):
    """Check `metadata` and the `is_*` helpers on a file, a directory and a symlink."""
    perms = Permissions(0o754)
    mtime = datetime(2025, 5, 17, 13, 57, 32, tzinfo=UTC)
    content = b"This is a test\n"
    file_path = "/test_file"
    dir_path = "/.test_dir"
    link_path = "/test_link"

    # Regular file with explicit permissions and modification time.
    fs.write_bytes(file_path, content)
    fs.set_permissions(file_path, perms)
    fs.set_modification_time(file_path, mtime)

    file_md = fs.metadata(file_path)
    assert file_md.path == PurePosixPath("/test_file")
    assert file_md.permissions == perms
    assert file_md.type == "file"
    assert file_md.modification_time == mtime
    assert file_md.byte_size == len(content)
    assert not file_md.is_hidden
    # Querying again without touching the file gives an equal object.
    assert fs.metadata(file_path) == file_md
    assert fs.is_file(file_path)
    assert not fs.is_dir(file_path)
    assert not fs.is_symlink(file_path)
    assert not fs.is_other(file_path)

    # Changing the permissions must be visible in freshly queried metadata.
    fs.set_permissions(file_path, Permissions(0o644))
    assert fs.metadata(file_path) != file_md

    # Directory whose name starts with a dot.
    fs.mkdir(dir_path)
    dir_md = fs.metadata(dir_path)
    assert dir_md.type == "dir"
    assert fs.metadata(dir_path).is_hidden
    assert not fs.is_file(dir_path)
    assert fs.is_dir(dir_path)
    assert not fs.is_symlink(dir_path)
    assert not fs.is_other(dir_path)

    # Symbolic link; "/link_target" is never created in this test.
    fs.make_link(link_path, "/link_target")
    link_md = fs.metadata(link_path)
    assert link_md.type == "symlink"
    assert not fs.is_file(link_path)
    assert not fs.is_dir(link_path)
    assert fs.is_symlink(link_path)
    assert not fs.is_other(link_path)

    # Missing path: `metadata_or_none` returns None, `metadata` raises.
    assert fs.metadata_or_none("/does_not_exist") is None
    with pytest.raises(FsError):
        fs.metadata("/does_not_exist")
|
||||||
|
|
||||||
|
|
||||||
|
########################################################################################
|
||||||
|
# iter_dir
|
||||||
|
|
||||||
|
|
||||||
|
def test_iter_dir(fs: VirtualFileSystem):
|
||||||
|
expected = [
|
||||||
|
(PurePosixPath("/dir"), "dir"),
|
||||||
|
(PurePosixPath("/file"), "file"),
|
||||||
|
(PurePosixPath("/link"), "symlink"),
|
||||||
|
]
|
||||||
|
for path, file_type in expected:
|
||||||
|
if file_type == "dir":
|
||||||
|
fs.mkdir(path)
|
||||||
|
elif file_type == "file":
|
||||||
|
fs.write_bytes(path, b"")
|
||||||
|
elif file_type == "symlink":
|
||||||
|
fs.make_link(path, "/foobar")
|
||||||
|
|
||||||
|
items_metadata = sorted(fs.iter_dir("/"))
|
||||||
|
for md, [path, file_type] in zip(items_metadata, expected, strict=True):
|
||||||
|
assert md.path == path
|
||||||
|
assert md.type == file_type
|
||||||
|
|
||||||
|
|
||||||
|
def test_iter_dir_failure(fs: VirtualFileSystem):
    """`iter_dir` raises `FsError` for a missing path and for a regular file."""
    target = "/test"

    # The path does not exist yet. `list` forces the iteration to happen.
    with pytest.raises(FsError):
        list(fs.iter_dir(target))

    # The path now exists but is a file, not a directory.
    fs.write_bytes(target, b"")
    with pytest.raises(FsError):
        list(fs.iter_dir(target))
|
||||||
@@ -1,111 +0,0 @@
|
|||||||
# bsv - Backup, Synchronization, Versioning
|
|
||||||
# Copyright (C) 2023 Simon Boyé
|
|
||||||
#
|
|
||||||
# This program is free software: you can redistribute it and/or modify
|
|
||||||
# it under the terms of the GNU Affero General Public License as published by
|
|
||||||
# the Free Software Foundation, either version 3 of the License, or
|
|
||||||
# (at your option) any later version.
|
|
||||||
#
|
|
||||||
# This program is distributed in the hope that it will be useful,
|
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
# GNU Affero General Public License for more details.
|
|
||||||
#
|
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
|
||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
from __future__ import annotations
|
|
||||||
from datetime import UTC, datetime
|
|
||||||
from io import BytesIO
|
|
||||||
from pathlib import Path
|
|
||||||
from random import randbytes
|
|
||||||
from typing import Iterator
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
from tempfile import TemporaryDirectory
|
|
||||||
|
|
||||||
from bsv.repository import Repository, Snapshot, Tree, TreeItem, create_repository, timestamp_from_time
|
|
||||||
from bsv.simple_cas.cas import Digest
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
def tmp_dir():
    """Yield a fresh temporary directory as a `Path`; it is removed after the test."""
    with TemporaryDirectory(prefix="simple_cas_") as dir_name:
        dir_path = Path(dir_name)
        yield dir_path
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
def repo(tmp_dir):
    """Create a repository named "test_repo" in the `bsv` sub-directory of `tmp_dir`."""
    repo_path = tmp_dir / "bsv"
    return create_repository(repo_path, "test_repo")
|
|
||||||
|
|
||||||
|
|
||||||
def test_read_write_blob(tmp_dir: Path, repo: Repository):
    """Store a 1 MiB random file as a blob and check it reads back identically."""
    source = tmp_dir / "test.dat"
    make_random_file(source, 1 << 20)

    # Add the file content to the repository.
    with source.open("rb") as stream:
        digest = repo.add_blob(stream)

    # Fetch it back through its digest and compare with the file on disk.
    stored = repo.get_blob(digest).read()
    assert stored == source.read_bytes()
|
|
||||||
|
|
||||||
|
|
||||||
def test_read_write_tree(repo: Repository):
    """Round-trip a two-item `Tree` through bytes and through the repository."""
    now = datetime.now(UTC)

    def make_item(name, fill_byte, mode):
        # The digest is a dummy value: one byte repeated to the CAS digest size.
        return TreeItem(
            name,
            Digest(bytes([fill_byte]) * repo._cas._digest_size),
            mode,
            creation_timestamp=timestamp_from_time(now),
            modification_timestamp=timestamp_from_time(now),
        )

    items = [
        make_item("xyz", 42, 0o744),
        make_item("foobar", 123, 0o777),
    ]
    tree = Tree(repo, items)

    # Serialization round-trip.
    serialized = tree.to_bytes()
    assert Tree.from_bytes(repo, serialized) == tree

    # Storage round-trip.
    digest = repo.add_tree(tree)
    assert repo.get_tree(digest) == tree
|
|
||||||
|
|
||||||
|
|
||||||
def test_read_write_snapshot(repo: Repository):
    """Round-trip a `Snapshot` through bytes and through the repository."""
    snapshot = Snapshot(
        repo=repo,
        # Dummy digest: one byte repeated to the CAS digest size.
        tree_digest=Digest(bytes([42]) * repo._cas._digest_size),
        repo_name="test_repo",
        # Timezone-aware, like the timestamps used in `test_read_write_tree`,
        # so the result does not depend on the local timezone of the machine.
        timestamp=timestamp_from_time(datetime.now(UTC)),
    )

    # Serialization round-trip.
    assert Snapshot.from_bytes(repo, snapshot.to_bytes()) == snapshot

    # Storage round-trip.
    digest = repo.add_snapshot(snapshot)
    assert repo.get_snapshot(digest) == snapshot
|
|
||||||
|
|
||||||
|
|
||||||
def make_random_file(path: Path, size: int):
    """Write random bytes to `path`, one chunk per size yielded by `iter_chunks(size)`."""
    with path.open("wb") as output:
        # Each chunk is generated lazily so the whole file is never held in memory.
        output.writelines(randbytes(length) for length in iter_chunks(size))
|
|
||||||
|
|
||||||
def iter_chunks(size: int, chunk_size: int = 1 << 16) -> Iterator[int]:
    """Split `size` into consecutive chunk lengths of at most `chunk_size`.

    Yields `chunk_size` for every full chunk, then the remainder if there is
    one, so the yielded values always sum to `size`. Nothing is yielded when
    `size` is zero or negative.

    Raises:
        ZeroDivisionError: if `chunk_size` is 0 (raised on first iteration).
    """
    # Without this guard a size of 0 would wrongly produce one full chunk.
    if size <= 0:
        return

    full_chunks, remainder = divmod(size, chunk_size)
    for _ in range(full_chunks):
        yield chunk_size
    if remainder:
        yield remainder
|
|
||||||
Reference in New Issue
Block a user