# bsv - Backup, Synchronization, Versioning
# Copyright (C) 2023 Simon Boyé
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime as DateTime
import hashlib
from io import BytesIO
from pathlib import Path, PurePosixPath
import platform
import tomllib
from typing import TYPE_CHECKING, BinaryIO, Callable, Self, Type

from fastcdc import fastcdc
import tomlkit

from bsv import __version__
from bsv.exception import ConfigError
from bsv.object import ObjectInfo
from bsv.path_map import PathMap
from bsv.simple_cas.cas import Digest, SimpleCas
from bsv.util import Hash, read_exact, read_exact_or_eof, time_from_timestamp_us, timestamp_us_from_time

if TYPE_CHECKING:
    from bsv.tree_walker import TreeWalker


DEFAULT_MIN_CHUNK_SIZE = 1 << 12
DEFAULT_AVG_CHUNK_SIZE = 1 << 16
DEFAULT_MAX_CHUNK_SIZE = 1 << 20


class Repository:
    _path: Path
    _name: str
    _cas: SimpleCas
    _min_chunk_size: int = DEFAULT_MIN_CHUNK_SIZE
    _avg_chunk_size: int = DEFAULT_AVG_CHUNK_SIZE
    _max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE
    _path_map: PathMap
    # _remotes: list[object]
    _context_depth: int = 0

    def __init__(self, path: Path):
        self._path = path

        with self.config_file.open("rb") as stream:
            config = tomllib.load(stream)

        bsv = config.get("bsv", {})
        self._name = bsv.get("name") or platform.node()
        self._cas = make_cas(
            bsv.get("cas"),
            self._path,
            lambda: hashlib.new(bsv.get("hash")),  # type: ignore
        )
        # Fall back to the module defaults when the chunk sizes are missing
        # from the configuration (a plain .get() would store None).
        self._min_chunk_size = bsv.get("min_chunk_size", DEFAULT_MIN_CHUNK_SIZE)
        self._avg_chunk_size = bsv.get("avg_chunk_size", DEFAULT_AVG_CHUNK_SIZE)
        self._max_chunk_size = bsv.get("max_chunk_size", DEFAULT_MAX_CHUNK_SIZE)
        self._path_map = PathMap.from_obj(bsv.get("path_map", []))

    @property
    def path(self) -> Path:
        return self._path

    @property
    def config_file(self) -> Path:
        return self.path / "bsv_config.toml"

    @property
    def name(self) -> str:
        return self._name

    @property
    def path_map(self) -> PathMap:
        return self._path_map.clone()

    def get_blob(self, digest: Digest) -> BlobObject:
        with self:
            obj, blob = self._read(digest, object_type=b"blob")
            return BlobObject(
                digest = obj.digest,
                object_type = obj.object_type,
                size = obj.size,
                blob = blob,
            )

    def add_blob(self, stream: BinaryIO, *, dry_run: bool=False) -> BlobObject:
        with self:
            return self._write(b"blob", stream, dry_run=dry_run)

    def get_symlink(self, digest: Digest) -> SymlinkObject:
        with self:
            obj = self._cas.read(digest, object_type=b"slnk")
            return SymlinkObject(
                digest = obj.digest,
                object_type = obj.object_type,
                size = obj.size,
                symlink = Symlink.from_bytes(self, obj.data),
            )

    def add_symlink(self, symlink: Symlink, *, dry_run: bool=False) -> SymlinkObject:
        with self:
            data = symlink.to_bytes()
            return SymlinkObject(
                digest = self._cas.write(b"slnk", data, dry_run=dry_run),
                object_type = b"slnk",
                size = len(data),
                symlink = symlink,
            )

    def add_symlink_from_fs_target(self, fs_symlink: Path, fs_target: Path, *, dry_run: bool=False) -> SymlinkObject:
        assert fs_symlink.is_absolute()
        return self.add_symlink(
            Symlink(
                repo = self,
                is_absolute = fs_target.is_absolute(),
                target = self._path_map.relative_bsv_path(fs_target, relative_to=fs_symlink),
            ),
            dry_run = dry_run,
        )
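    # For example (a sketch; both paths are hypothetical and, per the
    # assertion above, fs_symlink must be absolute):
    #
    #     repo.add_symlink_from_fs_target(
    #         Path("/home/user/link"), Path("/home/user/target")
    #     )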
    def get_tree(self, digest: Digest) -> TreeObject:
        with self:
            obj = self._cas.read(digest, object_type=b"tree")
            return TreeObject(
                digest = obj.digest,
                object_type = obj.object_type,
                size = obj.size,
                tree = Tree.from_bytes(self, obj.data),
            )

    def add_tree(self, tree: Tree, *, dry_run: bool=False) -> TreeObject:
        with self:
            data = tree.to_bytes()
            return TreeObject(
                digest = self._cas.write(b"tree", data, dry_run=dry_run),
                object_type = b"tree",
                size = len(data),
                tree = tree,
            )

    def add_tree_from_path(self, path: Path, *, dry_run: bool=False) -> TreeObject:
        from bsv.tree_walker import TreeWalker
        walker = TreeWalker(self, dry_run=dry_run)
        return walker.add_tree(path)

    def get_snapshot(self, digest: Digest) -> SnapshotObject:
        with self:
            obj = self._cas.read(digest, object_type=b"snap")
            return SnapshotObject(
                digest = obj.digest,
                object_type = obj.object_type,
                size = obj.size,
                snapshot = Snapshot.from_bytes(self, obj.data),
            )

    def add_snapshot(self, snapshot: Snapshot, *, dry_run: bool=False) -> SnapshotObject:
        with self:
            data = snapshot.to_bytes()
            return SnapshotObject(
                digest = self._cas.write(b"snap", data, dry_run=dry_run),
                object_type = b"snap",
                size = len(data),
                snapshot = snapshot,
            )

    # def take_snapshot(
    #     self,
    #     parent_digests: list[Digest] = [],
    #     *,
    #     walker_type: Type[TreeWalker] | None = None,
    #     dry_run: bool = False,
    # ):
    #     from bsv.tree_walker import TreeWalker
    #     walker = (walker_type or TreeWalker)(self, dry_run=dry_run)
    #     # parents = [
    #     #     self.get_snapshot(digest)
    #     #     for digest in parent_digests
    #     # ]
    #     parent = self.get_snapshot(parent_digests[0]) if parent_digests else None
    #     snapshot = Snapshot(
    #         repo = self,
    #         tree_digest = walker.add_virtual_tree(self._path_map, parent=),
    #         parents = parent_digests,
    #         repo_name = self._name,
    #         timestamp = timestamp_us_from_time(DateTime.now()),
    #     )
    #     return self.add_snapshot(snapshot, dry_run=dry_run)

    def _read(self, digest: Digest, object_type: bytes) -> tuple[ObjectInfo, Blob]:
        obj = self._cas.read(digest, object_type=object_type)
        stream = BytesIO(obj.data)
        return obj, Blob.from_stream(self, stream, digest_size=self._cas._digest_size)

    def _write(self, object_type: bytes, stream: BinaryIO, *, dry_run: bool=False) -> BlobObject:
        out = BytesIO()
        size = 0
        chunks = []
        # Split the stream into content-defined chunks, store each chunk in
        # the CAS and record its digest and length in the blob index.
        for chunk in fastcdc(
            stream,
            min_size = self._min_chunk_size,
            avg_size = self._avg_chunk_size,
            max_size = self._max_chunk_size,
            fat = True,
        ):
            size += chunk.length
            digest = self._cas.write(b"chnk", chunk.data, dry_run=dry_run)
            chunks.append(Chunk(digest, chunk.length))
            out.write(digest.digest)
            out.write(chunk.length.to_bytes(4))
        return BlobObject(
            # Forward dry_run here too, otherwise a dry run would still write
            # the blob index object.
            digest = self._cas.write(object_type, size.to_bytes(8) + out.getvalue(), dry_run=dry_run),
            object_type = object_type,
            size = 8 + len(out.getvalue()),
            blob = Blob(
                repo = self,
                size = size,
                chunks = chunks,
            )
        )

    def __enter__(self):
        if self._context_depth == 0:
            self._cas.__enter__()
        self._context_depth += 1
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._context_depth -= 1
        if self._context_depth == 0:
            return self._cas.__exit__(exc_type, exc_value, traceback)
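# Example usage (a sketch; the path is hypothetical and assumes the directory
# already holds a repository created by create_repository() below):
#
#     repo = Repository(Path("/backups/laptop"))
#     with repo, Path("/home/user/notes.txt").open("rb") as stream:
#         blob = repo.add_blob(stream)
#     data = repo.get_blob(blob.digest).blob.reader().read()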
def create_repository(
    destination: Path,
    name: str,
    cas: str = "simple",
    hash: str = "sha256",
    min_chunk_size: int = DEFAULT_MIN_CHUNK_SIZE,
    avg_chunk_size: int = DEFAULT_AVG_CHUNK_SIZE,
    max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE,
):
    from os import getlogin

    if not name:
        raise RuntimeError("repository name cannot be empty")
    if not destination.parent.exists():
        raise RuntimeError(f"destination directory {destination.parent} does not exist")
    if destination.exists() and not destination.is_dir():
        raise RuntimeError(f"destination {destination} exists but is not a directory")
    if destination.exists() and len(list(destination.iterdir())):
        raise RuntimeError(f"destination directory {destination} is not empty")

    try:
        destination.mkdir(exist_ok=True)
    except OSError as error:
        raise RuntimeError(f"failed to create destination directory {destination}") from error

    bsv_table = tomlkit.table()
    bsv_table.add(tomlkit.comment("Name of the repository."))
    bsv_table.add(tomlkit.comment("Ideally, this should be unique among all connected repositories."))
    bsv_table.add("name", name)
    bsv_table.add(tomlkit.nl())
    bsv_table.add(tomlkit.comment("Mapping between bsv tree and the actual filesystem."))
    bsv_table.add("path_map", tomlkit.array())
    bsv_table.add("cas", cas)
    bsv_table.add("hash", hash)
    bsv_table.add("min_chunk_size", min_chunk_size)
    bsv_table.add("avg_chunk_size", avg_chunk_size)
    bsv_table.add("max_chunk_size", max_chunk_size)

    doc = tomlkit.document()
    doc.add(tomlkit.comment("bsv repository configuration"))
    doc.add(tomlkit.comment(f"Created by {getlogin()} on {DateTime.now().isoformat()}."))
    doc.add(tomlkit.nl())
    doc.add("bsv", bsv_table)

    config_path = destination / "bsv_config.toml"
    try:
        stream = config_path.open("w", encoding="utf-8")
    except OSError as error:
        raise RuntimeError(f"failed to open configuration file {config_path}") from error
    with stream:
        tomlkit.dump(doc, stream)

    return Repository(destination)


def make_cas(cas_name: str, path: Path, hash_factory: Callable[[], Hash]) -> SimpleCas:
    if cas_name == "simple":
        return SimpleCas(path, hash_factory)
    raise ConfigError(f"unknown cas name {cas_name}")


@dataclass(slots=True)
class ChunkedObject:
    repo: Repository
    size: int
    chunks: list[Chunk]

    @classmethod
    def from_stream(cls, repo: Repository, stream: BinaryIO, digest_size: int) -> Self:
        self = cls(
            repo = repo,
            size = int.from_bytes(read_exact(stream, 8)),
            chunks = [],
        )
        while (chunk := Chunk.from_stream(stream, digest_size)) is not None:
            self.chunks.append(chunk)
        return self

    def reader(self) -> ChunkedObjectReader:
        return ChunkedObjectReader(self)


@dataclass(frozen=True, slots=True)
class Chunk:
    digest: Digest
    size: int

    @classmethod
    def from_stream(cls, stream: BinaryIO, digest_size: int) -> Self | None:
        digest_bytes = read_exact_or_eof(stream, digest_size)
        if digest_bytes is None:
            return None
        digest = Digest(digest_bytes)
        return cls(
            digest = digest,
            size = int.from_bytes(read_exact(stream, 4)),
        )


class ChunkedObjectReader:
    _chunked_object: ChunkedObject
    _chunk_index: int = 0
    _chunk_data: bytes = b""

    def __init__(self, chunked_object: ChunkedObject):
        self._chunked_object = chunked_object

    def read(self, num_bytes: int = -1) -> bytes:
        chunks = self._chunked_object.chunks
        parts = [self._chunk_data]
        size = len(parts[-1])
        while (num_bytes < 0 or size < num_bytes) and self._chunk_index < len(chunks):
            parts.append(self.read1())
            size += len(parts[-1])
        data = b"".join(parts)
        if num_bytes < 0:
            self._chunk_data = b""
            return data
        # Keep any bytes read past the requested amount for the next call and
        # return exactly num_bytes (less only at end of stream).
        self._chunk_data = data[num_bytes:]
        return data[:num_bytes]

    def read1(self) -> bytes:
        cas = self._chunked_object.repo._cas
        chunks = self._chunked_object.chunks
        if self._chunk_index == len(chunks):
            return b""
        obj = cas.read(chunks[self._chunk_index].digest, object_type=b"chnk")
        self._chunk_index += 1
        return obj.data


@dataclass(slots=True)
class Blob(ChunkedObject):
    pass
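# On-disk blob layout (as produced by Repository._write and parsed by
# ChunkedObject.from_stream): an 8-byte big-endian total size, followed by one
# (digest, 4-byte big-endian chunk length) pair per chunk.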
@dataclass(frozen=True, order=True, slots=True)
class BlobObject(ObjectInfo):
    blob: Blob


@dataclass(slots=True)
class Symlink:
    repo: Repository
    is_absolute: bool
    target: PurePosixPath

    @classmethod
    def from_stream(cls, repo: Repository, stream: BinaryIO) -> Self:
        return cls(
            repo = repo,
            # Test the value of the flag byte; bool() of a non-empty bytes
            # object is always True.
            is_absolute = read_exact(stream, 1) != b"\x00",
            target = PurePosixPath(stream.read().decode("utf-8")),
        )

    @classmethod
    def from_bytes(cls, repo: Repository, data: bytes) -> Self:
        stream = BytesIO(data)
        return cls.from_stream(repo, stream)

    def write(self, stream: BinaryIO):
        stream.write(self.is_absolute.to_bytes(1))
        stream.write(self.target.as_posix().encode("utf-8"))

    def to_bytes(self) -> bytes:
        stream = BytesIO()
        self.write(stream)
        return stream.getvalue()


@dataclass(frozen=True, order=True, slots=True)
class SymlinkObject(ObjectInfo):
    symlink: Symlink


@dataclass
class Tree:
    repo: Repository
    items: list[TreeItem]

    @property
    def total_size(self) -> int:
        return sum(
            item.size
            for item in self.items
        )

    @classmethod
    def from_stream(cls, repo: Repository, stream: BinaryIO) -> Self:
        tree = Tree(repo, [])
        while (item := TreeItem.from_stream(stream, repo._cas._digest_size)) is not None:
            tree.items.append(item)
        return tree

    @classmethod
    def from_bytes(cls, repo: Repository, data: bytes) -> Self:
        stream = BytesIO(data)
        return cls.from_stream(repo, stream)

    def write(self, stream: BinaryIO):
        # Items are written sorted by name so the encoding is canonical.
        self.items.sort(key=lambda i: i.name)
        for item in self.items:
            item.write(stream)

    def to_bytes(self) -> bytes:
        stream = BytesIO()
        self.write(stream)
        return stream.getvalue()


@dataclass(frozen=True, order=True, slots=True)
class TreeObject(ObjectInfo):
    tree: Tree

    @property
    def total_size(self) -> int:
        return self.size + self.tree.total_size


@dataclass
class TreeItem:
    digest: Digest
    object_type: bytes
    size: int
    permissions: int
    modification_timestamp_us: int
    name: str

    def __init__(
        self,
        digest: Digest,
        object_type: bytes,
        size: int,
        permissions: int,
        modification_timestamp_us: int,
        name: str,
    ):
        # Reject names containing a slash or a backslash (checking for the
        # two-character substring "/\\" would miss both).
        if "/" in name or "\\" in name:
            raise ValueError(f"invalid tree item name {name}")
        self.digest = digest
        self.object_type = object_type
        self.size = size
        self.permissions = permissions
        self.modification_timestamp_us = modification_timestamp_us
        self.name = name

    @property
    def modification_time(self) -> DateTime:
        return time_from_timestamp_us(self.modification_timestamp_us)

    @modification_time.setter
    def modification_time(self, time: DateTime):
        self.modification_timestamp_us = timestamp_us_from_time(time)

    @classmethod
    def from_stream(cls, stream: BinaryIO, digest_size: int) -> Self | None:
        digest_bytes = read_exact_or_eof(stream, digest_size)
        if digest_bytes is None:
            return None
        return TreeItem(
            digest = Digest(digest_bytes),
            object_type = read_exact(stream, 4),
            size = int.from_bytes(read_exact(stream, 8)),
            permissions = int.from_bytes(read_exact(stream, 2)),
            modification_timestamp_us = int.from_bytes(read_exact(stream, 8), signed=True),
            name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
        )

    def write(self, stream: BinaryIO):
        stream.write(self.digest.digest)
        stream.write(self.object_type)
        stream.write(self.size.to_bytes(8))
        stream.write(self.permissions.to_bytes(2))
        stream.write(self.modification_timestamp_us.to_bytes(8, signed=True))
        name_bytes = self.name.encode("utf-8")
        stream.write(len(name_bytes).to_bytes(2))
        stream.write(name_bytes)
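# TreeItem wire format (see TreeItem.write above): digest bytes, a 4-byte
# object type tag, an 8-byte big-endian size, 2-byte permissions, an 8-byte
# signed modification timestamp in microseconds, then a 2-byte name length
# followed by the UTF-8 encoded name.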
@dataclass
class Snapshot:
    repo: Repository
    tree_digest: Digest
    parents: list[Digest]
    repo_name: str
    timestamp_us: int

    def __post_init__(self):
        # The parent count is serialized as a single byte.
        assert len(self.parents) < 256

    @property
    def time(self) -> DateTime:
        return time_from_timestamp_us(self.timestamp_us)

    @time.setter
    def time(self, time: DateTime):
        self.timestamp_us = timestamp_us_from_time(time)

    @classmethod
    def from_stream(cls, repo: Repository, stream: BinaryIO) -> Self:
        return Snapshot(
            repo = repo,
            tree_digest = Digest(read_exact(stream, repo._cas._digest_size)),
            parents = [
                Digest(read_exact(stream, repo._cas._digest_size))
                for _ in range(int.from_bytes(read_exact(stream, 1)))
            ],
            repo_name = read_exact(stream, int.from_bytes(read_exact(stream, 2))).decode("utf-8"),
            timestamp_us = int.from_bytes(read_exact(stream, 8), signed=True),
        )

    @classmethod
    def from_bytes(cls, repo: Repository, data: bytes) -> Self:
        stream = BytesIO(data)
        return cls.from_stream(repo, stream)

    def write(self, stream: BinaryIO):
        assert len(self.parents) < 256
        stream.write(self.tree_digest.digest)
        stream.write(len(self.parents).to_bytes(1))
        for parent in self.parents:
            stream.write(parent.digest)
        repo_name_bytes = self.repo_name.encode("utf-8")
        stream.write(len(repo_name_bytes).to_bytes(2))
        stream.write(repo_name_bytes)
        stream.write(self.timestamp_us.to_bytes(8, signed=True))

    def to_bytes(self) -> bytes:
        stream = BytesIO()
        self.write(stream)
        return stream.getvalue()


@dataclass(frozen=True, order=True, slots=True)
class SnapshotObject(ObjectInfo):
    snapshot: Snapshot
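# Snapshot wire format (see Snapshot.write above): the tree digest, a 1-byte
# parent count followed by that many parent digests, a 2-byte repository name
# length and the UTF-8 name, then an 8-byte signed timestamp in microseconds.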