Start implementing a cas in separate crates.

This commit is contained in:
2021-08-01 17:06:50 +02:00
parent 468f81b86c
commit 5664beca57
19 changed files with 1241 additions and 315 deletions

13
cas-core/Cargo.toml Normal file
View File

@@ -0,0 +1,13 @@
[package]
name = "cas-core"
version = "0.1.0"
authors = ["Simon Boyé <sim.boye@gmail.com>"]
edition = "2018"
license = "AGPL-3.0-or-later"
[dependencies]
thiserror = "1.0.25"
digest = { version = "0.9.0", features = ["alloc"] }
[dev-dependencies]
sha2 = "0.9.5"

40
cas-core/src/cas.rs Normal file
View File

@@ -0,0 +1,40 @@
// This file is part of bsv.
//
// bsv is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// cdb is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
// more details.
//
// You should have received a copy of the Affero GNU General Public License
// along with cdb. If not, see <https://www.gnu.org/licenses/>.
use super::error::Result;
use super::object_id::ObjectId;
use super::object_type::ObjectType;
use super::object_metadata::ObjectMetadata;
// pub struct ObjectIdIterator;
pub trait Cas {
fn object_id_from_string(&self, hex: &str) -> Result<ObjectId>;
fn object_id_from_partial(&self, hex: &str) -> Result<ObjectId>;
fn has_object_id(&self, oid: &ObjectId) -> Result<bool>;
// fn iter_object_id(&self) -> Result<ObjectIdIterator>;
fn read_object(&self, oid: &ObjectId) -> Result<(ObjectMetadata, &dyn std::io::Read)>;
fn write_object(&mut self, otype: ObjectType, data: &mut dyn std::io::Read) -> Result<ObjectId>;
fn remove_object(&mut self, oid: &ObjectId) -> Result<()>;
fn read_ref(&self, key: &str) -> Result<ObjectId>;
fn write_ref(&mut self, key: &str, value: &ObjectId) -> Result<()>;
fn remove_ref(&mut self, key: &str) -> Result<()>;
}

122
cas-core/src/error.rs Normal file
View File

@@ -0,0 +1,122 @@
// This file is part of bsv.
//
// bsv is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// cdb is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
// more details.
//
// You should have received a copy of the Affero GNU General Public License
// along with cdb. If not, see <https://www.gnu.org/licenses/>.
// use std::path::PathBuf;
/// Result type used through cas-core.
pub type Result<T> = std::result::Result<T, Box<Error>>;
/// Error type used through cas-core.
#[non_exhaustive]
#[derive(Debug, PartialEq, Eq, thiserror::Error)]
pub enum Error {
// #[error("failed to create repository: {message}")]
// RepositoryCreationFailed {
// message: String,
// source: Option<Box<dyn std::error::Error>>,
// },
// #[error("invalid object id: {message}")]
// InvalidObjectId {
// message: String,
// source: Option<Box<dyn std::error::Error>>,
// },
// #[error("invalid size (got: {size}, expected: {expected})")]
// InvalidSize {
// size: usize,
// expected: usize,
// },
// #[error("non-empty directory ({dir})")]
// NonEmptyDirectory {
// dir: PathBuf
// },
// #[error("invalid character(s) ({characters})")]
// InvalidCharacters {
// characters: String,
// },
// #[error("invalid object type ({otype:?})")]
// InvalidObjectType {
// otype: [u8; 4],
// },
// #[error("invalid object size (expected {expected}, got {size})")]
// InvalidObjectSize {
// size: u64,
// expected: u64,
// },
// #[error("unsupported file type")]
// UnsupportedFileType,
// #[error("invalid path ({path})")]
// InvalidPath { path: PathBuf },
// #[error("io error{}", format_optional_path(path))]
// IoError {
// source: std::io::Error,
// path: Option<PathBuf>,
// },
#[error("{0}")]
Error(String),
}
impl Error {
// pub fn repository_creation_failed<M: Into<String>>(message: M) -> Box<Error> {
// Box::new(Error::RepositoryCreationFailed {
// message: message.into(),
// source: None,
// })
// }
// pub fn repository_creation_failed_from<M: Into<String>>(source: Box<dyn std::error::Error>, message: M) -> Box<Error> {
// Box::new(Error::RepositoryCreationFailed {
// message: message.into(),
// source: Some(source),
// })
// }
// pub fn invalid_object_id<M: Into<String>>(message: M) -> Box<Error> {
// Box::new(Error::InvalidObjectId {
// message: message.into(),
// source: None,
// })
// }
// pub fn invalid_object_id_from<M: Into<String>>(source: Box<dyn std::error::Error>, message: M) -> Box<Error> {
// Box::new(Error::InvalidObjectId {
// message: message.into(),
// source: Some(source),
// })
// }
pub fn error<M: Into<String>>(message: M) -> Box<Error> {
Box::new(Error::Error(message.into()))
}
}
// fn format_optional_path(maybe_path: &Option<PathBuf>) -> String {
// match maybe_path {
// Some(path) => { format!(" ({:?})", path) },
// None => { String::new() }
// }
// }

44
cas-core/src/lib.rs Normal file
View File

@@ -0,0 +1,44 @@
// This file is part of bsv.
//
// bsv is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// cdb is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
// more details.
//
// You should have received a copy of the Affero GNU General Public License
// along with cdb. If not, see <https://www.gnu.org/licenses/>.
//! # cas-core
//!
//! `cas-core` provides traits and types to interface with content-addressable
//! storage.
#![feature(inherent_ascii_escape)]
extern crate thiserror;
extern crate digest;
mod error;
mod object_id;
mod object_type;
mod object_metadata;
mod pipeline;
mod cas;
pub use crate::{
error::{Error, Result},
object_id::{ObjectId, hex, write_hex},
object_type::{ObjectType},
object_metadata::{ObjectMetadata},
pipeline::{Pipeline, DefaultPipeline},
cas::{Cas},
};

212
cas-core/src/object_id.rs Normal file
View File

@@ -0,0 +1,212 @@
// This file is part of bsv.
//
// bsv is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// cdb is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
// more details.
//
// You should have received a copy of the Affero GNU General Public License
// along with cdb. If not, see <https://www.gnu.org/licenses/>.
use std::fmt;
use std::str::{FromStr};
use std::sync::Arc;
use super::error::*;
/// A unique identifier for an object.
///
/// This is the handle used to reference an Object. This is an opaque type that uniquely identify an
/// object. It can be compared to another ObjectId, be hashed and that's about it.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ObjectId {
id: Arc<Vec<u8>>,
}
impl ObjectId {
pub fn new(id: &[u8]) -> ObjectId {
ObjectId {
id: Arc::new(id.into()),
}
}
pub fn id(&self) -> &[u8] {
self.id.as_slice()
}
}
impl FromStr for ObjectId {
type Err = Box<Error>;
fn from_str(id_str: &str) -> Result<ObjectId> {
Ok(ObjectId {
id: Arc::new(
CharPairs::new(id_str, false)
.map(parse_byte)
.collect::<Result<Vec<_>>>()?
),
})
}
}
impl fmt::Display for ObjectId {
fn fmt(&self, f: &mut fmt::Formatter) -> std::result::Result<(), fmt::Error> {
write_hex(f, self.id.as_slice())
}
}
impl fmt::Debug for ObjectId {
fn fmt(&self, f: &mut fmt::Formatter) -> std::result::Result<(), fmt::Error> {
write!(f, "ObjectId::new(\"{}\")", self)
}
}
pub fn hex(bytes: &[u8]) -> String {
let mut hex = String::with_capacity(bytes.len() * 2);
write_hex(&mut hex, bytes).expect("can't format bytes ?");
hex
}
pub fn write_hex<W: std::fmt::Write>(write: &mut W, bytes: &[u8])
-> std::result::Result<(), std::fmt::Error>
{
for byte in bytes.iter() {
write!(write, "{:02x}", byte)?;
}
Ok(())
}
struct CharPairs<'a> {
string: &'a str,
chars: std::str::CharIndices<'a>,
char_count: usize,
index: usize,
allow_partial: bool,
}
impl<'a> CharPairs<'a> {
fn new(string: &'a str, allow_partial: bool) -> Self {
let mut char_pairs = Self {
string,
chars: string.char_indices(),
char_count: 0,
index: 0,
allow_partial,
};
char_pairs.next_char();
char_pairs
}
fn next_char(&mut self) -> bool {
if self.index == self.string.len() {
false
}
else {
match self.chars.next() {
Some((i, _)) => {
self.char_count += 1;
self.index = i;
},
None => {
self.index = self.string.len();
}
}
true
}
}
}
impl<'a> Iterator for CharPairs<'a> {
type Item = Result<&'a str>;
fn next(&mut self) -> Option<Self::Item> {
let start_index = self.index;
if !self.next_char() {
return None;
}
if self.next_char() || self.allow_partial {
Some(Ok(&self.string[start_index..self.index]))
}
else {
Some(Err(Error::error(&format!("invalid string: got {} characters, expected even number", self.char_count))))
}
}
}
fn parse_byte(maybe_str_byte: Result<&str>) -> Result<u8> {
match maybe_str_byte {
Ok(str_byte) =>
u8::from_str_radix(str_byte, 16)
.map_err(|_| Error::error("invalid character")),
Err(err) => Err(err),
}
}
#[cfg(test)]
mod tests {
use std::str::{FromStr};
use crate::error::*;
use super::ObjectId;
#[test]
fn object_id_display() {
let obj = ObjectId::new(&[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
]);
assert_eq!(format!("{}", obj), "0001020304050607");
}
#[test]
fn object_id_debug() {
let obj = ObjectId::new(&[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
]);
assert_eq!(format!("{:?}", obj), "ObjectId::new(\"0001020304050607\")");
}
#[test]
fn str_to_object_id() {
let obj = ObjectId::from_str("0001020304050607")
.expect("object id parsing failed");
let obj_ref = ObjectId::new(&[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
]);
assert_eq!(obj, obj_ref);
}
#[test]
fn str_to_object_id_invalid_size() {
let result = ObjectId::from_str("000102030405060");
assert_eq!(
result.expect_err("object id parsing should have failed"),
Error::error("invalid string: got 15 characters, expected even number"),
);
}
#[test]
fn str_to_object_id_invalid_character() {
let result = ObjectId::from_str("00010203 4050607");
assert_eq!(
result.expect_err("object id parsing should have failed"),
Error::error("invalid character"),
);
}
}

View File

@@ -0,0 +1,61 @@
// This file is part of bsv.
//
// bsv is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// cdb is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
// more details.
//
// You should have received a copy of the Affero GNU General Public License
// along with cdb. If not, see <https://www.gnu.org/licenses/>.
use super::object_type::{ObjectType};
#[derive(Clone, Eq, PartialEq)]
pub struct ObjectMetadata {
otype: ObjectType,
size: usize,
}
impl ObjectMetadata {
pub fn new(otype: ObjectType, size: usize) -> Self {
Self {
otype,
size,
}
}
pub fn otype(&self) -> &ObjectType {
&self.otype
}
pub fn size(&self) -> usize {
self.size
}
}
impl std::fmt::Debug for ObjectMetadata {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> {
f.write_fmt(format_args!("<meta {} {}>", self.otype().id().escape_ascii().to_string(), self.size))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn object_metadata_debug() {
let meta = ObjectMetadata::new(ObjectType::new(b"tree").unwrap(), 1234);
assert_eq!(meta, meta);
assert_eq!(format!("{:?}", meta), "<meta tree 1234>");
}
}

View File

@@ -0,0 +1,63 @@
// This file is part of bsv.
//
// bsv is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// cdb is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
// more details.
//
// You should have received a copy of the Affero GNU General Public License
// along with cdb. If not, see <https://www.gnu.org/licenses/>.
use super::error::*;
#[derive(Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct ObjectType {
id: [u8; 4],
}
impl ObjectType {
pub fn new(id: &[u8]) -> Result<Self> {
if id.len() != 4 {
Err(Error::error("Invalid object type size."))
}
else {
let mut buf = [0; 4];
for (i, b) in id.iter().enumerate() {
buf[i] = *b;
}
Ok(Self {
id: buf,
})
}
}
pub fn id(&self) -> &[u8; 4] {
&self.id
}
}
impl std::fmt::Debug for ObjectType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> {
f.write_fmt(format_args!("<otype {}>", self.id.escape_ascii().to_string()))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn object_type_debug() {
let otype = ObjectType::new(b"tree").unwrap();
assert_eq!(otype, otype);
assert_eq!(format!("{:?}", otype), "<otype tree>");
}
}

252
cas-core/src/pipeline.rs Normal file
View File

@@ -0,0 +1,252 @@
// This file is part of bsv.
//
// bsv is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// cdb is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
// more details.
//
// You should have received a copy of the Affero GNU General Public License
// along with cdb. If not, see <https://www.gnu.org/licenses/>.
use digest::DynDigest;
use super::error::*;
use super::object_id::ObjectId;
pub trait Reader {
fn read(&mut self, buf: &mut [u8]) -> Result<usize>;
fn finalize(self: Box<Self>) -> Result<ObjectId>;
fn read_all(&mut self) -> Result<Vec<u8>> {
let mut buffer = [0u8; 1 << 13];
let mut data = Vec::new();
let mut count = usize::MAX;
while count != 0 {
count = self.read(&mut buffer)?;
if count != 0 {
data.extend_from_slice(&buffer[..count]);
}
}
Ok(data)
}
}
pub trait Writer {
fn write(&mut self, buf: &[u8]) -> Result<()>;
fn finalize(self: Box<Self>) -> Result<ObjectId>;
}
pub trait Pipeline {
fn new_reader(&self, reader: Box<dyn Reader>) -> Box<dyn Reader>;
fn new_writer(&self, writer: Box<dyn Writer>) -> Box<dyn Writer>;
}
impl<R: std::io::Read> Reader for R {
fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
<Self as std::io::Read>::read(self, buf).or_else(|err|
Err(Error::error(format!("Read error: {}", err)))
)
}
fn finalize(self: Box<Self>) -> Result<ObjectId> {
Err(Error::error("reader pipline has no digest step"))
}
}
impl<W: std::io::Write> Writer for W {
fn write(&mut self, buf: &[u8]) -> Result<()> {
self.write_all(buf).or_else(|err|
Err(Error::error(format!("Write error: {}", err)))
)
}
fn finalize(self: Box<Self>) -> Result<ObjectId> {
Err(Error::error("reader pipline has no digest step"))
}
}
pub struct DigestReader<'a> {
reader: Box<dyn Reader + 'a>,
digest: Option<Box<dyn DynDigest + 'a>>,
oid: Option<ObjectId>,
}
impl<'a> DigestReader<'a> {
pub fn new(reader: Box<dyn Reader + 'a>, digest: Box<dyn DynDigest + 'a>) -> Self {
return DigestReader {
reader,
digest: Some(digest),
oid: None,
}
}
}
impl<'a> Reader for DigestReader<'a> {
fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
let count = if let Some(dig) = &mut self.digest {
let count = self.reader.read(buf)?;
dig.update(&buf[..count]);
count
}
else {
0
};
if count == 0 {
if let Some(digest) = self.digest.take() {
self.oid = Some(ObjectId::new(&digest.finalize()));
}
}
Ok(count)
}
fn finalize(self: Box<Self>) -> Result<ObjectId> {
self.oid.ok_or_else(|| Error::error("Reader not finalized"))
}
}
pub struct DigestWriter<'a> {
writer: Box<dyn Writer + 'a>,
digest: Box<dyn DynDigest + 'a>,
}
impl<'a> DigestWriter<'a> {
pub fn new(writer: Box<dyn Writer + 'a>, digest: Box<dyn DynDigest + 'a>) -> Self {
return DigestWriter {
writer,
digest,
}
}
}
impl<'a> Writer for DigestWriter<'a> {
fn write(&mut self, buf: &[u8]) -> Result<()> {
self.digest.update(buf);
self.writer.write(buf)?;
Ok(())
}
fn finalize(self: Box<Self>) -> Result<ObjectId> {
Ok(ObjectId::new(&self.digest.finalize()))
}
}
pub struct DefaultPipeline {
digest: Box<dyn DynDigest>,
}
impl DefaultPipeline {
pub fn new(digest: Box<dyn DynDigest>) -> Self {
Self {
digest,
}
}
}
impl Pipeline for DefaultPipeline {
fn new_reader(&self, reader: Box<dyn Reader>) -> Box<dyn Reader> {
Box::new(DigestReader::new(reader, self.digest.box_clone()))
}
fn new_writer(&self, writer: Box<dyn Writer>) -> Box<dyn Writer> {
Box::new(DigestWriter::new(writer, self.digest.box_clone()))
}
}
#[cfg(test)]
mod tests {
extern crate sha2;
use std::str::FromStr;
use sha2::Sha256;
use super::*;
#[test]
fn std_read_as_reader() {
let data = b"Hello World!".to_vec();
let mut reader: Box<dyn Reader> = Box::new(data.as_slice());
let data2 = reader.read_all().expect("read failed");
let maybe_oid = reader.finalize();
assert_eq!(data2, data);
assert!(maybe_oid.is_err());
}
#[test]
fn std_write_as_writer() {
let data = b"Hello World!".to_vec();
let mut buffer = [0u8; 32];
let maybe_oid = {
let mut writer: Box<dyn Writer> = Box::new(&mut buffer[..]);
writer.write(&data).expect("write failed");
writer.finalize()
};
assert_eq!(buffer[..data.len()], data);
// Check that the rest of the buffer is unchanged
assert!(buffer[data.len()..].iter().all(|&v| v == 0));
assert!(maybe_oid.is_err());
}
#[test]
fn digest_reader() {
let data = b"Hello World!".to_vec();
let oid = ObjectId::from_str(
"7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069"
).expect("invalid object id");
let mut reader = Box::new(DigestReader::new(
Box::new(data.as_slice()),
Box::new(Sha256::default()),
));
let data2 = reader.read_all().expect("read failed");
let oid2 = reader.finalize().expect("finalize failed");
assert_eq!(data2, data);
assert_eq!(oid2, oid);
}
#[test]
fn digest_writer() {
let data = b"Hello World!".to_vec();
let oid = ObjectId::from_str(
"7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069"
).expect("invalid object id");
let mut buffer = [0u8; 32];
let oid2 = {
let mut writer = Box::new(DigestWriter::new(
Box::new(&mut buffer[..]),
Box::new(Sha256::default()),
));
writer.write(&data).expect("write failed");
writer.finalize().expect("finalize failed")
};
assert_eq!(buffer[..data.len()], data);
// Check that the rest of the buffer is unchanged
assert!(buffer[data.len()..].iter().all(|&v| v == 0));
assert_eq!(oid2, oid);
}
}