Mostly implement simple-cas.

This commit is contained in:
2021-08-22 17:19:32 +02:00
parent 5664beca57
commit 25b5f4e7ba
9 changed files with 1003 additions and 272 deletions

563
cas-simple/src/cas.rs Normal file
View File

@@ -0,0 +1,563 @@
// This file is part of bsv.
//
// bsv is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// cdb is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
// more details.
//
// You should have received a copy of the Affero GNU General Public License
// along with cdb. If not, see <https://www.gnu.org/licenses/>.
use std::str::FromStr;
use std::path::{Path, PathBuf};
use digest::DynDigest;
use toml::Value;
use cas_core::{
Cas, DefaultPipeline, Error, ObjectId, ObjectMetadata, ObjectType,
Pipeline, Reader, ReadWrapper, Result, Writer,
};
use crate::utils::{
obj_dir, obj_path, ref_dir, tmp_dir,
read_config, write_config,
read_metadata, write_metadata,
new_digest,
};
use crate::wfile::WFile;
pub struct SimpleCas {
db_path: PathBuf,
digest: Box<dyn DynDigest>,
pipeline: DefaultPipeline,
config: Value,
}
impl SimpleCas {
pub fn create(db_path: PathBuf, config: Value) -> Result<Self> {
let digest_id = config["cas"]["digest"].as_str()
.ok_or_else(|| Error::error(
"mandatory cas.digest value is invalid or missing from config"
))?;
let digest = new_digest(digest_id)?;
let pipeline = DefaultPipeline::new(digest.box_clone());
if db_path.exists() {
return Err(Error::error(format!(
"failed to create SimpleCas: target directory already exists ({})",
db_path.to_string_lossy(),
)));
}
for path in [
&db_path,
&obj_dir(&db_path),
&ref_dir(&db_path),
&tmp_dir(&db_path),
] {
std::fs::create_dir(path).or_else(|e|
Err(Error::error(format!(
"failed to create directory ({}): {}",
path.to_string_lossy(), e,
)))
)?;
}
write_config(&config, &db_path)?;
Ok(SimpleCas {
db_path,
digest,
pipeline,
config,
})
}
pub fn open(db_path: PathBuf) -> Result<Self> {
let config = read_config(&db_path)?;
let digest_id = config["cas"]["digest"].as_str()
.ok_or_else(|| Error::error(
"mandatory cas.digest value is invalid or missing from config"
))?;
let digest = new_digest(digest_id)?;
let pipeline = DefaultPipeline::new(digest.box_clone());
Ok(SimpleCas {
db_path,
digest,
pipeline,
config,
})
}
pub fn save_config(&self) -> Result<()> {
write_config(&self.config, &self.db_path)
}
}
impl Cas for SimpleCas {
fn object_id_from_string(&self, hex: &str) -> Result<ObjectId> {
if hex.len() == self.digest.output_size() * 2 {
ObjectId::from_str(hex)
}
else {
Err(Error::error(format!(
"invalid object id size: got {}, expected {}",
hex.len(), self.digest.output_size() * 2,
)))
}
}
fn has_object_id(&self, oid: &ObjectId) -> Result<bool> {
let opath = obj_path(&self.db_path, oid);
Ok(opath.is_file())
}
fn iter_object_id<'s>(&'s self) -> Result<Box<dyn 's + Iterator<Item = Result<ObjectId>>>> {
Ok(Box::new(ObjectIdIterator::new(self)?))
}
fn open_object(&self, oid: &ObjectId) -> Result<(ObjectMetadata, Box<dyn Reader>)> {
let opath = obj_path(&self.db_path, oid);
if !opath.is_file() {
return Err(Error::error(format!("object not found: {}", oid)));
}
let file = std::fs::File::open(opath).or_else(|err|
Err(Error::error(format!("failed to open object {}: {}", oid, err))))?;
let wrapper = Box::new(ReadWrapper::new(file));
let mut reader = self.pipeline.new_reader(wrapper);
let metadata = read_metadata(&mut reader)?;
Ok((metadata, reader))
}
fn new_writer(&mut self, otype: &ObjectType, size: u64) -> Result<Box<dyn Writer>> {
let file = Box::new(WFile::new(self.db_path.clone())?);
let mut writer = self.pipeline.new_writer(file);
write_metadata(&mut writer, otype, size)?;
Ok(writer)
}
fn remove_object(&mut self, oid: &ObjectId) -> Result<()> {
let opath = obj_path(&self.db_path, oid);
if !opath.is_file() {
return Err(Error::error(format!("object not found: {}", oid)));
}
std::fs::remove_file(opath).or_else(|err|
Err(Error::error(format!("failed to remove object {}: {}", oid, err))))?;
Ok(())
}
}
pub struct ObjectIdIterator {
root_dirs: Vec<PathBuf>,
inner_dirs: Vec<PathBuf>,
objects: Vec<PathBuf>,
root_index: usize,
inner_index: usize,
object_index: usize,
}
impl ObjectIdIterator {
fn new(cas: &SimpleCas) -> Result<Self> {
let root_dirs = read_dir(&obj_dir(&cas.db_path))?;
let mut it = Self {
root_dirs,
inner_dirs: Vec::new(),
objects: Vec::new(),
root_index: 0,
inner_index: 0,
object_index: 0,
};
it.fetch_inner_dirs()?;
if !it.is_item_valid() {
it.next_valid_item()?;
}
Ok(it)
}
fn get_object_id(&mut self) -> Result<ObjectId> {
let path = &self.objects[self.object_index];
if !path.is_file() {
return Error::err(format!("item in object database is not a file: {:?}", path));
}
let id = path.ancestors()
.take(3)
.collect::<Vec<_>>()
.iter()
.rev()
.map(|p| p.file_name()?.to_str())
.collect::<Option<String>>()
.ok_or_else(|| Error::error(format!("invalid object in object database: {:?}", path)))?;
ObjectId::from_str(&id)
}
fn is_done(&self) -> bool {
self.root_index == self.root_dirs.len()
}
fn is_item_valid(&self) -> bool {
self.root_index < self.root_dirs.len() &&
self.inner_index < self.inner_dirs.len() &&
self.object_index < self.objects.len()
}
fn next_item(&mut self) -> Result<()> {
if self.object_index < self.objects.len() {
self.object_index += 1;
}
else if self.inner_index < self.inner_dirs.len() {
self.inner_index += 1;
self.fetch_objects()?;
}
else if self.root_index < self.root_dirs.len() {
self.root_index += 1;
self.fetch_inner_dirs()?;
}
Ok(())
}
fn next_valid_item(&mut self) -> Result<bool> {
if !self.is_done() {
self.next_item()?;
}
while !self.is_done() && !self.is_item_valid() {
self.next_item()?;
}
Ok(self.is_done())
}
fn fetch_objects(&mut self) -> Result<()> {
self.object_index = 0;
if self.inner_index < self.inner_dirs.len() {
let path = &self.inner_dirs[self.inner_index];
self.objects = read_dir(path)?;
}
else {
self.objects.clear();
}
Ok(())
}
fn fetch_inner_dirs(&mut self) -> Result<()> {
self.inner_index = 0;
if self.root_index < self.root_dirs.len() {
let path = &self.root_dirs[self.root_index];
self.inner_dirs = read_dir(path)?;
self.fetch_objects()?;
}
else {
self.inner_dirs.clear();
}
Ok(())
}
}
impl Iterator for ObjectIdIterator {
type Item = Result<ObjectId>;
fn next(&mut self) -> Option<Self::Item> {
if !self.is_done() {
debug_assert!(self.is_item_valid());
let item = self.get_object_id();
if item.is_ok() {
if let Err(err) = self.next_valid_item() {
return Some(Err(err));
}
}
Some(item)
}
else {
None
}
}
}
fn read_dir(path: &Path) -> Result<Vec<PathBuf>> {
let mut paths = std::fs::read_dir(path)
.or_else(|err| Err(Error::error(format!(
"failed to read directory {:?}: {}", path, err))))?
.map(|dir| dir.map(|dir| dir.path()))
.collect::<std::io::Result<Vec<_>>>()
.or_else(|err| Err(Error::error(format!(
"error while reading directory {:?}: {}", path, err))))?;
paths.sort();
Ok(paths)
}
#[cfg(test)]
mod tests {
use std::path::Path;
use super::*;
fn get_config() -> Value {
toml::toml!(
[cas]
digest = "sha256"
)
}
fn get_cas_path(dir: &Path) -> PathBuf {
let mut cas_path = dir.to_path_buf();
cas_path.push(".bsv");
cas_path
}
#[test]
fn test_create_simple_cas() {
let dir = tempfile::tempdir().expect("failed to create temp test dir");
let cas_path = get_cas_path(dir.path());
let config = get_config();
let cas = SimpleCas::create(cas_path, config)
.expect("failed to create SimpleCas object");
let oid = cas.object_id_from_string("f731f6bc6a6a73bad170e56452473ef6930b7a0ab33cc54be44221a89b49d786")
.expect("failed to create object id");
assert!(!cas.has_object_id(&oid).expect("has_object_id failed"));
}
#[test]
fn test_write_object() {
let dir = tempfile::tempdir().expect("failed to create temp test dir");
let cas_path = get_cas_path(dir.path());
let config = get_config();
let otype = ObjectType::new(b"blob").expect("failed to create object type");
let payload = b"Hello World!";
let mut cas = SimpleCas::create(cas_path.clone(), config)
.expect("failed to create SimpleCas object");
let oid = cas.object_id_from_string("f731f6bc6a6a73bad170e56452473ef6930b7a0ab33cc54be44221a89b49d786")
.expect("failed to create object id");
assert!(!cas.has_object_id(&oid).expect("has_object_id failed"));
{
let mut writer = cas.new_writer(&otype, payload.len() as u64)
.expect("failed to create object writer");
writer.write_all(payload).expect("failed to write object");
let oid2 = writer.finalize().expect("failed to finalize writer");
assert_eq!(oid2, oid);
}
assert!(cas.has_object_id(&oid).expect("has_object_id failed"));
}
#[test]
fn test_read_object() {
use std::io::Write;
let dir = tempfile::tempdir().expect("failed to create temp test dir");
let cas_path = get_cas_path(dir.path());
let config = get_config();
let otype = ObjectType::new(b"blob").expect("failed to create object type");
let payload = b"Hello World!";
let cas = SimpleCas::create(cas_path.clone(), config)
.expect("failed to create SimpleCas object");
let oid = cas.object_id_from_string("f731f6bc6a6a73bad170e56452473ef6930b7a0ab33cc54be44221a89b49d786")
.expect("failed to create object id");
assert!(!cas.has_object_id(&oid).expect("has_object_id failed"));
{
let opath = obj_path(&cas_path, &oid);
std::fs::create_dir_all(opath.parent().unwrap()).expect("failed to create object dir");
let mut file = std::fs::File::create(opath)
.expect("failed to open object file for write");
file.write_all(b"blob\x00\x00\x00\x00\x00\x00\x00\x0c")
.expect("failed to write object header");
file.write_all(payload).expect("failed to write object payload");
}
assert!(cas.has_object_id(&oid).expect("has_object_id failed"));
let (metadata, mut reader) = cas.open_object(&oid).expect("failed to open object");
let mut buf = Vec::new();
reader.read_to_end(&mut buf).expect("failed to read object");
let oid2 = reader.finalize().expect("failed to finalize object reader");
assert_eq!(oid2, oid);
assert_eq!(metadata.otype(), &otype);
assert_eq!(metadata.size(), payload.len() as u64);
assert_eq!(buf, payload);
}
#[test]
fn test_read_write_object() {
let dir = tempfile::tempdir().expect("failed to create temp test dir");
let cas_path = get_cas_path(dir.path());
let config = get_config();
let otype = ObjectType::new(b"blob").expect("failed to create object type");
let payload = b"This is a test.";
let mut cas = SimpleCas::create(cas_path.clone(), config)
.expect("failed to create SimpleCas object");
let oid = cas.write_object(&otype, payload).expect("failed to write object");
let (metadata, data) = cas.read_object(&oid).expect("failed to read object");
assert_eq!(metadata.otype(), &otype);
assert_eq!(metadata.size(), payload.len() as u64);
assert_eq!(data, payload);
}
#[test]
fn test_remove_object() {
let dir = tempfile::tempdir().expect("failed to create temp test dir");
let cas_path = get_cas_path(dir.path());
let config = get_config();
let otype = ObjectType::new(b"blob").expect("failed to create object type");
let payload = b"This is a test.";
let mut cas = SimpleCas::create(cas_path.clone(), config)
.expect("failed to create SimpleCas object");
let oid = cas.write_object(&otype, payload).expect("failed to write object");
assert!(cas.has_object_id(&oid).unwrap());
cas.remove_object(&oid).expect("failed to remove object");
assert!(!cas.has_object_id(&oid).unwrap());
}
#[test]
fn test_object_id_iterator() {
let dir = tempfile::tempdir().expect("failed to create temp test dir");
let cas_path = get_cas_path(dir.path());
let config = get_config();
let mut cas = SimpleCas::create(cas_path.clone(), config)
.expect("failed to create SimpleCas object");
let add_fake_object = |oid: &ObjectId| {
use std::io::Write;
let path = obj_path(&cas.db_path, oid);
std::fs::create_dir_all(path.parent().unwrap())
.expect("failed to create object directory");
let mut file = std::fs::File::create(path).unwrap();
file.write(b"Test").unwrap();
};
let oid_0_0_0 = ObjectId::from_str("0000000000000000000000000000000000000000000000000000000000000000").unwrap();
let oid_0_0_1 = ObjectId::from_str("0000000000000000000000000000000000000000000000000000000000000001").unwrap();
let oid_0_0_2 = ObjectId::from_str("0000000000000000000000000000000000000000000000000000000000000002").unwrap();
let oid_0_1_0 = ObjectId::from_str("0001000000000000000000000000000000000000000000000000000000000000").unwrap();
let oid_0_2_0 = ObjectId::from_str("0002000000000000000000000000000000000000000000000000000000000000").unwrap();
let oid_1_0_0 = ObjectId::from_str("0100000000000000000000000000000000000000000000000000000000000000").unwrap();
let oid_2_0_0 = ObjectId::from_str("0200000000000000000000000000000000000000000000000000000000000000").unwrap();
let objects = cas.iter_object_id().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(objects, []);
let all_oids = [
oid_0_0_0.clone(), oid_0_0_1.clone(), oid_0_0_2.clone(),
oid_0_1_0.clone(), oid_0_2_0.clone(),
oid_1_0_0.clone(), oid_2_0_0.clone(),
];
for oid in all_oids.iter() {
add_fake_object(&oid);
}
let objects = cas.iter_object_id().unwrap()
.collect::<Result<Vec<_>>>().unwrap();
assert_eq!(objects, all_oids);
cas.remove_object(&oid_0_0_0).unwrap();
let oids = [
oid_0_0_1.clone(), oid_0_0_2.clone(),
oid_0_1_0.clone(), oid_0_2_0.clone(),
oid_1_0_0.clone(), oid_2_0_0.clone(),
];
let objects = cas.iter_object_id().unwrap()
.collect::<Result<Vec<_>>>().unwrap();
assert_eq!(objects, oids);
cas.remove_object(&oid_0_0_2).unwrap();
let oids = [
oid_0_0_1.clone(),
oid_0_1_0.clone(), oid_0_2_0.clone(),
oid_1_0_0.clone(), oid_2_0_0.clone(),
];
let objects = cas.iter_object_id().unwrap()
.collect::<Result<Vec<_>>>().unwrap();
assert_eq!(objects, oids);
cas.remove_object(&oid_0_0_1).unwrap();
let oids = [
oid_0_1_0.clone(), oid_0_2_0.clone(),
oid_1_0_0.clone(), oid_2_0_0.clone(),
];
let objects = cas.iter_object_id().unwrap()
.collect::<Result<Vec<_>>>().unwrap();
assert_eq!(objects, oids);
cas.remove_object(&oid_2_0_0).unwrap();
let oids = [
oid_0_1_0.clone(), oid_0_2_0.clone(),
oid_1_0_0.clone(),
];
let objects = cas.iter_object_id().unwrap()
.collect::<Result<Vec<_>>>().unwrap();
assert_eq!(objects, oids);
cas.remove_object(&oid_1_0_0).unwrap();
let oids = [
oid_0_1_0.clone(), oid_0_2_0.clone(),
];
let objects = cas.iter_object_id().unwrap()
.collect::<Result<Vec<_>>>().unwrap();
assert_eq!(objects, oids);
cas.remove_object(&oid_0_1_0).unwrap();
let oids = [
oid_0_2_0.clone(),
];
let objects = cas.iter_object_id().unwrap()
.collect::<Result<Vec<_>>>().unwrap();
assert_eq!(objects, oids);
cas.remove_object(&oid_0_2_0).unwrap();
let oids = [
];
let objects = cas.iter_object_id().unwrap()
.collect::<Result<Vec<_>>>().unwrap();
assert_eq!(objects, oids);
}
}