diff --git a/cas-core/src/cas.rs b/cas-core/src/cas.rs index 089e7de..27750e4 100644 --- a/cas-core/src/cas.rs +++ b/cas-core/src/cas.rs @@ -14,10 +14,13 @@ // along with cdb. If not, see . -use super::error::Result; +use std::path::Path; + +use super::error::{Error, Result}; use super::object_id::ObjectId; use super::object_type::ObjectType; use super::object_metadata::ObjectMetadata; +use super::pipeline::{Reader, Writer}; // pub struct ObjectIdIterator; @@ -25,16 +28,41 @@ use super::object_metadata::ObjectMetadata; pub trait Cas { fn object_id_from_string(&self, hex: &str) -> Result; - fn object_id_from_partial(&self, hex: &str) -> Result; fn has_object_id(&self, oid: &ObjectId) -> Result; - // fn iter_object_id(&self) -> Result; + fn iter_object_id<'s>(&'s self) -> Result>>>; + + fn open_object(&self, oid: &ObjectId) -> Result<(ObjectMetadata, Box)>; + fn new_writer(&mut self, otype: &ObjectType, size: u64) -> Result>; + + fn read_object(&self, oid: &ObjectId) -> Result<(ObjectMetadata, Vec)> { + let (metadata, mut reader) = self.open_object(oid)?; + let mut data = Vec::new(); + + reader.read_to_end(&mut data).or_else(|err| + Err(Error::error(format!("failed to read object {}: {}", oid, err))))?; + + let result_oid = reader.finalize()?; + if &result_oid == oid { + Ok((metadata, data)) + } + else { + Err(Error::error(format!("object id mismatch: requested {}, read {}", oid, result_oid))) + } + } + + fn write_object(&mut self, otype: &ObjectType, data: &[u8]) -> Result { + let mut writer = self.new_writer(otype, data.len() as u64)?; + writer.write_all(data).or_else(|err| + Err(Error::error(format!("failed to write object: {}", err))))?; + writer.finalize() + } - fn read_object(&self, oid: &ObjectId) -> Result<(ObjectMetadata, &dyn std::io::Read)>; - fn write_object(&mut self, otype: ObjectType, data: &mut dyn std::io::Read) -> Result; fn remove_object(&mut self, oid: &ObjectId) -> Result<()>; +} - fn read_ref(&self, key: &str) -> Result; - fn write_ref(&mut self, key: &str, value: &ObjectId) -> Result<()>; - fn remove_ref(&mut self, key: &str) -> Result<()>; +pub trait RefStore { + fn get_ref>(&self, key: P) -> Result; + fn set_ref>(&mut self, key: P, value: &ObjectId) -> Result<()>; + fn remove_ref>(&mut self, key: P) -> Result<()>; } \ No newline at end of file diff --git a/cas-core/src/error.rs b/cas-core/src/error.rs index 6f36aa2..9903ecd 100644 --- a/cas-core/src/error.rs +++ b/cas-core/src/error.rs @@ -111,6 +111,10 @@ impl Error { pub fn error>(message: M) -> Box { Box::new(Error::Error(message.into())) } + + pub fn err, T>(message: M) -> Result { + Err(Self::error(message)) + } } diff --git a/cas-core/src/lib.rs b/cas-core/src/lib.rs index 1f2e681..9d8bacd 100644 --- a/cas-core/src/lib.rs +++ b/cas-core/src/lib.rs @@ -38,7 +38,7 @@ pub use crate::{ object_id::{ObjectId, hex, write_hex}, object_type::{ObjectType}, object_metadata::{ObjectMetadata}, - pipeline::{Pipeline, DefaultPipeline}, + pipeline::{Pipeline, DefaultPipeline, Reader, Writer, ReadWrapper, WriteWrapper}, cas::{Cas}, }; diff --git a/cas-core/src/object_metadata.rs b/cas-core/src/object_metadata.rs index aeae1ea..13c741e 100644 --- a/cas-core/src/object_metadata.rs +++ b/cas-core/src/object_metadata.rs @@ -20,11 +20,11 @@ use super::object_type::{ObjectType}; #[derive(Clone, Eq, PartialEq)] pub struct ObjectMetadata { otype: ObjectType, - size: usize, + size: u64, } impl ObjectMetadata { - pub fn new(otype: ObjectType, size: usize) -> Self { + pub fn new(otype: ObjectType, size: u64) -> Self { Self { otype, size, @@ -35,7 +35,7 @@ impl ObjectMetadata { &self.otype } - pub fn size(&self) -> usize { + pub fn size(&self) -> u64 { self.size } } diff --git a/cas-core/src/pipeline.rs b/cas-core/src/pipeline.rs index 487720c..744a14f 100644 --- a/cas-core/src/pipeline.rs +++ b/cas-core/src/pipeline.rs @@ -19,29 +19,13 @@ use super::error::*; use super::object_id::ObjectId; -pub trait Reader { - fn read(&mut self, buf: &mut [u8]) -> Result; +pub trait Reader: std::io::Read { fn finalize(self: Box) -> Result; - - fn read_all(&mut self) -> Result> { - let mut buffer = [0u8; 1 << 13]; - let mut data = Vec::new(); - let mut count = usize::MAX; - - while count != 0 { - count = self.read(&mut buffer)?; - if count != 0 { - data.extend_from_slice(&buffer[..count]); - } - } - - Ok(data) - } } -pub trait Writer { - fn write(&mut self, buf: &[u8]) -> Result<()>; +pub trait Writer: std::io::Write { fn finalize(self: Box) -> Result; + fn _finalize(self: Box, oid: ObjectId) -> Result; } @@ -51,29 +35,57 @@ pub trait Pipeline { } -impl Reader for R { - fn read(&mut self, buf: &mut [u8]) -> Result { - ::read(self, buf).or_else(|err| - Err(Error::error(format!("Read error: {}", err))) - ) +pub struct ReadWrapper { + reader: R, +} + +impl ReadWrapper { + pub fn new(reader: R) -> Self { + Self { reader } + } +} + +impl std::io::Read for ReadWrapper { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + self.reader.read(buf) } +} +impl Reader for ReadWrapper { fn finalize(self: Box) -> Result { Err(Error::error("reader pipline has no digest step")) } } -impl Writer for W { - fn write(&mut self, buf: &[u8]) -> Result<()> { - self.write_all(buf).or_else(|err| - Err(Error::error(format!("Write error: {}", err))) - ) +pub struct WriteWrapper { + writer: W, +} + +impl WriteWrapper { + pub fn new(writer: W) -> Self { + Self { writer } + } +} + +impl std::io::Write for WriteWrapper { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.writer.write(buf) } + fn flush(&mut self) -> std::io::Result<()> { + self.writer.flush() + } +} + +impl Writer for WriteWrapper { fn finalize(self: Box) -> Result { Err(Error::error("reader pipline has no digest step")) } + + fn _finalize(self: Box, oid: ObjectId) -> Result { + Ok(oid) + } } @@ -93,8 +105,8 @@ impl<'a> DigestReader<'a> { } } -impl<'a> Reader for DigestReader<'a> { - fn read(&mut self, buf: &mut [u8]) -> Result { +impl<'a> std::io::Read for DigestReader<'a> { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { let count = if let Some(dig) = &mut self.digest { let count = self.reader.read(buf)?; dig.update(&buf[..count]); @@ -112,7 +124,9 @@ impl<'a> Reader for DigestReader<'a> { Ok(count) } +} +impl<'a> Reader for DigestReader<'a> { fn finalize(self: Box) -> Result { self.oid.ok_or_else(|| Error::error("Reader not finalized")) } @@ -133,15 +147,26 @@ impl<'a> DigestWriter<'a> { } } -impl<'a> Writer for DigestWriter<'a> { - fn write(&mut self, buf: &[u8]) -> Result<()> { - self.digest.update(buf); - self.writer.write(buf)?; - Ok(()) +impl<'a> std::io::Write for DigestWriter<'a> { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + let count = self.writer.write(buf)?; + self.digest.update(&buf[..count]); + Ok(count) } + fn flush(&mut self) -> std::io::Result<()> { + self.writer.flush() + } +} + +impl<'a> Writer for DigestWriter<'a> { fn finalize(self: Box) -> Result { - Ok(ObjectId::new(&self.digest.finalize())) + let oid = ObjectId::new(&self.digest.finalize()); + self.writer._finalize(oid) + } + + fn _finalize(self: Box, _oid: ObjectId) -> Result { + Err(Error::error("writer pipeline has several digest steps")) } } @@ -182,9 +207,10 @@ mod tests { #[test] fn std_read_as_reader() { let data = b"Hello World!".to_vec(); - let mut reader: Box = Box::new(data.as_slice()); + let mut reader: Box = Box::new(ReadWrapper::new(data.as_slice())); - let data2 = reader.read_all().expect("read failed"); + let mut data2 = Vec::new(); + reader.read_to_end(&mut data2).expect("read failed"); let maybe_oid = reader.finalize(); assert_eq!(data2, data); @@ -197,7 +223,7 @@ mod tests { let mut buffer = [0u8; 32]; let maybe_oid = { - let mut writer: Box = Box::new(&mut buffer[..]); + let mut writer: Box = Box::new(WriteWrapper::new(&mut buffer[..])); writer.write(&data).expect("write failed"); writer.finalize() }; @@ -210,17 +236,20 @@ mod tests { #[test] fn digest_reader() { + use std::io::Read; + let data = b"Hello World!".to_vec(); let oid = ObjectId::from_str( "7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069" ).expect("invalid object id"); let mut reader = Box::new(DigestReader::new( - Box::new(data.as_slice()), + Box::new(ReadWrapper::new(data.as_slice())), Box::new(Sha256::default()), )); - let data2 = reader.read_all().expect("read failed"); + let mut data2 = Vec::new(); + reader.read_to_end(&mut data2).expect("read failed"); let oid2 = reader.finalize().expect("finalize failed"); assert_eq!(data2, data); @@ -229,6 +258,8 @@ mod tests { #[test] fn digest_writer() { + use std::io::Write; + let data = b"Hello World!".to_vec(); let oid = ObjectId::from_str( "7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069" @@ -237,7 +268,7 @@ mod tests { let mut buffer = [0u8; 32]; let oid2 = { let mut writer = Box::new(DigestWriter::new( - Box::new(&mut buffer[..]), + Box::new(WriteWrapper::new(&mut buffer[..])), Box::new(Sha256::default()), )); writer.write(&data).expect("write failed"); diff --git a/cas-simple/src/cas.rs b/cas-simple/src/cas.rs new file mode 100644 index 0000000..9d3714d --- /dev/null +++ b/cas-simple/src/cas.rs @@ -0,0 +1,563 @@ +// This file is part of bsv. +// +// bsv is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// cdb is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for +// more details. +// +// You should have received a copy of the Affero GNU General Public License +// along with cdb. If not, see . + + +use std::str::FromStr; +use std::path::{Path, PathBuf}; + +use digest::DynDigest; +use toml::Value; +use cas_core::{ + Cas, DefaultPipeline, Error, ObjectId, ObjectMetadata, ObjectType, + Pipeline, Reader, ReadWrapper, Result, Writer, +}; + +use crate::utils::{ + obj_dir, obj_path, ref_dir, tmp_dir, + read_config, write_config, + read_metadata, write_metadata, + new_digest, +}; +use crate::wfile::WFile; + + +pub struct SimpleCas { + db_path: PathBuf, + digest: Box, + pipeline: DefaultPipeline, + config: Value, +} + + +impl SimpleCas { + pub fn create(db_path: PathBuf, config: Value) -> Result { + let digest_id = config["cas"]["digest"].as_str() + .ok_or_else(|| Error::error( + "mandatory cas.digest value is invalid or missing from config" + ))?; + let digest = new_digest(digest_id)?; + let pipeline = DefaultPipeline::new(digest.box_clone()); + + if db_path.exists() { + return Err(Error::error(format!( + "failed to create SimpleCas: target directory already exists ({})", + db_path.to_string_lossy(), + ))); + } + + for path in [ + &db_path, + &obj_dir(&db_path), + &ref_dir(&db_path), + &tmp_dir(&db_path), + ] { + std::fs::create_dir(path).or_else(|e| + Err(Error::error(format!( + "failed to create directory ({}): {}", + path.to_string_lossy(), e, + ))) + )?; + } + + write_config(&config, &db_path)?; + + Ok(SimpleCas { + db_path, + digest, + pipeline, + config, + }) + } + + pub fn open(db_path: PathBuf) -> Result { + let config = read_config(&db_path)?; + + let digest_id = config["cas"]["digest"].as_str() + .ok_or_else(|| Error::error( + "mandatory cas.digest value is invalid or missing from config" + ))?; + let digest = new_digest(digest_id)?; + let pipeline = DefaultPipeline::new(digest.box_clone()); + + Ok(SimpleCas { + db_path, + digest, + pipeline, + config, + }) + } + + pub fn save_config(&self) -> Result<()> { + write_config(&self.config, &self.db_path) + } +} + + +impl Cas for SimpleCas { + fn object_id_from_string(&self, hex: &str) -> Result { + if hex.len() == self.digest.output_size() * 2 { + ObjectId::from_str(hex) + } + else { + Err(Error::error(format!( + "invalid object id size: got {}, expected {}", + hex.len(), self.digest.output_size() * 2, + ))) + } + } + + fn has_object_id(&self, oid: &ObjectId) -> Result { + let opath = obj_path(&self.db_path, oid); + Ok(opath.is_file()) + } + + fn iter_object_id<'s>(&'s self) -> Result>>> { + Ok(Box::new(ObjectIdIterator::new(self)?)) + } + + + fn open_object(&self, oid: &ObjectId) -> Result<(ObjectMetadata, Box)> { + let opath = obj_path(&self.db_path, oid); + if !opath.is_file() { + return Err(Error::error(format!("object not found: {}", oid))); + } + + let file = std::fs::File::open(opath).or_else(|err| + Err(Error::error(format!("failed to open object {}: {}", oid, err))))?; + let wrapper = Box::new(ReadWrapper::new(file)); + let mut reader = self.pipeline.new_reader(wrapper); + let metadata = read_metadata(&mut reader)?; + + Ok((metadata, reader)) + } + + fn new_writer(&mut self, otype: &ObjectType, size: u64) -> Result> { + let file = Box::new(WFile::new(self.db_path.clone())?); + let mut writer = self.pipeline.new_writer(file); + write_metadata(&mut writer, otype, size)?; + + Ok(writer) + } + + fn remove_object(&mut self, oid: &ObjectId) -> Result<()> { + let opath = obj_path(&self.db_path, oid); + if !opath.is_file() { + return Err(Error::error(format!("object not found: {}", oid))); + } + std::fs::remove_file(opath).or_else(|err| + Err(Error::error(format!("failed to remove object {}: {}", oid, err))))?; + Ok(()) + } +} + + +pub struct ObjectIdIterator { + root_dirs: Vec, + inner_dirs: Vec, + objects: Vec, + root_index: usize, + inner_index: usize, + object_index: usize, +} + +impl ObjectIdIterator { + fn new(cas: &SimpleCas) -> Result { + let root_dirs = read_dir(&obj_dir(&cas.db_path))?; + let mut it = Self { + root_dirs, + inner_dirs: Vec::new(), + objects: Vec::new(), + root_index: 0, + inner_index: 0, + object_index: 0, + }; + + it.fetch_inner_dirs()?; + if !it.is_item_valid() { + it.next_valid_item()?; + } + + Ok(it) + } + + fn get_object_id(&mut self) -> Result { + let path = &self.objects[self.object_index]; + + if !path.is_file() { + return Error::err(format!("item in object database is not a file: {:?}", path)); + } + + let id = path.ancestors() + .take(3) + .collect::>() + .iter() + .rev() + .map(|p| p.file_name()?.to_str()) + .collect::>() + .ok_or_else(|| Error::error(format!("invalid object in object database: {:?}", path)))?; + ObjectId::from_str(&id) + } + + fn is_done(&self) -> bool { + self.root_index == self.root_dirs.len() + } + + fn is_item_valid(&self) -> bool { + self.root_index < self.root_dirs.len() && + self.inner_index < self.inner_dirs.len() && + self.object_index < self.objects.len() + } + + fn next_item(&mut self) -> Result<()> { + if self.object_index < self.objects.len() { + self.object_index += 1; + } + else if self.inner_index < self.inner_dirs.len() { + self.inner_index += 1; + self.fetch_objects()?; + } + else if self.root_index < self.root_dirs.len() { + self.root_index += 1; + self.fetch_inner_dirs()?; + } + Ok(()) + } + + fn next_valid_item(&mut self) -> Result { + if !self.is_done() { + self.next_item()?; + } + while !self.is_done() && !self.is_item_valid() { + self.next_item()?; + } + Ok(self.is_done()) + } + + fn fetch_objects(&mut self) -> Result<()> { + self.object_index = 0; + if self.inner_index < self.inner_dirs.len() { + let path = &self.inner_dirs[self.inner_index]; + self.objects = read_dir(path)?; + } + else { + self.objects.clear(); + } + Ok(()) + } + + fn fetch_inner_dirs(&mut self) -> Result<()> { + self.inner_index = 0; + if self.root_index < self.root_dirs.len() { + let path = &self.root_dirs[self.root_index]; + self.inner_dirs = read_dir(path)?; + self.fetch_objects()?; + } + else { + self.inner_dirs.clear(); + } + Ok(()) + } +} + +impl Iterator for ObjectIdIterator { + type Item = Result; + + fn next(&mut self) -> Option { + if !self.is_done() { + debug_assert!(self.is_item_valid()); + let item = self.get_object_id(); + if item.is_ok() { + if let Err(err) = self.next_valid_item() { + return Some(Err(err)); + } + } + Some(item) + } + else { + None + } + } +} + +fn read_dir(path: &Path) -> Result> { + let mut paths = std::fs::read_dir(path) + .or_else(|err| Err(Error::error(format!( + "failed to read directory {:?}: {}", path, err))))? + .map(|dir| dir.map(|dir| dir.path())) + .collect::>>() + .or_else(|err| Err(Error::error(format!( + "error while reading directory {:?}: {}", path, err))))?; + paths.sort(); + Ok(paths) +} + + +#[cfg(test)] +mod tests { + use std::path::Path; + + use super::*; + + fn get_config() -> Value { + toml::toml!( + [cas] + digest = "sha256" + ) + } + + fn get_cas_path(dir: &Path) -> PathBuf { + let mut cas_path = dir.to_path_buf(); + cas_path.push(".bsv"); + cas_path + } + + #[test] + fn test_create_simple_cas() { + let dir = tempfile::tempdir().expect("failed to create temp test dir"); + let cas_path = get_cas_path(dir.path()); + let config = get_config(); + + let cas = SimpleCas::create(cas_path, config) + .expect("failed to create SimpleCas object"); + let oid = cas.object_id_from_string("f731f6bc6a6a73bad170e56452473ef6930b7a0ab33cc54be44221a89b49d786") + .expect("failed to create object id"); + + assert!(!cas.has_object_id(&oid).expect("has_object_id failed")); + } + + #[test] + fn test_write_object() { + let dir = tempfile::tempdir().expect("failed to create temp test dir"); + let cas_path = get_cas_path(dir.path()); + let config = get_config(); + + let otype = ObjectType::new(b"blob").expect("failed to create object type"); + let payload = b"Hello World!"; + + let mut cas = SimpleCas::create(cas_path.clone(), config) + .expect("failed to create SimpleCas object"); + let oid = cas.object_id_from_string("f731f6bc6a6a73bad170e56452473ef6930b7a0ab33cc54be44221a89b49d786") + .expect("failed to create object id"); + + assert!(!cas.has_object_id(&oid).expect("has_object_id failed")); + + { + let mut writer = cas.new_writer(&otype, payload.len() as u64) + .expect("failed to create object writer"); + writer.write_all(payload).expect("failed to write object"); + let oid2 = writer.finalize().expect("failed to finalize writer"); + + assert_eq!(oid2, oid); + } + + assert!(cas.has_object_id(&oid).expect("has_object_id failed")); + } + + #[test] + fn test_read_object() { + use std::io::Write; + + let dir = tempfile::tempdir().expect("failed to create temp test dir"); + let cas_path = get_cas_path(dir.path()); + let config = get_config(); + + let otype = ObjectType::new(b"blob").expect("failed to create object type"); + let payload = b"Hello World!"; + + let cas = SimpleCas::create(cas_path.clone(), config) + .expect("failed to create SimpleCas object"); + let oid = cas.object_id_from_string("f731f6bc6a6a73bad170e56452473ef6930b7a0ab33cc54be44221a89b49d786") + .expect("failed to create object id"); + + assert!(!cas.has_object_id(&oid).expect("has_object_id failed")); + + { + let opath = obj_path(&cas_path, &oid); + std::fs::create_dir_all(opath.parent().unwrap()).expect("failed to create object dir"); + let mut file = std::fs::File::create(opath) + .expect("failed to open object file for write"); + file.write_all(b"blob\x00\x00\x00\x00\x00\x00\x00\x0c") + .expect("failed to write object header"); + file.write_all(payload).expect("failed to write object payload"); + } + + assert!(cas.has_object_id(&oid).expect("has_object_id failed")); + + let (metadata, mut reader) = cas.open_object(&oid).expect("failed to open object"); + let mut buf = Vec::new(); + reader.read_to_end(&mut buf).expect("failed to read object"); + let oid2 = reader.finalize().expect("failed to finalize object reader"); + + assert_eq!(oid2, oid); + assert_eq!(metadata.otype(), &otype); + assert_eq!(metadata.size(), payload.len() as u64); + assert_eq!(buf, payload); + } + + #[test] + fn test_read_write_object() { + let dir = tempfile::tempdir().expect("failed to create temp test dir"); + let cas_path = get_cas_path(dir.path()); + let config = get_config(); + + let otype = ObjectType::new(b"blob").expect("failed to create object type"); + let payload = b"This is a test."; + + let mut cas = SimpleCas::create(cas_path.clone(), config) + .expect("failed to create SimpleCas object"); + + let oid = cas.write_object(&otype, payload).expect("failed to write object"); + let (metadata, data) = cas.read_object(&oid).expect("failed to read object"); + + assert_eq!(metadata.otype(), &otype); + assert_eq!(metadata.size(), payload.len() as u64); + assert_eq!(data, payload); + } + + #[test] + fn test_remove_object() { + let dir = tempfile::tempdir().expect("failed to create temp test dir"); + let cas_path = get_cas_path(dir.path()); + let config = get_config(); + + let otype = ObjectType::new(b"blob").expect("failed to create object type"); + let payload = b"This is a test."; + + let mut cas = SimpleCas::create(cas_path.clone(), config) + .expect("failed to create SimpleCas object"); + + let oid = cas.write_object(&otype, payload).expect("failed to write object"); + + assert!(cas.has_object_id(&oid).unwrap()); + + cas.remove_object(&oid).expect("failed to remove object"); + + assert!(!cas.has_object_id(&oid).unwrap()); + } + + #[test] + fn test_object_id_iterator() { + let dir = tempfile::tempdir().expect("failed to create temp test dir"); + let cas_path = get_cas_path(dir.path()); + let config = get_config(); + + let mut cas = SimpleCas::create(cas_path.clone(), config) + .expect("failed to create SimpleCas object"); + + let add_fake_object = |oid: &ObjectId| { + use std::io::Write; + + let path = obj_path(&cas.db_path, oid); + std::fs::create_dir_all(path.parent().unwrap()) + .expect("failed to create object directory"); + let mut file = std::fs::File::create(path).unwrap(); + file.write(b"Test").unwrap(); + }; + + let oid_0_0_0 = ObjectId::from_str("0000000000000000000000000000000000000000000000000000000000000000").unwrap(); + let oid_0_0_1 = ObjectId::from_str("0000000000000000000000000000000000000000000000000000000000000001").unwrap(); + let oid_0_0_2 = ObjectId::from_str("0000000000000000000000000000000000000000000000000000000000000002").unwrap(); + + let oid_0_1_0 = ObjectId::from_str("0001000000000000000000000000000000000000000000000000000000000000").unwrap(); + let oid_0_2_0 = ObjectId::from_str("0002000000000000000000000000000000000000000000000000000000000000").unwrap(); + + let oid_1_0_0 = ObjectId::from_str("0100000000000000000000000000000000000000000000000000000000000000").unwrap(); + let oid_2_0_0 = ObjectId::from_str("0200000000000000000000000000000000000000000000000000000000000000").unwrap(); + + let objects = cas.iter_object_id().unwrap().collect::>>().unwrap(); + assert_eq!(objects, []); + + let all_oids = [ + oid_0_0_0.clone(), oid_0_0_1.clone(), oid_0_0_2.clone(), + oid_0_1_0.clone(), oid_0_2_0.clone(), + oid_1_0_0.clone(), oid_2_0_0.clone(), + ]; + for oid in all_oids.iter() { + add_fake_object(&oid); + } + + let objects = cas.iter_object_id().unwrap() + .collect::>>().unwrap(); + assert_eq!(objects, all_oids); + + cas.remove_object(&oid_0_0_0).unwrap(); + + let oids = [ + oid_0_0_1.clone(), oid_0_0_2.clone(), + oid_0_1_0.clone(), oid_0_2_0.clone(), + oid_1_0_0.clone(), oid_2_0_0.clone(), + ]; + let objects = cas.iter_object_id().unwrap() + .collect::>>().unwrap(); + assert_eq!(objects, oids); + + cas.remove_object(&oid_0_0_2).unwrap(); + + let oids = [ + oid_0_0_1.clone(), + oid_0_1_0.clone(), oid_0_2_0.clone(), + oid_1_0_0.clone(), oid_2_0_0.clone(), + ]; + let objects = cas.iter_object_id().unwrap() + .collect::>>().unwrap(); + assert_eq!(objects, oids); + + cas.remove_object(&oid_0_0_1).unwrap(); + + let oids = [ + oid_0_1_0.clone(), oid_0_2_0.clone(), + oid_1_0_0.clone(), oid_2_0_0.clone(), + ]; + let objects = cas.iter_object_id().unwrap() + .collect::>>().unwrap(); + assert_eq!(objects, oids); + + cas.remove_object(&oid_2_0_0).unwrap(); + + let oids = [ + oid_0_1_0.clone(), oid_0_2_0.clone(), + oid_1_0_0.clone(), + ]; + let objects = cas.iter_object_id().unwrap() + .collect::>>().unwrap(); + assert_eq!(objects, oids); + + cas.remove_object(&oid_1_0_0).unwrap(); + + let oids = [ + oid_0_1_0.clone(), oid_0_2_0.clone(), + ]; + let objects = cas.iter_object_id().unwrap() + .collect::>>().unwrap(); + assert_eq!(objects, oids); + + cas.remove_object(&oid_0_1_0).unwrap(); + + let oids = [ + oid_0_2_0.clone(), + ]; + let objects = cas.iter_object_id().unwrap() + .collect::>>().unwrap(); + assert_eq!(objects, oids); + + cas.remove_object(&oid_0_2_0).unwrap(); + + let oids = [ + ]; + let objects = cas.iter_object_id().unwrap() + .collect::>>().unwrap(); + assert_eq!(objects, oids); + } +} diff --git a/cas-simple/src/lib.rs b/cas-simple/src/lib.rs index d65b5d0..57cfba3 100644 --- a/cas-simple/src/lib.rs +++ b/cas-simple/src/lib.rs @@ -23,223 +23,12 @@ extern crate digest; extern crate sha2; extern crate toml; extern crate cas_core; +extern crate tempfile; -use std::str::FromStr; -use std::path::{Path, PathBuf}; +mod utils; +mod wfile; +mod cas; -use digest::DynDigest; -use toml::Value; -use cas_core::{Cas, Error, hex, ObjectId, ObjectMetadata, ObjectType, Result}; - -fn new_digest(id: &str) -> Result> { - match id { - "sha256" => { Ok(Box::new(sha2::Sha256::default())) }, - _ => { Err(Error::error(format!("unknown digest '{}'", id))) } - } -} - - -pub struct SimpleCas { - db_path: PathBuf, - digest: Box, - config: Value, -} - - -impl SimpleCas { - pub fn create(db_path: PathBuf, config: Value) -> Result { - let digest_id = config["cas"]["digest"].as_str() - .ok_or_else(|| Error::error( - "mandatory cas.digest value is invalid or missing from config" - ))?; - let digest = new_digest(digest_id)?; - - if db_path.exists() { - return Err(Error::error(format!( - "failed to create SimpleCas: target directory already exists ({})", - db_path.to_string_lossy(), - ))); - } - - for path in [ - &db_path, - &obj_dir(&db_path), - &ref_dir(&db_path), - &tmp_dir(&db_path), - ] { - std::fs::create_dir(path).or_else(|e| - Err(Error::error(format!( - "failed to create directory ({}): {}", - path.to_string_lossy(), e, - ))) - )?; - } - - write_config(&config, &db_path)?; - - Ok(SimpleCas { - db_path, - digest, - config, - }) - } - - pub fn open(db_path: PathBuf) -> Result { - let config = read_config(&db_path)?; - - let digest_id = config["cas"]["digest"].as_str() - .ok_or_else(|| Error::error( - "mandatory cas.digest value is invalid or missing from config" - ))?; - let digest = new_digest(digest_id)?; - - Ok(SimpleCas { - db_path, - digest, - config, - }) - } - - pub fn save_config(&self) -> Result<()> { - write_config(&self.config, &self.db_path) - } -} - - -impl Cas for SimpleCas { - fn object_id_from_string(&self, hex: &str) -> Result { - if hex.len() == self.digest.output_size() * 2 { - ObjectId::from_str(hex) - } - else { - Err(Error::error(format!( - "invalid object id size: got {}, expected {}", - hex.len(), self.digest.output_size() * 2, - ))) - } - } - - fn object_id_from_partial(&self, _hex: &str) -> Result { - Err(Error::error("Not implemented")) - } - - fn has_object_id(&self, oid: &ObjectId) -> Result { - let opath = obj_path(&self.db_path, oid); - Ok(opath.is_file()) - } - - fn read_object(&self, _oid: &ObjectId) -> Result<(ObjectMetadata, &dyn std::io::Read)> { - Err(Error::error("Not implemented")) - } - - fn write_object(&mut self, _otype: ObjectType, _data: &mut dyn std::io::Read) -> Result { - Err(Error::error("Not implemented")) - } - - fn remove_object(&mut self, _oid: &ObjectId) -> Result<()> { - Err(Error::error("Not implemented")) - } - - fn read_ref(&self, _key: &str) -> Result { - Err(Error::error("Not implemented")) - } - - fn write_ref(&mut self, _key: &str, _value: &ObjectId) -> Result<()> { - Err(Error::error("Not implemented")) - } - - fn remove_ref(&mut self, _key: &str) -> Result<()> { - Err(Error::error("Not implemented")) - } -} - - -fn obj_dir(cas_path: &Path) -> PathBuf { - cas_path.join("obj") -} - -fn obj_path(cas_path: &Path, oid: &ObjectId) -> PathBuf { - let mut path = cas_path.to_path_buf(); - path.push(hex(&oid.id()[0..1])); - path.push(hex(&oid.id()[1..2])); - path.push(hex(&oid.id()[2..])); - path -} - -fn ref_dir(cas_path: &Path) -> PathBuf { - cas_path.join("ref") -} - -fn tmp_dir(cas_path: &Path) -> PathBuf { - cas_path.join("tmp") -} - -fn config_path(cas_path: &Path) -> PathBuf { - cas_path.join("config") -} - -fn read_config(db_path: &Path) -> Result { - use std::io::Read; - - let mut file = std::fs::File::open(config_path(db_path)).or_else(|err| - Err(Error::error(format!("invalid repository: no config file: {}", err))) - )?; - let mut config_str = String::new(); - file.read_to_string(&mut config_str).or_else(|err| - Err(Error::error(format!("cannot read config file: {}", err))) - )?; - let config = toml::from_str(&config_str).or_else(|err| - Err(Error::error(format!("error while reading config file: {}", err))) - )?; - Ok(config) -} - -fn write_config(config: &Value, db_path: &Path) -> Result<()> { - use std::io::Write; - - let config_str = toml::to_string_pretty(config).or_else(|err| - Err(Error::error(format!("failed to serialize config: {}", err))) - )?; - let mut file = tempfile::NamedTempFile::new_in(tmp_dir(db_path)).or_else(|err| - Err(Error::error(format!("cannot create temp config file: {}", err))) - )?; - file.write_all(config_str.as_bytes()).or_else(|err| - Err(Error::error(format!("failed to write to temp config: {}", err))) - )?; - file.persist(config_path(db_path)).or_else(|err| - Err(Error::error(format!("failed to (over)write config: {}", err))) - )?; - Ok(()) -} - - -#[cfg(test)] -mod tests { - extern crate tempfile; - - use super::*; - - #[test] - fn create_simple_cas() { - let dir = tempfile::tempdir() - .expect("failed to create temp test dir"); - let cas_path = { - let mut cas_path = dir.path().to_path_buf(); - cas_path.push(".bsv"); - cas_path - }; - let config = toml::toml!( - [cas] - digest = "sha256" - ); - - let cas = SimpleCas::create(cas_path, config) - .expect("failed to create SimpleCas object"); - let oid = cas.object_id_from_string("0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef") - .expect("failed to create object id"); - - assert!(!cas.has_object_id(&oid).expect("has_object_id failed")); - } -} +pub use cas::SimpleCas; diff --git a/cas-simple/src/utils.rs b/cas-simple/src/utils.rs new file mode 100644 index 0000000..5cbe802 --- /dev/null +++ b/cas-simple/src/utils.rs @@ -0,0 +1,210 @@ + +// This file is part of bsv. +// +// bsv is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// cdb is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for +// more details. +// +// You should have received a copy of the Affero GNU General Public License +// along with cdb. If not, see . + + +use std::path::{Path, PathBuf}; + +use digest::DynDigest; +use toml::Value; + +use cas_core::{ + Error, hex, ObjectId, ObjectMetadata, ObjectType, Result, +}; + + +pub fn obj_dir(cas_path: &Path) -> PathBuf { + cas_path.join("obj") +} + +pub fn obj_path(cas_path: &Path, oid: &ObjectId) -> PathBuf { + let mut path = obj_dir(cas_path); + path.push(hex(&oid.id()[0..1])); + path.push(hex(&oid.id()[1..2])); + path.push(hex(&oid.id()[2..])); + path +} + +pub fn ref_dir(cas_path: &Path) -> PathBuf { + cas_path.join("ref") +} + +pub fn tmp_dir(cas_path: &Path) -> PathBuf { + cas_path.join("tmp") +} + +pub fn config_path(cas_path: &Path) -> PathBuf { + cas_path.join("config") +} + + +pub fn read_config(db_path: &Path) -> Result { + use std::io::Read; + + let mut file = std::fs::File::open(config_path(db_path)).or_else(|err| + Err(Error::error(format!("invalid repository: no config file: {}", err))) + )?; + let mut config_str = String::new(); + file.read_to_string(&mut config_str).or_else(|err| + Err(Error::error(format!("cannot read config file: {}", err))) + )?; + let config = toml::from_str(&config_str).or_else(|err| + Err(Error::error(format!("error while reading config file: {}", err))) + )?; + Ok(config) +} + +pub fn write_config(config: &Value, db_path: &Path) -> Result<()> { + use std::io::Write; + + let config_str = toml::to_string_pretty(config).or_else(|err| + Err(Error::error(format!("failed to serialize config: {}", err))) + )?; + let mut file = tempfile::NamedTempFile::new_in(tmp_dir(db_path)).or_else(|err| + Err(Error::error(format!("cannot create temp config file: {}", err))) + )?; + file.write_all(config_str.as_bytes()).or_else(|err| + Err(Error::error(format!("failed to write to temp config: {}", err))) + )?; + file.persist(config_path(db_path)).or_else(|err| + Err(Error::error(format!("failed to (over)write config: {}", err))) + )?; + Ok(()) +} + + +pub fn read_metadata(read: &mut dyn std::io::Read) -> Result { + let otype = ObjectType::new(&{ + let mut buf = [0; 4]; + read.read_exact(&mut buf).or_else(|err| + Err(Error::error(format!("failed to read object type: {}", err))))?; + buf + })?; + let size = u64::from_be_bytes({ + let mut buf = [0; 8]; + read.read_exact(&mut buf).or_else(|err| + Err(Error::error(format!("failed to read object size: {}", err))))?; + buf + }); + Ok(ObjectMetadata::new(otype, size)) +} + +pub fn write_metadata(write: &mut dyn std::io::Write, otype: &ObjectType, size: u64) -> Result<()> { + write.write_all(otype.id()).or_else(|err| + Err(Error::error(format!("failed to write object type: {}", err))))?; + write.write_all(&size.to_be_bytes()).or_else(|err| + Err(Error::error(format!("failed to write object size: {}", err))))?; + Ok(()) +} + + +pub fn new_digest(id: &str) -> Result> { + match id { + "sha256" => { Ok(Box::new(sha2::Sha256::default())) }, + _ => { Err(Error::error(format!("unknown digest '{}'", id))) } + } +} + + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use super::*; + + #[test] + fn test_dirs() { + let base_path = PathBuf::from("/foo/bar"); + let oid = ObjectId::from_str("0123456789abcdef") + .expect("failed to create object id"); + + assert_eq!( + config_path(&base_path), + PathBuf::from("/foo/bar/config") + ); + assert_eq!( + obj_dir(&base_path), + PathBuf::from("/foo/bar/obj") + ); + assert_eq!( + obj_path(&base_path, &oid), + PathBuf::from("/foo/bar/obj/01/23/456789abcdef") + ); + assert_eq!( + ref_dir(&base_path), + PathBuf::from("/foo/bar/ref") + ); + assert_eq!( + tmp_dir(&base_path), + PathBuf::from("/foo/bar/tmp") + ); + } + + #[test] + fn test_read_write_config() { + let config = toml::toml!{ + [cas] + path = "/foo/bar" + digest = "sha1" + + [extra] + test = 42 + }; + + let dir = tempfile::TempDir::new() + .expect("failed to create tmp dir"); + std::fs::create_dir(tmp_dir(dir.path())).expect("failed to create db/tmp dir"); + + write_config(&config, dir.path()).expect("failed to write config"); + let config2 = read_config(dir.path()).expect("failed to read config"); + + assert_eq!(config2, config); + + dir.close().expect("failed to close tmp dir") + } + + #[test] + fn test_read_write_metadata() { + use std::io::Write; + + let data = b"test\x00\x00\x00\x00\x00\x00\x00\x0cHello World!"; + let reader: &mut dyn std::io::Read = &mut &data[..]; + let mut payload = Vec::new(); + + let metadata = read_metadata(reader).expect("failed to read metadata"); + reader.read_to_end(&mut payload).expect("failed to read payload"); + + let otype = ObjectType::new(b"test").expect("failed to create ObjectType"); + assert_eq!(metadata, ObjectMetadata::new(otype.clone(), 12)); + assert_eq!(payload, b"Hello World!"); + + let mut data2 = Vec::new(); + + write_metadata(&mut data2, &otype, payload.len() as u64).expect("failed to write metadata"); + data2.write_all(&payload).expect("failed to write payload"); + + assert_eq!(data2, data); + } + + #[test] + fn test_new_digest() { + let mut sha256 = new_digest("sha256").expect("failed to create sha256 digest"); + + sha256.update(b"Hello World!"); + assert_eq!(hex(&sha256.finalize()), "7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069"); + + assert!(new_digest("do_not_exists").is_err()); + } +} diff --git a/cas-simple/src/wfile.rs b/cas-simple/src/wfile.rs new file mode 100644 index 0000000..af03e58 --- /dev/null +++ b/cas-simple/src/wfile.rs @@ -0,0 +1,106 @@ +// This file is part of bsv. +// +// bsv is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// cdb is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for +// more details. +// +// You should have received a copy of the Affero GNU General Public License +// along with cdb. If not, see . + + +use std::path::{PathBuf}; + +use cas_core::{ + Error, ObjectId, Result, Writer, +}; + +use crate::utils::{obj_path, tmp_dir}; + + +pub struct WFile { + file: tempfile::NamedTempFile, + db_path: PathBuf, +} + +impl WFile { + pub fn new(db_path: PathBuf) -> Result { + let file = tempfile::NamedTempFile::new_in(tmp_dir(&db_path)).or_else(|err| + Err(Error::error(format!("failed to create_file: {}", err))))?; + Ok(WFile { + file, + db_path, + }) + } +} + +impl std::io::Write for WFile { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.file.write(buf) + } + + fn flush(&mut self) -> std::io::Result<()> { + self.file.flush() + } +} + +impl Writer for WFile { + fn finalize(self: Box) -> Result { + Err(Error::error("reader pipline has no digest step")) + } + + fn _finalize(self: Box, oid: ObjectId) -> Result { + let path = obj_path(&self.db_path, &oid); + std::fs::create_dir_all( + path.parent().expect("cannot access to object parent directory") + ).or_else(|err| + Err(Error::error(format!("failed to create object dir: {}", err))))?; + self.file.persist(path).or_else(|err| + Err(Error::error(format!("failed to persist file: {}", err))))?; + Ok(oid) + } +} + + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use crate::utils::{obj_dir, obj_path}; + use super::*; + + #[test] + fn test_wfile() { + use std::io::{Read, Write}; + + let data = b"Hello World!"; + let oid = ObjectId::from_str( + "7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069" + ).expect("invalid object id"); + + let dir = tempfile::TempDir::new() + .expect("failed to create tmp dir"); + + std::fs::create_dir(tmp_dir(dir.path())).expect("failed to create db/obj dir"); + std::fs::create_dir(obj_dir(dir.path())).expect("failed to create db/tmp dir"); + + { + let mut writer = Box::new(WFile::new(dir.path().to_path_buf()).expect("failed to create WFile")); + writer.write(data).expect("failed to write to WFile"); + let oid2 = writer._finalize(oid.clone()).expect("failed to finalize WFile"); + + assert_eq!(oid2, oid); + } + + let mut file = std::fs::File::open(obj_path(dir.path(), &oid)).expect("failed to open object file"); + let mut buf = Vec::new(); + file.read_to_end(&mut buf).expect("failed to read object file"); + + assert_eq!(buf, data); + } +}