From 956d2d337ca8be93ec861c755e929c88af6b6f33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Boy=C3=A9?= Date: Sat, 4 Mar 2023 18:02:30 +0100 Subject: [PATCH] TreeWalker WIP. --- cas-core/src/error.rs | 3 + cas-core/src/object_id.rs | 8 ++ libbsv/.gitignore | 2 - libbsv/src/lib.rs | 2 +- libbsv/src/tree_item.rs | 4 +- libbsv/src/tree_walker.rs | 206 ++++++++++++++++++-------------------- 6 files changed, 111 insertions(+), 114 deletions(-) delete mode 100644 libbsv/.gitignore diff --git a/cas-core/src/error.rs b/cas-core/src/error.rs index c266474..4214c07 100644 --- a/cas-core/src/error.rs +++ b/cas-core/src/error.rs @@ -84,6 +84,9 @@ pub enum Error { #[error("io error: {0}")] IoError(#[from] std::io::Error), + #[error("non-unicode file name: '{0}'")] + NonUnicodeFileName(String), + #[error("{0}")] UnknownError(String), } diff --git a/cas-core/src/object_id.rs b/cas-core/src/object_id.rs index 898af3a..52afe93 100644 --- a/cas-core/src/object_id.rs +++ b/cas-core/src/object_id.rs @@ -57,6 +57,14 @@ impl FromStr for ObjectId { } } +impl Default for ObjectId { + fn default() -> Self { + return Self { + id: Arc::new(vec![]), + } + } +} + impl fmt::Display for ObjectId { fn fmt(&self, f: &mut fmt::Formatter) -> std::result::Result<(), fmt::Error> { write_hex(f, self.id.as_slice()) diff --git a/libbsv/.gitignore b/libbsv/.gitignore deleted file mode 100644 index 96ef6c0..0000000 --- a/libbsv/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/target -Cargo.lock diff --git a/libbsv/src/lib.rs b/libbsv/src/lib.rs index 0067e64..1e82961 100644 --- a/libbsv/src/lib.rs +++ b/libbsv/src/lib.rs @@ -23,7 +23,7 @@ extern crate cas_core; mod permissions; mod tree_item; -// mod tree_walker; +mod tree_walker; // mod config; mod path_map; mod ignore; diff --git a/libbsv/src/tree_item.rs b/libbsv/src/tree_item.rs index 7bb3ebd..58b3ff5 100644 --- a/libbsv/src/tree_item.rs +++ b/libbsv/src/tree_item.rs @@ -51,12 +51,12 @@ pub struct TreeItem { } impl TreeItem { - pub fn from_metadata(name: &str, metadata: &std::fs::Metadata, oid: ObjectId) -> Result { + pub fn from_metadata(name: String, metadata: &std::fs::Metadata, oid: ObjectId) -> Result { let otype = otype_from_metadata(metadata)?; let permissions = Permissions::from_metadata(metadata)?; Ok(Self { - name: name.to_string(), + name: name, otype, size: metadata.len(), created: metadata.created().unwrap_or(UNIX_EPOCH), diff --git a/libbsv/src/tree_walker.rs b/libbsv/src/tree_walker.rs index 7fafba6..9c52e85 100644 --- a/libbsv/src/tree_walker.rs +++ b/libbsv/src/tree_walker.rs @@ -14,130 +14,118 @@ // along with cdb. If not, see . -use std::path::Utf8Path; -// use std::fs::Metadata; -use std::fs::{DirEntry, ReadDir, read_dir}; +use std::iter::Peekable; +use std::fs::{DirEntry, Metadata, ReadDir, read_dir}; +use std::vec::IntoIter; -use cas_core::{err, Error, Result}; +use camino::{Utf8Path, Utf8PathBuf}; -// use crate::{PathPair, Repository, TreeItem}; +use cas_core::{err, Error, ObjectId, Result}; +use crate::{PathPair, Repository, TreeItem}; -#[derive(Debug)] -pub struct RecursiveDirIterator { - dir_iterators: Vec, + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Action { + Add, + Update, + Remove, + Skip, + Ignore, } -impl RecursiveDirIterator { - pub fn new>(root_dir: P) -> Result { - Ok(RecursiveDirIterator { - dir_iterators: vec![read_dir(root_dir)?], - }) - } +pub struct TreeWalker { + dir_it: Peekable>>, + prev_tree_it: Peekable>, +} - pub fn pop_dir(&mut self) -> Result<()> { - match self.dir_iterators.pop() { - Some(_) => Ok(()), - None => err!("cannot pop directory: iterator reached the end"), - } + +impl TreeWalker { + pub fn new>(path: P, prev_tree: Vec) -> Result { + let dir_entries = read_dir(path.as_ref().to_path_buf())? + .map(|res| res.map_err(|err| err.into())) + .collect::>>()?; + + let mut dir_items: Vec<_> = dir_entries.into_iter() + .map(|dir_entry| { + let file_name = dir_entry + .file_name() + .into_string() + // .or_else(|os_string| err!("non-unicode file name '{}'", os_string.to_string_lossy()))?; + .or_else(|os_string| Err(Error::NonUnicodeFileName(os_string.to_string_lossy().into())))?; + let metadata = dir_entry.metadata()?; + Ok(TreeItem::from_metadata( + file_name, + &metadata, + ObjectId::default() + )? + ) + }) + .collect(); + dir_items.sort_unstable_by_key(|result| { + match result { + Ok(entry) => entry.name.clone(), + Err(_) => String::default(), + } + }); + + Ok(Self { + dir_it: dir_items.into_iter().peekable(), + prev_tree_it: prev_tree.into_iter().peekable(), + }) } } -impl Iterator for RecursiveDirIterator { - type Item = Result; +impl Iterator for TreeWalker { + type Item = Result<(Action, TreeItem)>; fn next(&mut self) -> Option { - while let Some(top_it) = self.dir_iterators.last_mut() { - let next = top_it.next(); - if let Some(item) = next { - let item = item.and_then(|dir_entry| { - if let Ok(file_type) = dir_entry.file_type() { - if file_type.is_dir() { - self.dir_iterators.push(read_dir(dir_entry.path())?) - } - } - Ok(dir_entry) - }); - return Some(item.map_err(Into::into)); - } - else { - self.dir_iterators.pop(); + match (self.dir_it.peek(), self.prev_tree_it.peek()) { + (Some(Err(_)), _) => { + Some(Err(self.dir_it.next().unwrap().unwrap_err())) } + (Some(Ok(curr_item)), Some(prev_item)) => { + if curr_item.name == prev_item.name { + let action = + if curr_item.modified != prev_item.modified { + Action::Update + } + else { + Action::Skip + }; + self.prev_tree_it.next(); + Some(Ok((action, self.dir_it.next().unwrap().unwrap()))) + } + else if curr_item.name < prev_item.name { + Some(Ok((Action::Add, self.dir_it.next().unwrap().unwrap()))) + } + else { + Some(Ok((Action::Remove, self.prev_tree_it.next().unwrap()))) + } + }, + (Some(_), None) => { + Some(Ok((Action::Add, self.dir_it.next().unwrap().unwrap()))) + }, + (None, Some(_)) => { + Some(Ok((Action::Remove, self.prev_tree_it.next().unwrap()))) + }, + (None, None) => None, } - None } } -// pub trait FsWalker { -// fn visit(&self) -// } - - -// #[derive(Debug, Clone, Copy, PartialEq, Eq)] -// pub enum Action { -// Default, -// Add, -// Update, -// Remove, -// Skip, -// Ignore, -// } - - -// #[derive()] -// pub struct TreeWalker<'repo> { -// repository: &'repo Repository, -// rules: Vec) -> Result>>, -// default_action: Action, -// reporters: Vec) -> Result<()>>>, -// } - - -// impl<'repo> TreeWalker<'repo> { -// pub fn process>(&self, physic_path: P) -> Result> -// { -// let physic_path_ref = physic_path.as_ref(); - -// let metadata = std::fs::symlink_metadata(physic_path_ref) -// .or_else(|err| err!("failed to read {}: {}", physic_path_ref, err))?; - -// let path_pair = match self.repository.path_pair_from_physic_path(physic_path)? { -// Some(path_pair) => path_pair, -// None => return Ok(None), -// }; - -// let maybe_tree = self.repository -// .oid_from_logic_path(&path_pair.logic) -// .ok() -// .map(|oid| self.repository.read_tree(&oid)) -// .transpose()? -// .flatten(); - -// self.process_impl(&path_pair, &metadata, &maybe_tree) -// .map(|oid| Some(oid)) -// } - -// fn process_impl(&self, path_pair: &PathPair, metadata: &Metadata, maybe_tree: &Option>) -> Result { -// err!("not implemented") -// } - -// fn eval_rules(&self, path: &Utf8Path, maybe_item: Option<&TreeItem>) -> Result { -// self.rules.iter() -// .map(|rule| -// rule(path, maybe_item) -// ) -// .skip_while(|action_result| -// *action_result == Ok(Action::Default) -// ) -// .nth(0) -// .unwrap_or(Ok(self.default_action)) -// } - -// fn report(&self, path: &Utf8Path, action: Action, maybe_oid: Option<&ObjectId>) -> Result<()> { -// for reporter in &self.reporters { -// reporter(path, action, maybe_oid)? -// } -// Ok(()) -// } -// } +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_tree_walker() { + for item in TreeWalker::new("/home/draklaw/tmp", vec![]).unwrap() { + match item { + Ok((action, tree_item)) => println!("{:?} {:?}", action, tree_item.name), + Err(err) => println!("error while iterating directory: {}", err), + } + } + } +}