From 5cca1829addc6e9c2f0108ea4f3a5cb27ed5e5a7 Mon Sep 17 00:00:00 2001 From: Draklaw Date: Mon, 15 Aug 2022 22:52:27 +0200 Subject: [PATCH] Implement ignore rules. --- Cargo.lock | 39 +------------- libbsv/Cargo.toml | 4 +- libbsv/src/ignore.rs | 118 +++++++++++++++++++++++++++++++++++++++---- libbsv/src/lib.rs | 4 +- 4 files changed, 112 insertions(+), 53 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index be2e32e..6b4be79 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,15 +26,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "bstr" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" -dependencies = [ - "memchr", -] - [[package]] name = "camino" version = "1.0.7" @@ -87,12 +78,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - [[package]] name = "generic-array" version = "0.14.4" @@ -114,19 +99,6 @@ dependencies = [ "wasi", ] -[[package]] -name = "globset" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a1e17342619edbc21a964c2afbeb6c820c6a2560032872f397bb97ea127bd0a" -dependencies = [ - "aho-corasick", - "bstr", - "fnv", - "log", - "regex", -] - [[package]] name = "libbsv" version = "0.1.0" @@ -134,7 +106,7 @@ dependencies = [ "camino", "cas-core", "cas-simple", - "globset", + "regex", "toml", ] @@ -144,15 +116,6 @@ version = "0.2.98" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790" -[[package]] -name = "log" -version = "0.4.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] - 
[[package]] name = "memchr" version = "2.5.0" diff --git a/libbsv/Cargo.toml b/libbsv/Cargo.toml index f4ad236..b226b18 100644 --- a/libbsv/Cargo.toml +++ b/libbsv/Cargo.toml @@ -7,7 +7,7 @@ license = "AGPL-3.0-or-later" [dependencies] toml = "0.5.8" -camino = { version = "1.0.7" } -globset = "0.4.9" +camino = "1.0.7" +regex = "1.6.0" cas-core = { path = "../cas-core" } cas-simple = { path = "../cas-simple" } \ No newline at end of file diff --git a/libbsv/src/ignore.rs b/libbsv/src/ignore.rs index 8f78c4e..5244c40 100644 --- a/libbsv/src/ignore.rs +++ b/libbsv/src/ignore.rs @@ -15,12 +15,12 @@ use camino::{Utf8Path, Utf8PathBuf}; -use globset::GlobSet; +use regex::RegexSet; use cas_core::{err, Error, Result}; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Action { Ignore, Accept, @@ -29,7 +29,7 @@ pub enum Action { #[derive(Debug)] pub struct IgnoreRules { - patterns: GlobSet, + patterns: RegexSet, actions: Vec<Action>, } @@ -37,21 +37,101 @@ pub struct IgnoreRules { impl IgnoreRules { pub fn new() -> Self { Self { - patterns: GlobSet::default(), + patterns: RegexSet::new(&[] as &[&str]).unwrap(), actions: vec![], } } - pub fn from_vec(vec: Vec<(Action, Utf8PathBuf)>) -> Result<Self> { - err!("Todo") - } + pub fn from_source<P: AsRef<Utf8Path>>(source: &str, root: P) -> Result<Self> { + assert!(root.as_ref().is_absolute()); - pub fn from_ignore_file(ignore_file: &str, root: &Utf8Path) -> Result<Self> { - err!("Todo") - } + let separator = if std::path::MAIN_SEPARATOR == '/' { + "/" + } + else { + assert_eq!(std::path::MAIN_SEPARATOR, '\\'); + "\\\\" + }; + + let mut patterns = Vec::<String>::default(); + let mut actions = Vec::<Action>::default(); + + for line in source.lines() { + let rule = line.trim(); + let mut rule_it = rule.chars().peekable(); + if rule_it.peek().is_none() || rule_it.peek() == Some(&'#') { + continue; + } + + if rule_it.peek() == Some(&'!') { + rule_it.next(); + actions.push(Action::Accept); + } + else { + actions.push(Action::Ignore); + } + + let mut pat = 
String::new(); + let mut last_is_sep = false; + + if rule_it.peek() == Some(&'/') { + pat.push_str("^"); + pat.push_str(&regex::escape(root.as_ref().as_str())); + } + + while let Some(c) = rule_it.next() { + if c == '/' { + pat.push_str(separator); + last_is_sep = true; + continue; + } + else if c == '*' { + if rule_it.peek() == Some(&'*') { + rule_it.next(); + if !last_is_sep || (!rule_it.peek().is_none() && rule_it.peek() != Some(&'/')) { + return err!("** pattern can only be used as a whole path segment"); + } + pat.push_str(".*"); + } + else { + pat.push_str("[^/]*"); + } + } + else if c == '\\' { + let c2 = rule_it.next().ok_or(Error::unknown("invalid \\ at end of rule"))?; + let mut buf = [0u8; 4]; + pat.push_str(&regex::escape(c2.encode_utf8(&mut buf))); + } + else { + let mut buf = [0u8; 4]; + pat.push_str(&regex::escape(c.encode_utf8(&mut buf))); + } + last_is_sep = false; + } + + if last_is_sep { + pat.pop(); + } + pat.push_str("(/.*)?$"); + + dbg!(&pat); + patterns.push(pat); + } - pub fn is_ignored<P: AsRef<Utf8Path>>(&self, path: P) -> bool { + Ok(Self { + patterns: RegexSet::new(patterns) + .or_else(|err| err!("failed to compile ignore rules: {err}"))?, + actions: actions, + }) + } + pub fn action_for<P: AsRef<Utf8Path>>(&self, path: P) -> Action { + assert!(path.as_ref().is_absolute()); + let index = self.patterns.matches(path.as_ref().as_str()) + .iter() + .next() + .unwrap_or(self.actions.len()); + *self.actions.get(index).unwrap_or(&Action::Accept) } } @@ -63,5 +143,21 @@ mod tests { #[test] fn test_path_map() { + let root = Utf8PathBuf::from("/foo.dir/bar"); + let patterns = "!hello/world\nhello\n/world/\n\\!\\*\n*.bak"; + let ignore = IgnoreRules::from_source(patterns, root).unwrap(); + + assert_eq!(ignore.action_for(Utf8PathBuf::from("/foo.dir/bar/test")), Action::Accept); + assert_eq!(ignore.action_for(Utf8PathBuf::from("/foo.dir/bar/test/hello/aoeu")), Action::Ignore); + assert_eq!(ignore.action_for(Utf8PathBuf::from("/foo.dir/bar/test/hello_world/aoeu")), Action::Accept); + 
assert_eq!(ignore.action_for(Utf8PathBuf::from("/foo.dir/bar/test/world/aoeu")), Action::Accept); + assert_eq!(ignore.action_for(Utf8PathBuf::from("/foo.dir/bar/test/hello/world/aoeu")), Action::Accept); + assert_eq!(ignore.action_for(Utf8PathBuf::from("/foo.dir/bar/world/aoeu")), Action::Ignore); + assert_eq!(ignore.action_for(Utf8PathBuf::from("/foo.dir/bar/world")), Action::Ignore); + assert_eq!(ignore.action_for(Utf8PathBuf::from("/fooXdir/bar/world")), Action::Accept); + assert_eq!(ignore.action_for(Utf8PathBuf::from("/foo.dir/bar/test/!*/aoeu")), Action::Ignore); + assert_eq!(ignore.action_for(Utf8PathBuf::from("/foo.dir/bar/test/file.bak")), Action::Ignore); + assert_eq!(ignore.action_for(Utf8PathBuf::from("/foo.dir/bar/test/fileXbak")), Action::Accept); + assert_eq!(ignore.action_for(Utf8PathBuf::from("/foo.dir/bar/test.bak/file")), Action::Ignore); } } diff --git a/libbsv/src/lib.rs b/libbsv/src/lib.rs index 2b90c91..0067e64 100644 --- a/libbsv/src/lib.rs +++ b/libbsv/src/lib.rs @@ -16,7 +16,7 @@ extern crate toml; extern crate camino; -extern crate globset; +extern crate regex; extern crate cas_core; @@ -26,7 +26,7 @@ mod tree_item; // mod tree_walker; // mod config; mod path_map; -// mod ignore; +mod ignore; mod repository;