From 1c770a95608e0d315cadc8d746a7bdf3199e8ecd Mon Sep 17 00:00:00 2001 From: Edward Shen Date: Sat, 20 Feb 2021 00:33:17 -0500 Subject: [PATCH] completely refactor config --- src/config.rs | 410 ++++++++++++++++-------------- src/parser.rs | 78 +++--- src/values.rs | 19 +- tests/parser_integration_tests.rs | 7 +- 4 files changed, 265 insertions(+), 249 deletions(-) diff --git a/src/config.rs b/src/config.rs index f16268f..ca4b825 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,223 +1,243 @@ -use crate::parser::{parse_from_str, Event, ParsedSectionHeader, Parser}; -use crate::values::Value; -use serde::{Deserialize, Serialize}; -use std::{collections::HashMap, convert::TryFrom, io::Read}; +use std::collections::HashMap; -type SectionConfig<'a> = HashMap<&'a str, Value<'a>>; +use crate::parser::{parse_from_str, Event, Parser, ParserError}; -/// This struct provides a high level wrapper to access `git-config` file. This -/// struct exists primarily for reading a config rather than modifying it, as -/// it discards comments and unnecessary whitespace. -#[derive(Clone, Eq, PartialEq, Debug, Default)] -pub struct GitConfig<'a>(HashMap<&'a str, HashMap<&'a str, SectionConfig<'a>>>); +#[derive(PartialEq, Eq, Hash, Copy, Clone, PartialOrd, Ord)] +struct SectionId(usize); -const EMPTY_MARKER: &str = "@"; // Guaranteed to not be a {sub,}section or name. +enum LookupTreeNode<'a> { + Terminal(Vec), + NonTerminal(HashMap<&'a str, Vec>), +} + +/// High level `git-config` reader and writer. +pub struct GitConfig<'a> { + front_matter_events: Vec>, + section_lookup_tree: HashMap<&'a str, Vec>>, + sections: HashMap>>, + section_header_separators: HashMap>, + section_id_counter: usize, +} impl<'a> GitConfig<'a> { - /// Attempts to construct a instance given a [`Parser`] instance. - /// - /// This is _not_ a zero-copy operation. Due to how partial values may be - /// provided, we necessarily need to copy and store these values until we - /// are done. - pub fn try_from_parser_with_options( - parser: Parser<'a>, - options: ConfigOptions, - ) -> Result { - Self::try_from_event_iter_with_options(parser.into_iter(), options) + /// Convenience constructor. Attempts to parse the provided string into a + /// [`GitConfig`]. + pub fn from_str(str: &'a str) -> Result { + Ok(Self::from_parser(parse_from_str(str)?)) } - pub fn try_from_event_iter_with_options( - iter: impl Iterator>, - options: ConfigOptions, - ) -> Result { - let mut sections: HashMap<&'a str, HashMap<&'a str, SectionConfig<'a>>> = HashMap::new(); - let mut current_section_name = EMPTY_MARKER; - let mut current_subsection_name = EMPTY_MARKER; - let mut ignore_until_next_section = false; - let mut current_key = EMPTY_MARKER; - let mut value_scratch = String::new(); + pub fn from_parser(parser: Parser<'a>) -> Self { + // Monotonically increasing + let mut section_id_counter: usize = 0; - for event in iter { + // Fields for the struct + let mut front_matter_events: Vec> = vec![]; + let mut sections: HashMap>> = HashMap::new(); + let mut section_lookup_tree: HashMap<&str, Vec> = HashMap::new(); + let mut section_header_separators = HashMap::new(); + + // Current section that we're building + let mut current_section_name: Option<&str> = None; + let mut current_subsection_name: Option<&str> = None; + let mut maybe_section: Option>> = None; + + for event in parser.into_iter() { match event { - Event::Comment(_) => (), - Event::SectionHeader(ParsedSectionHeader { - name, - separator: _, - subsection_name, - }) => { - current_section_name = name; - match (sections.get_mut(name), options.on_duplicate_section) { - (Some(_), OnDuplicateBehavior::Error) => todo!(), - (Some(section), OnDuplicateBehavior::Overwrite) => { - section.clear(); - } - (Some(_), OnDuplicateBehavior::KeepExisting) => { - ignore_until_next_section = true; - } - (None, _) => { - sections.insert(name, HashMap::default()); + e @ Event::Comment(_) => match maybe_section { + Some(ref mut section) => section.push(e), + None => front_matter_events.push(e), + }, + Event::SectionHeader(header) => { + // Push current section to struct + let new_section_id = SectionId(section_id_counter); + if let Some(section) = maybe_section.take() { + sections.insert(new_section_id, section); + let lookup = section_lookup_tree + .entry(current_section_name.unwrap()) + .or_default(); + + let mut found_node = false; + if let Some(subsection_name) = current_subsection_name { + for node in lookup.iter_mut() { + if let LookupTreeNode::NonTerminal(subsection) = node { + found_node = true; + subsection + .entry(subsection_name) + .or_default() + .push(new_section_id); + break; + } + } + if !found_node { + let mut map = HashMap::new(); + map.insert(subsection_name, vec![new_section_id]); + lookup.push(LookupTreeNode::NonTerminal(map)); + } + } else { + for node in lookup.iter_mut() { + if let LookupTreeNode::Terminal(vec) = node { + found_node = true; + vec.push(new_section_id); + break; + } + } + if !found_node { + lookup.push(LookupTreeNode::Terminal(vec![new_section_id])) + } } + + section_id_counter += 1; } - match subsection_name { - Some(v) => current_subsection_name = v, - None => { - current_subsection_name = EMPTY_MARKER; - continue; - } - }; - - // subsection parsing - - match ( - sections - .get_mut(current_section_name) - .unwrap() // Guaranteed to exist at this point - .get_mut(current_subsection_name), - options.on_duplicate_section, - ) { - (Some(_), OnDuplicateBehavior::Error) => todo!(), - (Some(section), OnDuplicateBehavior::Overwrite) => section.clear(), - (Some(_), OnDuplicateBehavior::KeepExisting) => { - ignore_until_next_section = true; - } - (None, _) => (), - } + // Initialize new section + let (name, subname) = (header.name, header.subsection_name); + maybe_section = Some(vec![]); + current_section_name = Some(name); + current_subsection_name = subname; + // We need to store the new, current id counter, so don't + // use new_section_id here and use the already incremented + // section id value. + section_header_separators + .insert(SectionId(section_id_counter), header.separator); } - _ if ignore_until_next_section => (), - Event::Key(key) => { - current_key = key; - } - Event::Value(v) => { - Self::insert_value( - &mut sections, - current_section_name, - current_subsection_name, - current_key, - v, - options.on_duplicate_name, - )?; - } - Event::Newline(_) => (), - Event::ValueNotDone(v) => value_scratch.push_str(v), - Event::ValueDone(v) => { - let mut completed_value = String::new(); - value_scratch.push_str(v); - std::mem::swap(&mut completed_value, &mut value_scratch); - Self::insert_value( - &mut sections, - current_section_name, - current_subsection_name, - current_key, - Value::from_string(completed_value), - options.on_duplicate_name, - )?; - } - Event::Whitespace(_) => (), + e @ Event::Key(_) => maybe_section + .as_mut() + .expect("Got a section-only event before a section") + .push(e), + e @ Event::Value(_) => maybe_section + .as_mut() + .expect("Got a section-only event before a section") + .push(e), + e @ Event::Newline(_) => match maybe_section { + Some(ref mut section) => section.push(e), + None => front_matter_events.push(e), + }, + e @ Event::ValueNotDone(_) => maybe_section + .as_mut() + .expect("Got a section-only event before a section") + .push(e), + e @ Event::ValueDone(_) => maybe_section + .as_mut() + .expect("Got a section-only event before a section") + .push(e), + e @ Event::Whitespace(_) => match maybe_section { + Some(ref mut section) => section.push(e), + None => front_matter_events.push(e), + }, } } - Ok(Self(sections)) + Self { + front_matter_events, + section_lookup_tree, + sections, + section_header_separators, + section_id_counter, + } } - fn insert_value( - map: &mut HashMap<&'a str, HashMap<&'a str, SectionConfig<'a>>>, - section: &str, - subsection: &str, - key: &'a str, - value: Value<'a>, - on_dup: OnDuplicateBehavior, - ) -> Result<(), ()> { - let config = map.get_mut(section).unwrap().get_mut(subsection).unwrap(); + pub fn get_raw_single_value<'b>( + &self, + section_name: &'b str, + subsection_name: Option<&'b str>, + key: &'b str, + ) -> Result<&'a str, GitConfigError<'b>> { + // Note: cannot wrap around the raw_multi_value method because we need + // to guarantee that the highest section id is used (so that we follow + // the "last one wins" resolution strategy by `git-config`). + let section_id = self + .get_section_id_by_name_and_subname(section_name, subsection_name) + .ok_or(GitConfigError::SubSectionDoesNotExist(subsection_name))?; - if config.contains_key(key) { - match on_dup { - OnDuplicateBehavior::Error => return Err(()), - OnDuplicateBehavior::Overwrite => { - config.insert(key, value); - } - OnDuplicateBehavior::KeepExisting => (), + // section_id is guaranteed to exist in self.sections, else we have a + // violated invariant. + let events = self.sections.get(§ion_id).unwrap(); + let mut found_key = false; + for event in events { + match event { + Event::Key(event_key) if *event_key == key => found_key = true, + Event::Value(v) if found_key => return Ok(v), + _ => (), } + } + + Err(GitConfigError::KeyDoesNotExist(key)) + } + + fn get_section_id_by_name_and_subname<'b>( + &'a self, + section_name: &'b str, + subsection_name: Option<&'b str>, + ) -> Option { + self.get_section_ids_by_name_and_subname(section_name, subsection_name) + .map(|vec| vec.into_iter().max()) + .flatten() + } + + pub fn get_raw_multi_value<'b>( + &'a self, + section_name: &'b str, + subsection_name: Option<&'b str>, + key: &'b str, + ) -> Result, GitConfigError<'b>> { + let values = self + .get_section_ids_by_name_and_subname(section_name, subsection_name) + .ok_or(GitConfigError::SubSectionDoesNotExist(subsection_name))? + .iter() + .map(|section_id| { + let mut found_key = false; + // section_id is guaranteed to exist in self.sections, else we have a + // violated invariant. + for event in self.sections.get(section_id).unwrap() { + match event { + Event::Key(event_key) if *event_key == key => found_key = true, + Event::Value(v) if found_key => return Ok(*v), + _ => (), + } + } + + Err(GitConfigError::KeyDoesNotExist(key)) + }) + .filter_map(Result::ok) + .collect::>(); + + if values.is_empty() { + Err(GitConfigError::KeyDoesNotExist(key)) } else { - config.insert(key, value); + Ok(values) } - - Ok(()) } - pub fn get_section(&self, section_name: &str) -> Option<&SectionConfig<'_>> { - self.get_subsection(section_name, EMPTY_MARKER) - } - - pub fn get_section_value(&self, section_name: &str, key: &str) -> Option<&Value<'_>> { - self.get_section(section_name) - .map(|section| section.get(key)) - .flatten() - } - - pub fn get_subsection( - &self, - section_name: &str, - subsection_name: &str, - ) -> Option<&SectionConfig<'_>> { - self.0 - .get(section_name) - .map(|subsections| subsections.get(subsection_name)) - .flatten() - } - - pub fn get_subsection_value( - &self, - section_name: &str, - subsection_name: &str, - key: &str, - ) -> Option<&Value<'_>> { - self.get_subsection(section_name, subsection_name) - .map(|section| section.get(key)) - .flatten() + fn get_section_ids_by_name_and_subname<'b>( + &'a self, + section_name: &'b str, + subsection_name: Option<&'b str>, + ) -> Option> { + let section_ids = self.section_lookup_tree.get(section_name)?; + if let Some(subsect_name) = subsection_name { + let mut maybe_ids = None; + for node in section_ids { + if let LookupTreeNode::NonTerminal(subsection_lookup) = node { + maybe_ids = subsection_lookup.get(subsect_name); + break; + } + } + maybe_ids.map(|vec| vec.clone()) + } else { + let mut maybe_ids = None; + for node in section_ids { + if let LookupTreeNode::Terminal(subsection_lookup) = node { + maybe_ids = subsection_lookup.iter().max(); + break; + } + } + maybe_ids.map(|v| vec![*v]) + } } } -impl<'a> TryFrom> for GitConfig<'a> { - type Error = (); - - fn try_from(parser: Parser<'a>) -> Result { - Self::try_from_parser_with_options(parser, ConfigOptions::default()) - } -} - -#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)] -pub struct ConfigOptions { - on_duplicate_section: OnDuplicateBehavior, - on_duplicate_name: OnDuplicateBehavior, -} - -impl ConfigOptions { - pub fn on_duplicate_section(&mut self, behavior: OnDuplicateBehavior) -> &mut Self { - self.on_duplicate_section = behavior; - self - } - - pub fn on_duplicate_name(&mut self, behavior: OnDuplicateBehavior) -> &mut Self { - self.on_duplicate_name = behavior; - self - } -} - -/// [`GitConfig`]'s valid possible actions when encountering a duplicate section -/// or key name within a section. -#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] -pub enum OnDuplicateBehavior { - /// Fail the operation, returning an error instead. This is the strictest - /// behavior, and is the default. - Error, - /// Discard any data we had before on the - Overwrite, - KeepExisting, -} - -impl Default for OnDuplicateBehavior { - fn default() -> Self { - Self::Error - } +pub enum GitConfigError<'a> { + SectionDoesNotExist(&'a str), + SubSectionDoesNotExist(Option<&'a str>), + KeyDoesNotExist(&'a str), } diff --git a/src/parser.rs b/src/parser.rs index 064ac59..8291fae 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -7,7 +7,6 @@ //! which can be converted into an [`Event`] iterator. The [`Parser`] also has //! additional methods for accessing leading comments or events by section. -use crate::values::{Boolean, TrueVariant, Value}; use nom::bytes::complete::{escaped, tag, take_till, take_while}; use nom::character::complete::{char, none_of, one_of}; use nom::character::{is_newline, is_space}; @@ -22,21 +21,32 @@ use std::iter::FusedIterator; /// Syntactic events that occurs in the config. #[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] pub enum Event<'a> { + /// A comment with a comment tag and the comment itself. Note that the + /// comment itself may contain additional whitespace and comment markers + /// at the beginning. Comment(ParsedComment<'a>), + /// A section header containing the section name and a subsection, if it + /// exists. SectionHeader(ParsedSectionHeader<'a>), + /// A name to a value in a section. Key(&'a str), - /// - Value(Value<'a>), + /// A completed value. This may be any string, including the empty string, + /// if an implicit boolean value is used. Note that these values may contain + /// spaces and any special character. This value is also unprocessed, so it + /// it may contain double quotes that should be replaced. + Value(&'a str), /// Represents any token used to signify a new line character. On Unix /// platforms, this is typically just `\n`, but can be any valid newline /// sequence. Newline(&'a str), /// Any value that isn't completed. This occurs when the value is continued /// onto the next line. A Newline event is guaranteed after, followed by - /// either another ValueNotDone or a ValueDone. + /// either a ValueDone, a Whitespace, or another ValueNotDone. ValueNotDone(&'a str), /// The last line of a value which was continued onto another line. ValueDone(&'a str), + /// A continuous section of insignificant whitespace. Values with internal + /// spaces will not be separated by this event. Whitespace(&'a str), } @@ -353,12 +363,7 @@ fn config_value<'a>(i: &'a str) -> IResult<&'a str, Vec>> { Ok((i, values)) } } else { - Ok(( - i, - vec![Event::Value(Value::Boolean(Boolean::True( - TrueVariant::Implicit, - )))], - )) + Ok((i, vec![Event::Value("")])) } } @@ -449,7 +454,7 @@ fn value_impl<'a>(i: &'a str) -> IResult<&'a str, Vec>> { if partial_value_found { events.push(Event::ValueDone(remainder_value)); } else { - events.push(Event::Value(Value::from_str(remainder_value))); + events.push(Event::Value(remainder_value)); } Ok((i, events)) @@ -643,7 +648,7 @@ mod parse { fn no_comment() { assert_eq!( value_impl("hello").unwrap(), - fully_consumed(vec![Event::Value(Value::from_str("hello"))]) + fully_consumed(vec![Event::Value("hello")]) ); } @@ -651,7 +656,7 @@ mod parse { fn no_comment_newline() { assert_eq!( value_impl("hello\na").unwrap(), - ("\na", vec![Event::Value(Value::from_str("hello"))]) + ("\na", vec![Event::Value("hello")]) ) } @@ -659,7 +664,7 @@ mod parse { fn semicolon_comment_not_consumed() { assert_eq!( value_impl("hello;world").unwrap(), - (";world", vec![Event::Value(Value::from_str("hello")),]) + (";world", vec![Event::Value("hello"),]) ); } @@ -667,7 +672,7 @@ mod parse { fn octothorpe_comment_not_consumed() { assert_eq!( value_impl("hello#world").unwrap(), - ("#world", vec![Event::Value(Value::from_str("hello")),]) + ("#world", vec![Event::Value("hello"),]) ); } @@ -675,10 +680,7 @@ mod parse { fn values_with_extraneous_whitespace_without_comment() { assert_eq!( value_impl("hello ").unwrap(), - ( - " ", - vec![Event::Value(Value::from_str("hello"))] - ) + (" ", vec![Event::Value("hello")]) ); } @@ -686,17 +688,11 @@ mod parse { fn values_with_extraneous_whitespace_before_comment() { assert_eq!( value_impl("hello #world").unwrap(), - ( - " #world", - vec![Event::Value(Value::from_str("hello")),] - ) + (" #world", vec![Event::Value("hello"),]) ); assert_eq!( value_impl("hello ;world").unwrap(), - ( - " ;world", - vec![Event::Value(Value::from_str("hello")),] - ) + (" ;world", vec![Event::Value("hello"),]) ); } @@ -704,10 +700,7 @@ mod parse { fn trans_escaped_comment_marker_not_consumed() { assert_eq!( value_impl(r##"hello"#"world; a"##).unwrap(), - ( - "; a", - vec![Event::Value(Value::from_str(r##"hello"#"world"##)),] - ) + ("; a", vec![Event::Value(r##"hello"#"world"##)]) ); } @@ -715,10 +708,7 @@ mod parse { fn complex_test() { assert_eq!( value_impl(r#"value";";ahhhh"#).unwrap(), - ( - ";ahhhh", - vec![Event::Value(Value::from_str(r#"value";""#)),] - ) + (";ahhhh", vec![Event::Value(r#"value";""#)]) ); } @@ -807,17 +797,17 @@ mod parse { Event::Key("a"), Event::Whitespace(" "), Event::Whitespace(" "), - Event::Value(Value::from_str("b")), + Event::Value("b"), Event::Newline("\n"), Event::Whitespace(" "), Event::Key("c"), - Event::Value(Value::Boolean(Boolean::True(TrueVariant::Implicit))), + Event::Value(""), Event::Newline("\n"), Event::Whitespace(" "), Event::Key("d"), Event::Whitespace(" "), Event::Whitespace(" "), - Event::Value(Value::from_str("\"lol\"")) + Event::Value("\"lol\"") ] }) ) @@ -829,11 +819,7 @@ mod parse { section("[hello] c").unwrap(), fully_consumed(ParsedSection { section_header: gen_section_header("hello", None), - events: vec![ - Event::Whitespace(" "), - Event::Key("c"), - Event::Value(Value::Boolean(Boolean::True(TrueVariant::Implicit))) - ] + events: vec![Event::Whitespace(" "), Event::Key("c"), Event::Value("")] }) ); } @@ -860,7 +846,7 @@ mod parse { Event::Key("a"), Event::Whitespace(" "), Event::Whitespace(" "), - Event::Value(Value::from_str("b")), + Event::Value("b"), Event::Whitespace(" "), Event::Comment(ParsedComment { comment_tag: '#', @@ -883,7 +869,7 @@ mod parse { Event::Key("c"), Event::Whitespace(" "), Event::Whitespace(" "), - Event::Value(Value::from_str("d")), + Event::Value("d"), ] }) ); @@ -947,7 +933,7 @@ mod parse { events: vec![ Event::Key("hello"), Event::Whitespace(" "), - Event::Value(Value::Boolean(Boolean::True(TrueVariant::Implicit))), + Event::Value(""), Event::Comment(ParsedComment { comment_tag: '#', comment: "world", diff --git a/src/values.rs b/src/values.rs index 686da89..92d0e44 100644 --- a/src/values.rs +++ b/src/values.rs @@ -12,7 +12,18 @@ pub enum Value<'a> { impl<'a> Value<'a> { pub fn from_str(s: &'a str) -> Self { - // if s. + if let Ok(bool) = Boolean::from_str(s) { + return Self::Boolean(bool); + } + + // if let Ok(int) = Integer::from_str(s) { + // return Self::Integer(int); + // } + + // if let Ok(color) = Color::from_str(s) { + // return Self::Color(color); + // } + Self::Other(Cow::Borrowed(s)) } @@ -152,7 +163,11 @@ pub struct Integer { suffix: Option, } -impl Integer {} +impl Integer { + pub fn from_str(s: &str) -> Result { + todo!() + } +} impl Display for Integer { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { diff --git a/tests/parser_integration_tests.rs b/tests/parser_integration_tests.rs index c672c86..7bdc642 100644 --- a/tests/parser_integration_tests.rs +++ b/tests/parser_integration_tests.rs @@ -1,9 +1,4 @@ use serde_git_config::parser::{parse_from_str, Event, ParsedSectionHeader}; -use serde_git_config::values::Value; - -fn fully_consumed(t: T) -> (&'static str, T) { - ("", t) -} fn gen_section_header( name: &str, @@ -30,7 +25,7 @@ fn name(name: &'static str) -> Event<'static> { } fn value(value: &'static str) -> Event<'static> { - Event::Value(Value::from_str(value)) + Event::Value(value) } fn newline() -> Event<'static> {