diff --git a/Cargo.toml b/Cargo.toml index 364b193..5ec2cd0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,6 @@ exclude = ["fuzz/**/*", ".vscode/**/*"] serde = ["serde_crate"] [dependencies] -bstr = "0.2.15" nom = { version = "6", default_features = false, features = ["std"] } serde_crate = { version = "1", package = "serde", optional = true } diff --git a/src/config.rs b/src/config.rs index 54482af..a730f16 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,20 +1,17 @@ use crate::parser::{parse_from_bytes, Event, ParsedSectionHeader, Parser, ParserError}; -use bstr::BStr; +use std::collections::{HashMap, VecDeque}; use std::convert::TryFrom; +use std::error::Error; use std::{borrow::Cow, fmt::Display}; -use std::{ - collections::{HashMap, VecDeque}, - error::Error, -}; #[derive(PartialEq, Eq, Hash, Copy, Clone, PartialOrd, Ord, Debug)] pub enum GitConfigError<'a> { /// The requested section does not exist. - SectionDoesNotExist(&'a BStr), + SectionDoesNotExist(&'a str), /// The requested subsection does not exist. - SubSectionDoesNotExist(Option<&'a BStr>), + SubSectionDoesNotExist(Option<&'a str>), /// The key does not exist in the requested section. 
- KeyDoesNotExist(&'a BStr), + KeyDoesNotExist(&'a str), FailedConversion, } @@ -22,8 +19,11 @@ impl Display for GitConfigError<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { // Todo, try parse as utf8 first for better looking errors - Self::SectionDoesNotExist(s) => write!(f, "Subsection '{}' does not exist.", s), - Self::SubSectionDoesNotExist(s) => write!(f, "Subsection '{:?}' does not exist.", s), + Self::SectionDoesNotExist(s) => write!(f, "Section '{}' does not exist.", s), + Self::SubSectionDoesNotExist(s) => match s { + Some(s) => write!(f, "Subsection '{}' does not exist.", s), + None => write!(f, "Top level section does not exist."), + }, Self::KeyDoesNotExist(k) => write!(f, "Name '{}' does not exist.", k), Self::FailedConversion => write!(f, "Failed to convert to specified type."), } @@ -49,7 +49,7 @@ struct SectionId(usize); #[derive(PartialEq, Eq, Clone, Debug)] enum LookupTreeNode<'a> { Terminal(Vec), - NonTerminal(HashMap, Vec>), + NonTerminal(HashMap, Vec>), } /// High level `git-config` reader and writer. /// @@ -86,20 +86,20 @@ enum LookupTreeNode<'a> { /// # use std::borrow::Cow; /// # use std::convert::TryFrom; /// # let git_config = GitConfig::try_from("[core]a=b\n[core]\na=c\na=d").unwrap(); -/// assert_eq!(git_config.get_raw_value("core", None, "a"), Ok(&Cow::Borrowed("d".into()))); +/// assert_eq!(git_config.get_raw_value("core", None, "a"), Ok(&Cow::Borrowed("d".as_bytes()))); /// ``` /// /// Consider the `multi` variants of the methods instead, if you want to work /// with all values instead. /// -/// [`get_value`]: Self::get_value +/// [`get_raw_value`]: Self::get_raw_value #[derive(PartialEq, Eq, Clone, Debug)] pub struct GitConfig<'a> { /// The list of events that occur before an actual section. Since a /// `git-config` file prohibits global values, this vec is limited to only /// comment, newline, and whitespace events. 
front_matter_events: Vec>, - section_lookup_tree: HashMap, Vec>>, + section_lookup_tree: HashMap, Vec>>, /// SectionId to section mapping. The value of this HashMap contains actual /// events sections: HashMap>>, @@ -111,8 +111,8 @@ pub struct GitConfig<'a> { impl<'a> GitConfig<'a> { fn push_section( &mut self, - current_section_name: Option>, - current_subsection_name: Option>, + current_section_name: Option>, + current_subsection_name: Option>, maybe_section: &mut Option>>, ) { if let Some(section) = maybe_section.take() { @@ -184,7 +184,7 @@ impl<'a> GitConfig<'a> { /// "#; /// let git_config = GitConfig::try_from(config).unwrap(); /// // You can either use the turbofish to determine the type... - /// let a_value = git_config.get_value::("core", None, "a")?; + /// let a_value = git_config.get_value::("core", None, "a")?; /// // ... or explicitly declare the type to avoid the turbofish /// let c_value: Boolean = git_config.get_value("core", None, "c")?; /// # Ok::<(), GitConfigError>(()) @@ -198,11 +198,11 @@ impl<'a> GitConfig<'a> { /// /// [`values`]: crate::values /// [`TryFrom`]: std::convert::TryFrom - pub fn get_value<'b, 'c, T: TryFrom<&'c [u8]>, S: Into<&'b BStr>>( + pub fn get_value<'b, 'c, T: TryFrom<&'c [u8]>>( &'c self, - section_name: S, - subsection_name: Option, - key: S, + section_name: &'b str, + subsection_name: Option<&'b str>, + key: &'b str, ) -> Result> { T::try_from(self.get_raw_value(section_name, subsection_name, key)?) 
.map_err(|_| GitConfigError::FailedConversion) @@ -210,8 +210,8 @@ impl<'a> GitConfig<'a> { fn get_section_id_by_name_and_subname<'b>( &'a self, - section_name: &'b BStr, - subsection_name: Option<&'b BStr>, + section_name: &'b str, + subsection_name: Option<&'b str>, ) -> Result> { self.get_section_ids_by_name_and_subname(section_name, subsection_name) .map(|vec| { @@ -231,20 +231,17 @@ impl<'a> GitConfig<'a> { /// /// This function will return an error if the key is not in the requested /// section and subsection, or if the section and subsection do not exist. - pub fn get_raw_value<'b, S: Into<&'b BStr>>( + pub fn get_raw_value<'b>( &self, - section_name: S, - subsection_name: Option, - key: S, - ) -> Result<&Cow<'a, BStr>, GitConfigError<'b>> { - let key = key.into(); + section_name: &'b str, + subsection_name: Option<&'b str>, + key: &'b str, + ) -> Result<&Cow<'a, [u8]>, GitConfigError<'b>> { + let key = key; // Note: cannot wrap around the raw_multi_value method because we need // to guarantee that the highest section id is used (so that we follow // the "last one wins" resolution strategy by `git-config`). - let section_id = self.get_section_id_by_name_and_subname( - section_name.into(), - subsection_name.map(Into::into), - )?; + let section_id = self.get_section_id_by_name_and_subname(section_name, subsection_name)?; // section_id is guaranteed to exist in self.sections, else we have a // violated invariant. @@ -275,20 +272,17 @@ impl<'a> GitConfig<'a> { /// /// This function will return an error if the key is not in the requested /// section and subsection, or if the section and subsection do not exist. 
- pub fn get_raw_value_mut<'b, S: Into<&'b BStr>>( + pub fn get_raw_value_mut<'b>( &mut self, - section_name: S, - subsection_name: Option, - key: S, - ) -> Result<&mut Cow<'a, BStr>, GitConfigError<'b>> { - let key = key.into(); + section_name: &'b str, + subsection_name: Option<&'b str>, + key: &'b str, + ) -> Result<&mut Cow<'a, [u8]>, GitConfigError<'b>> { + let key = key; // Note: cannot wrap around the raw_multi_value method because we need // to guarantee that the highest section id is used (so that we follow // the "last one wins" resolution strategy by `git-config`). - let section_id = self.get_section_id_by_name_and_subname( - section_name.into(), - subsection_name.map(Into::into), - )?; + let section_id = self.get_section_id_by_name_and_subname(section_name, subsection_name)?; // section_id is guaranteed to exist in self.sections, else we have a // violated invariant. @@ -329,7 +323,11 @@ impl<'a> GitConfig<'a> { /// # let git_config = GitConfig::try_from("[core]a=b\n[core]\na=c\na=d").unwrap(); /// assert_eq!( /// git_config.get_raw_multi_value("core", None, "a"), - /// Ok(vec![&Cow::Borrowed("b".into()), &Cow::Borrowed("c".into()), &Cow::Borrowed("d".into())]), + /// Ok(vec![ + /// &Cow::<[u8]>::Borrowed(b"b"), + /// &Cow::<[u8]>::Borrowed(b"c"), + /// &Cow::<[u8]>::Borrowed(b"d"), + /// ]), /// ); /// ``` /// @@ -341,18 +339,15 @@ impl<'a> GitConfig<'a> { /// This function will return an error if the key is not in any requested /// section and subsection, or if no instance of the section and subsections /// exist. 
- pub fn get_raw_multi_value<'b, S: Into<&'b BStr>>( + pub fn get_raw_multi_value<'b>( &'a self, - section_name: S, - subsection_name: Option, - key: S, - ) -> Result>, GitConfigError<'b>> { - let key = key.into(); + section_name: &'b str, + subsection_name: Option<&'b str>, + key: &'b str, + ) -> Result>, GitConfigError<'b>> { + let key = key; let mut values = vec![]; - for section_id in self.get_section_ids_by_name_and_subname( - section_name.into(), - subsection_name.map(Into::into), - )? { + for section_id in self.get_section_ids_by_name_and_subname(section_name, subsection_name)? { let mut found_key = false; // section_id is guaranteed to exist in self.sections, else we // have a violated invariant. @@ -391,26 +386,25 @@ impl<'a> GitConfig<'a> { /// ``` /// # use git_config::config::{GitConfig, GitConfigError}; /// # use std::borrow::Cow; - /// # use bstr::BStr; /// # use std::convert::TryFrom; /// # let mut git_config = GitConfig::try_from("[core]a=b\n[core]\na=c\na=d").unwrap(); /// assert_eq!( /// git_config.get_raw_multi_value("core", None, "a")?, /// vec![ - /// &Cow::::Borrowed("b".into()), - /// &Cow::::Borrowed("c".into()), - /// &Cow::::Borrowed("d".into()) + /// &Cow::Borrowed(b"b"), + /// &Cow::Borrowed(b"c"), + /// &Cow::Borrowed(b"d") /// ] /// ); /// for value in git_config.get_raw_multi_value_mut("core", None, "a")? { - /// *value = Cow::Borrowed("g".into()); + /// *value = Cow::Borrowed(b"g"); ///} /// assert_eq!( /// git_config.get_raw_multi_value("core", None, "a")?, /// vec![ - /// &Cow::::Borrowed("g".into()), - /// &Cow::::Borrowed("g".into()), - /// &Cow::::Borrowed("g".into()) + /// &Cow::Borrowed(b"g"), + /// &Cow::Borrowed(b"g"), + /// &Cow::Borrowed(b"g") /// ], /// ); /// # Ok::<(), GitConfigError>(()) @@ -427,21 +421,18 @@ impl<'a> GitConfig<'a> { /// This function will return an error if the key is not in any requested /// section and subsection, or if no instance of the section and subsections /// exist. 
- pub fn get_raw_multi_value_mut<'b, S: Into<&'b BStr>>( + pub fn get_raw_multi_value_mut<'b>( &mut self, - section_name: S, - subsection_name: Option, - key: S, - ) -> Result>, GitConfigError<'b>> { - let key = key.into(); + section_name: &'b str, + subsection_name: Option<&'b str>, + key: &'b str, + ) -> Result>, GitConfigError<'b>> { + let key = key; let section_ids = self - .get_section_ids_by_name_and_subname( - section_name.into(), - subsection_name.map(Into::into), - )? + .get_section_ids_by_name_and_subname(section_name, subsection_name)? .to_vec(); let mut found_key = false; - let values: Vec<&mut Cow<'a, BStr>> = self + let values: Vec<&mut Cow<'a, [u8]>> = self .sections .iter_mut() .filter_map(|(k, v)| { @@ -474,8 +465,8 @@ impl<'a> GitConfig<'a> { fn get_section_ids_by_name_and_subname<'b>( &'a self, - section_name: &'b BStr, - subsection_name: Option<&'b BStr>, + section_name: &'b str, + subsection_name: Option<&'b str>, ) -> Result<&[SectionId], GitConfigError<'b>> { let section_ids = self .section_lookup_tree @@ -505,12 +496,12 @@ impl<'a> GitConfig<'a> { .ok_or(GitConfigError::SubSectionDoesNotExist(subsection_name)) } - pub fn set_raw_value<'b, S: Into<&'b BStr>>( + pub fn set_raw_value<'b>( &mut self, - section_name: S, - subsection_name: Option, - key: S, - new_value: impl Into>, + section_name: &'b str, + subsection_name: Option<&'b str>, + key: &'b str, + new_value: impl Into>, ) -> Result<(), GitConfigError<'b>> { let value = self.get_raw_value_mut(section_name, subsection_name, key)?; *value = new_value.into(); @@ -530,12 +521,12 @@ impl<'a> GitConfig<'a> { /// todo: examples and errors /// /// [`get_raw_multi_value_mut`]: Self::get_raw_multi_value_mut - pub fn set_raw_multi_value<'b, S: Into<&'b BStr>>( + pub fn set_raw_multi_value<'b>( &mut self, - section_name: S, - subsection_name: Option, - key: S, - new_values: Vec>, + section_name: &'b str, + subsection_name: Option<&'b str>, + key: &'b str, + new_values: Vec>, ) -> Result<(), 
GitConfigError<'b>> { let values = self.get_raw_multi_value_mut(section_name, subsection_name, key)?; for (old, new) in values.into_iter().zip(new_values) { @@ -557,14 +548,14 @@ impl<'a> TryFrom<&'a str> for GitConfig<'a> { } } -impl<'a> TryFrom<&'a BStr> for GitConfig<'a> { +impl<'a> TryFrom<&'a [u8]> for GitConfig<'a> { type Error = ParserError<'a>; /// Convenience constructor. Attempts to parse the provided byte string into //// a [`GitConfig`]. See [`parse_from_bytes`] for more information. /// /// [`parse_from_bytes`]: crate::parser::parse_from_bytes - fn try_from(value: &'a BStr) -> Result { + fn try_from(value: &'a [u8]) -> Result { parse_from_bytes(value).map(Self::from) } } @@ -581,8 +572,8 @@ impl<'a> From> for GitConfig<'a> { }; // Current section that we're building - let mut current_section_name: Option> = None; - let mut current_subsection_name: Option> = None; + let mut current_section_name: Option> = None; + let mut current_subsection_name: Option> = None; let mut maybe_section: Option>> = None; for event in parser.into_iter() { @@ -636,6 +627,9 @@ impl<'a> From> for GitConfig<'a> { } impl Display for GitConfig<'_> { + /// Note that this is a best-effort attempt at printing a `GitConfig`. If + /// there are non UTF-8 values in your config, this will _NOT_ render as + /// read. 
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { for front_matter in &self.front_matter_events { front_matter.fmt(f)?; @@ -682,7 +676,7 @@ mod from_parser { let expected_lookup_tree = { let mut tree = HashMap::new(); tree.insert( - Cow::Borrowed("core".into()), + Cow::Borrowed("core"), vec![LookupTreeNode::Terminal(vec![SectionId(0)])], ); tree @@ -722,9 +716,9 @@ mod from_parser { let expected_lookup_tree = { let mut tree = HashMap::new(); let mut inner_tree = HashMap::new(); - inner_tree.insert(Cow::Borrowed("subsec".into()), vec![SectionId(0)]); + inner_tree.insert(Cow::Borrowed("subsec"), vec![SectionId(0)]); tree.insert( - Cow::Borrowed("core".into()), + Cow::Borrowed("core"), vec![LookupTreeNode::NonTerminal(inner_tree)], ); tree @@ -765,11 +759,11 @@ mod from_parser { let expected_lookup_tree = { let mut tree = HashMap::new(); tree.insert( - Cow::Borrowed("core".into()), + Cow::Borrowed("core"), vec![LookupTreeNode::Terminal(vec![SectionId(0)])], ); tree.insert( - Cow::Borrowed("other".into()), + Cow::Borrowed("other"), vec![LookupTreeNode::Terminal(vec![SectionId(1)])], ); tree @@ -818,7 +812,7 @@ mod from_parser { let expected_lookup_tree = { let mut tree = HashMap::new(); tree.insert( - Cow::Borrowed("core".into()), + Cow::Borrowed("core"), vec![LookupTreeNode::Terminal(vec![SectionId(0), SectionId(1)])], ); tree @@ -863,11 +857,11 @@ mod get_raw_value { let config = GitConfig::try_from("[core]\na=b\nc=d").unwrap(); assert_eq!( config.get_raw_value("core", None, "a"), - Ok(&Cow::Borrowed("b".into())) + Ok(&Cow::<[u8]>::Borrowed(b"b")) ); assert_eq!( config.get_raw_value("core", None, "c"), - Ok(&Cow::Borrowed("d".into())) + Ok(&Cow::<[u8]>::Borrowed(b"d")) ); } @@ -876,7 +870,7 @@ mod get_raw_value { let config = GitConfig::try_from("[core]\na=b\na=d").unwrap(); assert_eq!( config.get_raw_value("core", None, "a"), - Ok(&Cow::Borrowed("d".into())) + Ok(&Cow::<[u8]>::Borrowed(b"d")) ); } @@ -885,7 +879,7 @@ mod get_raw_value { let config 
= GitConfig::try_from("[core]\na=b\n[core]\na=d").unwrap(); assert_eq!( config.get_raw_value("core", None, "a"), - Ok(&Cow::Borrowed("d".into())) + Ok(&Cow::<[u8]>::Borrowed(b"d")) ); } @@ -894,7 +888,7 @@ mod get_raw_value { let config = GitConfig::try_from("[core]\na=b\nc=d").unwrap(); assert_eq!( config.get_raw_value("foo", None, "a"), - Err(GitConfigError::SectionDoesNotExist("foo".into())) + Err(GitConfigError::SectionDoesNotExist("foo")) ); } @@ -903,7 +897,7 @@ mod get_raw_value { let config = GitConfig::try_from("[core]\na=b\nc=d").unwrap(); assert_eq!( config.get_raw_value("core", Some("a"), "a"), - Err(GitConfigError::SubSectionDoesNotExist(Some("a".into()))) + Err(GitConfigError::SubSectionDoesNotExist(Some("a"))) ); } @@ -912,7 +906,7 @@ mod get_raw_value { let config = GitConfig::try_from("[core]\na=b\nc=d").unwrap(); assert_eq!( config.get_raw_value("core", None, "aaaaaa"), - Err(GitConfigError::KeyDoesNotExist("aaaaaa".into())) + Err(GitConfigError::KeyDoesNotExist("aaaaaa")) ); } @@ -921,11 +915,11 @@ mod get_raw_value { let config = GitConfig::try_from("[core]a=b\n[core.a]a=c").unwrap(); assert_eq!( config.get_raw_value("core", None, "a"), - Ok(&Cow::Borrowed("b".into())) + Ok(&Cow::<[u8]>::Borrowed(b"b")) ); assert_eq!( config.get_raw_value("core", Some("a"), "a"), - Ok(&Cow::Borrowed("c".into())) + Ok(&Cow::<[u8]>::Borrowed(b"c")) ); } } @@ -942,7 +936,7 @@ mod get_value { let first_value: Value = config.get_value("core", None, "a")?; let second_value: Boolean = config.get_value("core", None, "c")?; - assert_eq!(first_value, Value::Other(Cow::Borrowed("b".into()))); + assert_eq!(first_value, Value::Other(Cow::Borrowed(b"b"))); assert_eq!(second_value, Boolean::True(TrueVariant::Implicit)); Ok(()) @@ -967,7 +961,7 @@ mod get_raw_multi_value { let config = GitConfig::try_from("[core]\na=b\na=c").unwrap(); assert_eq!( config.get_raw_multi_value("core", None, "a").unwrap(), - vec![&Cow::Borrowed("b"), &Cow::Borrowed("c")] + vec![&Cow::Borrowed(b"b"), 
&Cow::Borrowed(b"c")] ); } @@ -977,9 +971,9 @@ mod get_raw_multi_value { assert_eq!( config.get_raw_multi_value("core", None, "a").unwrap(), vec![ - &Cow::Borrowed("b"), - &Cow::Borrowed("c"), - &Cow::Borrowed("d") + &Cow::Borrowed(b"b"), + &Cow::Borrowed(b"c"), + &Cow::Borrowed(b"d") ] ); } @@ -989,7 +983,7 @@ mod get_raw_multi_value { let config = GitConfig::try_from("[core]\na=b\nc=d").unwrap(); assert_eq!( config.get_raw_multi_value("foo", None, "a"), - Err(GitConfigError::SectionDoesNotExist("foo".into())) + Err(GitConfigError::SectionDoesNotExist("foo")) ); } @@ -998,7 +992,7 @@ mod get_raw_multi_value { let config = GitConfig::try_from("[core]\na=b\nc=d").unwrap(); assert_eq!( config.get_raw_multi_value("core", Some("a"), "a"), - Err(GitConfigError::SubSectionDoesNotExist(Some("a".into()))) + Err(GitConfigError::SubSectionDoesNotExist(Some("a"))) ); } @@ -1007,7 +1001,7 @@ mod get_raw_multi_value { let config = GitConfig::try_from("[core]\na=b\nc=d").unwrap(); assert_eq!( config.get_raw_multi_value("core", None, "aaaaaa"), - Err(GitConfigError::KeyDoesNotExist("aaaaaa".into())) + Err(GitConfigError::KeyDoesNotExist("aaaaaa")) ); } @@ -1016,11 +1010,11 @@ mod get_raw_multi_value { let config = GitConfig::try_from("[core]a=b\n[core.a]a=c").unwrap(); assert_eq!( config.get_raw_multi_value("core", None, "a").unwrap(), - vec![&Cow::Borrowed("b")] + vec![&Cow::Borrowed(b"b")] ); assert_eq!( config.get_raw_multi_value("core", Some("a"), "a").unwrap(), - vec![&Cow::Borrowed("c")] + vec![&Cow::Borrowed(b"c")] ); } @@ -1030,9 +1024,9 @@ mod get_raw_multi_value { assert_eq!( config.get_raw_multi_value("core", None, "a").unwrap(), vec![ - &Cow::Borrowed("b"), - &Cow::Borrowed("c"), - &Cow::Borrowed("d") + &Cow::Borrowed(b"b"), + &Cow::Borrowed(b"c"), + &Cow::Borrowed(b"d") ] ); } diff --git a/src/lib.rs b/src/lib.rs index 20c41bc..d70c174 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,13 +18,13 @@ extern crate serde_crate as serde; // mod de; // mod ser; +// mod error; 
pub mod config; -mod error; pub mod parser; pub mod values; // pub use de::{from_str, Deserializer}; -pub use error::{Error, Result}; +// pub use error::{Error, Result}; // pub use ser::{to_string, Serializer}; #[cfg(test)] diff --git a/src/parser.rs b/src/parser.rs index 4a9ac7b..8777227 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,5 +1,5 @@ //! This module handles parsing a `git-config` file. Generally speaking, you -//! want to use a higher abstraction such as [`GitConfig`] unless you have some +//! want to use a higher abstraction such as [`GitConfig`] unless you have some //! explicit reason to work with events instead. //! //! The general workflow for interacting with this is to use one of the @@ -9,7 +9,6 @@ //! //! [`GitConfig`]: crate::config::GitConfig -use bstr::{BStr, ByteSlice}; use nom::branch::alt; use nom::bytes::complete::{escaped, tag, take_till, take_while}; use nom::character::complete::{char, none_of, one_of}; @@ -43,26 +42,26 @@ pub enum Event<'a> { /// exists. SectionHeader(ParsedSectionHeader<'a>), /// A name to a value in a section. - Key(Cow<'a, BStr>), + Key(Cow<'a, str>), /// A completed value. This may be any string, including the empty string, /// if an implicit boolean value is used. Note that these values may contain /// spaces and any special character. This value is also unprocessed, so it /// it may contain double quotes that should be replaced. - Value(Cow<'a, BStr>), + Value(Cow<'a, [u8]>), /// Represents any token used to signify a new line character. On Unix /// platforms, this is typically just `\n`, but can be any valid newline /// sequence. Multiple newlines (such as `\n\n`) will be merged as a single /// newline event. - Newline(Cow<'a, BStr>), + Newline(Cow<'a, str>), /// Any value that isn't completed. This occurs when the value is continued /// onto the next line. A Newline event is guaranteed after, followed by /// either a ValueDone, a Whitespace, or another ValueNotDone.
- ValueNotDone(Cow<'a, BStr>), + ValueNotDone(Cow<'a, [u8]>), /// The last line of a value which was continued onto another line. - ValueDone(Cow<'a, BStr>), + ValueDone(Cow<'a, [u8]>), /// A continuous section of insignificant whitespace. Values with internal /// spaces will not be separated by this event. - Whitespace(Cow<'a, BStr>), + Whitespace(Cow<'a, str>), /// This event is emitted when the parser counters a valid `=` character /// separating the key and value. This event is necessary as it eliminates /// the ambiguity for whitespace events between a key and value event. @@ -70,15 +69,27 @@ pub enum Event<'a> { } impl Display for Event<'_> { + /// Note that this is a best-effort attempt at printing an `Event`. If + /// there are non UTF-8 values in your config, this will _NOT_ render + /// as read. Consider [`Event::as_bytes`] for one-to-one reading. fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Comment(e) => e.fmt(f), Self::SectionHeader(e) => e.fmt(f), Self::Key(e) => e.fmt(f), - Self::Value(e) => e.fmt(f), + Self::Value(e) => match std::str::from_utf8(e) { + Ok(e) => e.fmt(f), + Err(_) => write!(f, "{:02x?}", e), + }, Self::Newline(e) => e.fmt(f), - Self::ValueNotDone(e) => e.fmt(f), - Self::ValueDone(e) => e.fmt(f), + Self::ValueNotDone(e) => match std::str::from_utf8(e) { + Ok(e) => e.fmt(f), + Err(_) => write!(f, "{:02x?}", e), + }, + Self::ValueDone(e) => match std::str::from_utf8(e) { + Ok(e) => e.fmt(f), + Err(_) => write!(f, "{:02x?}", e), + }, Self::Whitespace(e) => e.fmt(f), Self::KeyValueSeparator => write!(f, "="), } @@ -111,18 +122,22 @@ impl<'a> Into> for ParsedSectionHeader<'a> { } /// A parsed section header, containing a name and optionally a subsection name. +/// +/// Note that section headers must be parsed as valid ASCII, and thus all valid +/// instances must also necessarily be valid UTF-8, which is why we use a +/// [`str`] instead of [`[u8]`]. 
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)] pub struct ParsedSectionHeader<'a> { /// The name of the header. - pub name: Cow<'a, BStr>, + pub name: Cow<'a, str>, /// The separator used to determine if the section contains a subsection. /// This is either a period `.` or a string of whitespace. Note that /// reconstruction of subsection format is dependent on this value. If this /// is all whitespace, then the subsection name needs to be surrounded by /// quotes to have perfect reconstruction. - pub separator: Option>, + pub separator: Option>, /// The subsection name without quotes if any exist. - pub subsection_name: Option>, + pub subsection_name: Option>, } impl Display for ParsedSectionHeader<'_> { @@ -130,9 +145,10 @@ impl Display for ParsedSectionHeader<'_> { write!(f, "[{}", self.name)?; if let Some(v) = &self.separator { + // Separator must be utf-8 v.fmt(f)?; let subsection_name = self.subsection_name.as_ref().unwrap(); - if *v == b".".as_bstr() { + if v == "." { subsection_name.fmt(f)?; } else { write!(f, "\"{}\"", subsection_name)?; @@ -149,13 +165,20 @@ pub struct ParsedComment<'a> { /// The comment marker used. This is either a semicolon or octothorpe. pub comment_tag: char, /// The parsed comment. - pub comment: Cow<'a, BStr>, + pub comment: Cow<'a, [u8]>, } impl Display for ParsedComment<'_> { + /// Note that this is a best-effort attempt at printing a comment. If + /// there are non UTF-8 values in your config, this will _NOT_ render + /// as read. Consider [`Event::as_bytes`] for one-to-one reading. fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.comment_tag.fmt(f)?; - self.comment.fmt(f) + if let Ok(s) = std::str::from_utf8(&self.comment) { + s.fmt(f) + } else { + write!(f, "{:02x?}", self.comment) + } } } @@ -235,7 +258,7 @@ impl Display for ParserNode { /// /// This is parser exposes low-level syntactic events from a `git-config` file.
/// Generally speaking, you'll want to use [`GitConfig`] as it wraps -/// around the parser to provide a higher-level abstraction to a `git-config` +/// around the parser to provide a higher-level abstraction to a `git-config` /// file, including querying, modifying, and updating values. /// /// This parser guarantees that the events emitted are sufficient to @@ -308,20 +331,20 @@ impl Display for ParserNode { /// # use git_config::parser::{Event, ParsedSectionHeader, parse_from_str}; /// # use std::borrow::Cow; /// # let section_header = ParsedSectionHeader { -/// # name: Cow::Borrowed("core".into()), +/// # name: Cow::Borrowed("core"), /// # separator: None, /// # subsection_name: None, /// # }; /// # let section_data = "[core]\n autocrlf = input"; /// # assert_eq!(parse_from_str(section_data).unwrap().into_vec(), vec![ /// Event::SectionHeader(section_header), -/// Event::Newline(Cow::Borrowed("\n".into())), -/// Event::Whitespace(Cow::Borrowed(" ".into())), -/// Event::Key(Cow::Borrowed("autocrlf".into())), -/// Event::Whitespace(Cow::Borrowed(" ".into())), +/// Event::Newline(Cow::Borrowed("\n")), +/// Event::Whitespace(Cow::Borrowed(" ")), +/// Event::Key(Cow::Borrowed("autocrlf")), +/// Event::Whitespace(Cow::Borrowed(" ")), /// Event::KeyValueSeparator, -/// Event::Whitespace(Cow::Borrowed(" ".into())), -/// Event::Value(Cow::Borrowed("input".into())), +/// Event::Whitespace(Cow::Borrowed(" ")), +/// Event::Value(Cow::Borrowed(b"input")), /// # ]); /// ``` /// @@ -346,19 +369,18 @@ impl Display for ParserNode { /// ``` /// # use git_config::parser::{Event, ParsedSectionHeader, parse_from_str}; /// # use std::borrow::Cow; -/// # use bstr::BStr; /// # let section_header = ParsedSectionHeader { -/// # name: Cow::Borrowed("core".into()), +/// # name: Cow::Borrowed("core"), /// # separator: None, /// # subsection_name: None, /// # }; /// # let section_data = "[core]\n autocrlf"; /// # assert_eq!(parse_from_str(section_data).unwrap().into_vec(), vec![ ///
Event::SectionHeader(section_header), -/// Event::Newline(Cow::Borrowed("\n".into())), -/// Event::Whitespace(Cow::Borrowed(" ".into())), -/// Event::Key(Cow::Borrowed("autocrlf".into())), -/// Event::Value(Cow::Borrowed("".into())), +/// Event::Newline(Cow::Borrowed("\n")), +/// Event::Whitespace(Cow::Borrowed(" ")), +/// Event::Key(Cow::Borrowed("autocrlf")), +/// Event::Value(Cow::Borrowed(b"")), /// # ]); /// ``` /// @@ -382,21 +404,21 @@ impl Display for ParserNode { /// # use git_config::parser::{Event, ParsedSectionHeader, parse_from_str}; /// # use std::borrow::Cow; /// # let section_header = ParsedSectionHeader { -/// # name: Cow::Borrowed("core".into()), +/// # name: Cow::Borrowed("core"), /// # separator: None, /// # subsection_name: None, /// # }; /// # let section_data = "[core]\nautocrlf=true\"\"\nfilemode=fa\"lse\""; /// # assert_eq!(parse_from_str(section_data).unwrap().into_vec(), vec![ /// Event::SectionHeader(section_header), -/// Event::Newline(Cow::Borrowed("\n".into())), -/// Event::Key(Cow::Borrowed("autocrlf".into())), +/// Event::Newline(Cow::Borrowed("\n")), +/// Event::Key(Cow::Borrowed("autocrlf")), /// Event::KeyValueSeparator, -/// Event::Value(Cow::Borrowed(r#"true"""#.into())), -/// Event::Newline(Cow::Borrowed("\n".into())), -/// Event::Key(Cow::Borrowed("filemode".into())), +/// Event::Value(Cow::Borrowed(br#"true"""#)), +/// Event::Newline(Cow::Borrowed("\n")), +/// Event::Key(Cow::Borrowed("filemode")), /// Event::KeyValueSeparator, -/// Event::Value(Cow::Borrowed(r#"fa"lse""#.into())), +/// Event::Value(Cow::Borrowed(br#"fa"lse""#)), /// # ]); /// ``` /// @@ -419,19 +441,19 @@ impl Display for ParserNode { /// # use git_config::parser::{Event, ParsedSectionHeader, parse_from_str}; /// # use std::borrow::Cow; /// # let section_header = ParsedSectionHeader { -/// # name: Cow::Borrowed("some-section".into()), +/// # name: Cow::Borrowed("some-section"), /// # separator: None, /// # subsection_name: None, /// # }; /// # let 
section_data = "[some-section]\nfile=a\\\n c"; /// # assert_eq!(parse_from_str(section_data).unwrap().into_vec(), vec![ /// Event::SectionHeader(section_header), -/// Event::Newline(Cow::Borrowed("\n".into())), -/// Event::Key(Cow::Borrowed("file".into())), +/// Event::Newline(Cow::Borrowed("\n")), +/// Event::Key(Cow::Borrowed("file")), /// Event::KeyValueSeparator, -/// Event::ValueNotDone(Cow::Borrowed("a".into())), -/// Event::Newline(Cow::Borrowed("\n".into())), -/// Event::ValueDone(Cow::Borrowed(" c".into())), +/// Event::ValueNotDone(Cow::Borrowed(b"a")), +/// Event::Newline(Cow::Borrowed("\n")), +/// Event::ValueDone(Cow::Borrowed(b" c")), /// # ]); /// ``` /// @@ -487,6 +509,7 @@ impl<'a> Parser<'a> { } /// Consumes the parser to produce an iterator of Events. + #[must_use = "iterators are lazy and do nothing unless consumed"] pub fn into_iter(self) -> impl Iterator> + FusedIterator { // Can't impl IntoIter without allocating.and using a generic associated type // TODO: try harder? @@ -548,13 +571,13 @@ pub fn parse_from_bytes(input: &[u8]) -> Result, ParserError> { let (i, frontmatter) = many0(alt(( map(comment, Event::Comment), map(take_spaces, |whitespace| { - Event::Whitespace(Cow::Borrowed(whitespace.into())) + Event::Whitespace(Cow::Borrowed(whitespace)) }), map(take_newline, |(newline, counter)| { newlines += counter; - Event::Newline(Cow::Borrowed(newline.into())) + Event::Newline(Cow::Borrowed(newline)) }), - )))(input.as_bytes()) + )))(input) // I don't think this can panic. many0 errors if the child parser returns // a success where the input was not consumed, but alt will only return Ok // if one of its children succeed. 
However, all of it's children are @@ -609,7 +632,7 @@ fn comment(i: &[u8]) -> IResult<&[u8], ParsedComment> { i, ParsedComment { comment_tag, - comment: Cow::Borrowed(comment.into()), + comment: Cow::Borrowed(comment), }, )) } @@ -631,7 +654,7 @@ fn section<'a, 'b>( if let Ok((new_i, v)) = take_spaces(i) { if old_i != new_i { i = new_i; - items.push(Event::Whitespace(Cow::Borrowed(v.into()))); + items.push(Event::Whitespace(Cow::Borrowed(v))); } } @@ -639,7 +662,7 @@ fn section<'a, 'b>( if old_i != new_i { i = new_i; newlines += new_newlines; - items.push(Event::Newline(Cow::Borrowed(v.into()))); + items.push(Event::Newline(Cow::Borrowed(v))); } } @@ -678,21 +701,24 @@ fn section_header(i: &[u8]) -> IResult<&[u8], ParsedSectionHeader> { // No spaces must be between section name and section start let (i, name) = take_while(|c: u8| c.is_ascii_alphanumeric() || c == b'-' || c == b'.')(i)?; + let name = std::str::from_utf8(name).map_err(|_| { + nom::Err::Error(NomError::<&[u8]> { + input: i, + code: ErrorKind::AlphaNumeric, + }) + })?; + if let Ok((i, _)) = char::<_, NomError<&[u8]>>(']')(i) { // Either section does not have a subsection or using deprecated // subsection syntax at this point. - let header = match name.rfind(&[b'.']) { + let header = match find_legacy_subsection_separator(name) { Some(index) => ParsedSectionHeader { - name: Cow::Borrowed(name[..index].into()), - separator: name - .get(index..index + 1) - .map(|slice| Cow::Borrowed(slice.into())), - subsection_name: name - .get(index + 1..) 
- .map(|slice| Cow::Borrowed(slice.into())), + name: Cow::Borrowed(&name[..index]), + separator: name.get(index..index + 1).map(|slice| Cow::Borrowed(slice)), + subsection_name: name.get(index + 1..).map(|slice| Cow::Borrowed(slice)), }, None => ParsedSectionHeader { - name: Cow::Borrowed(name.into()), + name: Cow::Borrowed(name), separator: None, subsection_name: None, }, @@ -710,20 +736,38 @@ fn section_header(i: &[u8]) -> IResult<&[u8], ParsedSectionHeader> { tag("\"]"), )(i)?; + let subsection_name = subsection_name + .map(std::str::from_utf8) + .transpose() + .map_err(|_| { + nom::Err::Error(NomError::<&[u8]> { + input: i, + code: ErrorKind::AlphaNumeric, + }) + })?; + Ok(( i, ParsedSectionHeader { - name: Cow::Borrowed(name.into()), - separator: Some(Cow::Borrowed(whitespace.into())), + name: Cow::Borrowed(name), + separator: Some(Cow::Borrowed(whitespace)), // We know that there's some section name here, so if we get an // empty vec here then we actually parsed an empty section name. - subsection_name: subsection_name - .or(Some(b"")) - .map(|slice| Cow::Borrowed(slice.into())), + subsection_name: subsection_name.or(Some("")).map(Cow::Borrowed), }, )) } +fn find_legacy_subsection_separator(input: &str) -> Option { + let input = input.as_bytes(); + for i in (0..input.len()).into_iter().rev() { + if input[i] == b'.' 
{ + return Some(i); + } + } + None +} + fn section_body<'a, 'b, 'c>( i: &'a [u8], node: &'b mut ParserNode, @@ -733,12 +777,12 @@ fn section_body<'a, 'b, 'c>( *node = ParserNode::ConfigName; let (i, name) = config_name(i)?; - items.push(Event::Key(Cow::Borrowed(name.into()))); + items.push(Event::Key(Cow::Borrowed(name))); let (i, whitespace) = opt(take_spaces)(i)?; if let Some(whitespace) = whitespace { - items.push(Event::Whitespace(Cow::Borrowed(whitespace.into()))); + items.push(Event::Whitespace(Cow::Borrowed(whitespace))); } let (i, _) = config_value(i, items)?; @@ -747,7 +791,7 @@ fn section_body<'a, 'b, 'c>( /// Parses the config name of a config pair. Assumes the input has already been /// trimmed of any leading whitespace. -fn config_name(i: &[u8]) -> IResult<&[u8], &[u8]> { +fn config_name(i: &[u8]) -> IResult<&[u8], &str> { if i.is_empty() { return Err(nom::Err::Error(NomError { input: i, @@ -761,7 +805,16 @@ fn config_name(i: &[u8]) -> IResult<&[u8], &[u8]> { code: ErrorKind::Alpha, })); } - take_while(|c: u8| (c as char).is_alphanumeric() || c == b'-')(i) + + let (i, v) = take_while(|c: u8| (c as char).is_alphanumeric() || c == b'-')(i)?; + let v = std::str::from_utf8(v).map_err(|_| { + nom::Err::Error(NomError::<&[u8]> { + input: i, + code: ErrorKind::AlphaNumeric, + }) + })?; + + Ok((i, v)) } fn config_value<'a, 'b>(i: &'a [u8], events: &'b mut Vec>) -> IResult<&'a [u8], ()> { @@ -769,12 +822,12 @@ fn config_value<'a, 'b>(i: &'a [u8], events: &'b mut Vec>) -> IResult< events.push(Event::KeyValueSeparator); let (i, whitespace) = opt(take_spaces)(i)?; if let Some(whitespace) = whitespace { - events.push(Event::Whitespace(Cow::Borrowed(whitespace.into()))); + events.push(Event::Whitespace(Cow::Borrowed(whitespace))); } let (i, _) = value_impl(i, events)?; Ok((i, ())) } else { - events.push(Event::Value(Cow::Borrowed("".into()))); + events.push(Event::Value(Cow::Borrowed(b""))); Ok((i, ())) } } @@ -804,10 +857,10 @@ fn value_impl<'a, 'b>(i: &'a [u8], 
events: &'b mut Vec>) -> IResult<&' // continuation. b'\n' => { partial_value_found = true; - events.push(Event::ValueNotDone(Cow::Borrowed( - i[offset..index - 1].into(), + events.push(Event::ValueNotDone(Cow::Borrowed(&i[offset..index - 1]))); + events.push(Event::Newline(Cow::Borrowed( + std::str::from_utf8(&i[index..index + 1]).unwrap(), ))); - events.push(Event::Newline(Cow::Borrowed(i[index..index + 1].into()))); offset = index + 1; parsed_index = 0; } @@ -868,15 +921,15 @@ fn value_impl<'a, 'b>(i: &'a [u8], events: &'b mut Vec>) -> IResult<&' }; if partial_value_found { - events.push(Event::ValueDone(Cow::Borrowed(remainder_value.into()))); + events.push(Event::ValueDone(Cow::Borrowed(remainder_value))); } else { - events.push(Event::Value(Cow::Borrowed(remainder_value.into()))); + events.push(Event::Value(Cow::Borrowed(remainder_value))); } Ok((i, ())) } -fn take_spaces(i: &[u8]) -> IResult<&[u8], &[u8]> { +fn take_spaces(i: &[u8]) -> IResult<&[u8], &str> { let (i, v) = take_while(|c| (c as char).is_ascii() && is_space(c))(i)?; if v.is_empty() { Err(nom::Err::Error(NomError { @@ -884,11 +937,12 @@ fn take_spaces(i: &[u8]) -> IResult<&[u8], &[u8]> { code: ErrorKind::Eof, })) } else { - Ok((i, v)) + // v is guaranteed to be utf-8 + Ok((i, std::str::from_utf8(v).unwrap())) } } -fn take_newline(i: &[u8]) -> IResult<&[u8], (&[u8], usize)> { +fn take_newline(i: &[u8]) -> IResult<&[u8], (&str, usize)> { let mut counter = 0; let (i, v) = take_while(|c| (c as char).is_ascii() && is_newline(c))(i)?; counter += v.len(); @@ -898,7 +952,8 @@ fn take_newline(i: &[u8]) -> IResult<&[u8], (&[u8], usize)> { code: ErrorKind::Eof, })) } else { - Ok((i, (v, counter))) + // v is guaranteed to be utf-8 + Ok((i, (std::str::from_utf8(v).unwrap(), counter))) } } @@ -1011,10 +1066,7 @@ mod config_name { #[test] fn just_name() { - assert_eq!( - config_name(b"name").unwrap(), - fully_consumed("name".as_bytes()) - ); + assert_eq!(config_name(b"name").unwrap(), fully_consumed("name")); } 
#[test] diff --git a/src/test_util.rs b/src/test_util.rs index 81f862b..0d8a263 100644 --- a/src/test_util.rs +++ b/src/test_util.rs @@ -13,12 +13,12 @@ pub fn section_header( name: &str, subsection: impl Into>, ) -> ParsedSectionHeader<'_> { - let name = Cow::Borrowed(name.into()); + let name = name.into(); if let Some((separator, subsection_name)) = subsection.into() { ParsedSectionHeader { name, - separator: Some(Cow::Borrowed(separator.into())), - subsection_name: Some(Cow::Borrowed(subsection_name.into())), + separator: Some(Cow::Borrowed(separator)), + subsection_name: Some(Cow::Borrowed(subsection_name)), } } else { ParsedSectionHeader { @@ -30,19 +30,19 @@ pub fn section_header( } pub(crate) fn name_event(name: &'static str) -> Event<'static> { - Event::Key(Cow::Borrowed(name.into())) + Event::Key(Cow::Borrowed(name)) } pub(crate) fn value_event(value: &'static str) -> Event<'static> { - Event::Value(Cow::Borrowed(value.into())) + Event::Value(Cow::Borrowed(value.as_bytes())) } pub(crate) fn value_not_done_event(value: &'static str) -> Event<'static> { - Event::ValueNotDone(Cow::Borrowed(value.into())) + Event::ValueNotDone(Cow::Borrowed(value.as_bytes())) } pub(crate) fn value_done_event(value: &'static str) -> Event<'static> { - Event::ValueDone(Cow::Borrowed(value.into())) + Event::ValueDone(Cow::Borrowed(value.as_bytes())) } pub(crate) fn newline_event() -> Event<'static> { @@ -50,11 +50,11 @@ pub(crate) fn newline_event() -> Event<'static> { } pub(crate) fn newline_custom_event(value: &'static str) -> Event<'static> { - Event::Newline(Cow::Borrowed(value.into())) + Event::Newline(Cow::Borrowed(value)) } pub(crate) fn whitespace_event(value: &'static str) -> Event<'static> { - Event::Whitespace(Cow::Borrowed(value.into())) + Event::Whitespace(Cow::Borrowed(value)) } pub(crate) fn comment_event(tag: char, msg: &'static str) -> Event<'static> { @@ -64,7 +64,7 @@ pub(crate) fn comment_event(tag: char, msg: &'static str) -> Event<'static> { pub(crate) fn 
comment(comment_tag: char, comment: &'static str) -> ParsedComment<'static> { ParsedComment { comment_tag, - comment: Cow::Borrowed(comment.into()), + comment: Cow::Borrowed(comment.as_bytes()), } } diff --git a/src/values.rs b/src/values.rs index 376c1d9..c86d3dd 100644 --- a/src/values.rs +++ b/src/values.rs @@ -1,6 +1,5 @@ //! Rust containers for valid `git-config` types. -use bstr::{BStr, ByteSlice}; #[cfg(feature = "serde")] use serde::{Serialize, Serializer}; use std::borrow::Cow; @@ -121,7 +120,7 @@ pub enum Value<'a> { /// If a value does not match from any of the other variants, then this /// variant will be matched. As a result, conversion from a `str`-like item /// will never fail. - Other(Cow<'a, BStr>), + Other(Cow<'a, [u8]>), } impl<'a> From<&'a str> for Value<'a> { @@ -138,7 +137,7 @@ impl<'a> From<&'a str> for Value<'a> { return Self::Color(color); } - Self::Other(Cow::Borrowed(s.into())) + Self::Other(Cow::Borrowed(s.as_bytes())) } } @@ -148,7 +147,7 @@ impl<'a> From<&'a [u8]> for Value<'a> { if let Ok(s) = std::str::from_utf8(s) { Self::from(s) } else { - Self::Other(Cow::Borrowed(s.as_bstr())) + Self::Other(Cow::Borrowed(s)) } } } @@ -175,9 +174,7 @@ impl Serialize for Value<'_> { /// Note that while values can effectively be any byte string, the `git-config` /// documentation has a strict subset of values that may be interpreted as a /// boolean value, all of which are ASCII and thus UTF-8 representable. -/// Consequently, variants hold [`str`]s rather than [`BStr`]s. -/// -/// [`BStr`]: bstr::BStr +/// Consequently, variants hold [`str`]s rather than [`[u8]`]s. 
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] #[allow(missing_docs)] pub enum Boolean<'a> { diff --git a/tests/parser_integration_tests.rs b/tests/parser_integration_tests.rs index 7ff53a5..8be91fd 100644 --- a/tests/parser_integration_tests.rs +++ b/tests/parser_integration_tests.rs @@ -34,7 +34,7 @@ fn name(name: &'static str) -> Event<'static> { } fn value(value: &'static str) -> Event<'static> { - Event::Value(Cow::Borrowed(value.into())) + Event::Value(Cow::Borrowed(value.as_bytes())) } fn newline() -> Event<'static> {