Compare commits

...

5 commits

4 changed files with 788 additions and 519 deletions

View file

@ -16,6 +16,7 @@ exclude = ["fuzz/**/*", ".vscode/**/*"]
serde = ["serde_crate"] serde = ["serde_crate"]
[dependencies] [dependencies]
memchr = "2"
nom = { version = "6", default_features = false, features = ["std"] } nom = { version = "6", default_features = false, features = ["std"] }
serde_crate = { version = "1", package = "serde", optional = true } serde_crate = { version = "1", package = "serde", optional = true }

File diff suppressed because it is too large Load diff

View file

@ -700,7 +700,7 @@ fn section_header(i: &[u8]) -> IResult<&[u8], ParsedSectionHeader> {
if let Ok((i, _)) = char::<_, NomError<&[u8]>>(']')(i) { if let Ok((i, _)) = char::<_, NomError<&[u8]>>(']')(i) {
// Either section does not have a subsection or using deprecated // Either section does not have a subsection or using deprecated
// subsection syntax at this point. // subsection syntax at this point.
let header = match find_legacy_subsection_separator(name) { let header = match memchr::memrchr(b'.', name.as_bytes()) {
Some(index) => ParsedSectionHeader { Some(index) => ParsedSectionHeader {
name: Cow::Borrowed(&name[..index]), name: Cow::Borrowed(&name[..index]),
separator: name.get(index..=index).map(|slice| Cow::Borrowed(slice)), separator: name.get(index..=index).map(|slice| Cow::Borrowed(slice)),
@ -747,16 +747,6 @@ fn section_header(i: &[u8]) -> IResult<&[u8], ParsedSectionHeader> {
)) ))
} }
/// Returns the byte index of the last `.` in `input`, or `None` when `input`
/// contains no `.` at all.
///
/// The index is a byte offset into `input`. Because `.` is a single-byte
/// ASCII character, the offset always lies on a UTF-8 character boundary and
/// can be used to slice `input` directly.
fn find_legacy_subsection_separator(input: &str) -> Option<usize> {
    // Scan from the back so the right-most separator wins.
    input.as_bytes().iter().rposition(|&byte| byte == b'.')
}
fn section_body<'a, 'b, 'c>( fn section_body<'a, 'b, 'c>(
i: &'a [u8], i: &'a [u8],
node: &'b mut ParserNode, node: &'b mut ParserNode,

View file

@ -21,53 +21,62 @@ use std::str::FromStr;
/// need to call this yourself. However, if you're directly handling events /// need to call this yourself. However, if you're directly handling events
/// from the parser, you may want to use this to help with value interpretation. /// from the parser, you may want to use this to help with value interpretation.
/// ///
/// Generally speaking, you'll want to use one of the variants of this function,
/// such as [`normalize_str`] or [`normalize_vec`].
///
/// # Examples /// # Examples
/// ///
/// Values that don't need modification are returned borrowed, without allocation. /// Values that don't need modification are returned borrowed, without allocation.
/// ///
/// ``` /// ```
/// # use std::borrow::Cow; /// # use std::borrow::Cow;
/// # use git_config::values::normalize; /// # use git_config::values::normalize_str;
/// assert_eq!(normalize(b"hello world"), Cow::Borrowed(b"hello world".into())); /// assert_eq!(normalize_str("hello world"), Cow::Borrowed(b"hello world".into()));
/// ``` /// ```
/// ///
/// Fully quoted values are optimized to not need allocations. /// Fully quoted values are optimized to not need allocations.
/// ///
/// ``` /// ```
/// # use std::borrow::Cow; /// # use std::borrow::Cow;
/// # use git_config::values::normalize; /// # use git_config::values::normalize_str;
/// assert_eq!(normalize(b"\"hello world\""), Cow::Borrowed(b"hello world".into())); /// assert_eq!(normalize_str("\"hello world\""), Cow::Borrowed(b"hello world".into()));
/// ``` /// ```
/// ///
/// Quoted values are unwrapped as an owned variant. /// Quoted values are unwrapped as an owned variant.
/// ///
/// ``` /// ```
/// # use std::borrow::Cow; /// # use std::borrow::Cow;
/// # use git_config::values::normalize; /// # use git_config::values::normalize_str;
/// assert_eq!(normalize(b"hello \"world\""), Cow::<[u8]>::Owned(b"hello world".to_vec())); /// assert_eq!(normalize_str("hello \"world\""), Cow::<[u8]>::Owned(b"hello world".to_vec()));
/// ``` /// ```
/// ///
/// Escaped quotes are unescaped. /// Escaped quotes are unescaped.
/// ///
/// ``` /// ```
/// # use std::borrow::Cow; /// # use std::borrow::Cow;
/// # use git_config::values::normalize; /// # use git_config::values::normalize_str;
/// assert_eq!(normalize(br#"hello "world\"""#), Cow::<[u8]>::Owned(br#"hello world""#.to_vec())); /// assert_eq!(normalize_str(r#"hello "world\"""#), Cow::<[u8]>::Owned(br#"hello world""#.to_vec()));
/// ``` /// ```
/// ///
/// [`parser`]: crate::parser::Parser /// [`parser`]: crate::parser::Parser
pub fn normalize(input: &[u8]) -> Cow<'_, [u8]> { pub fn normalize_cow(input: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
let mut first_index = 0; let mut first_index = 0;
let mut last_index = 0; let mut last_index = 0;
let size = input.len(); let size = input.len();
if &*input == b"\"\"" {
if input == b"\"\"" {
return Cow::Borrowed(&[]); return Cow::Borrowed(&[]);
} }
if size >= 3 && input[0] == b'=' && input[size - 1] == b'=' && input[size - 2] != b'\\' { if size >= 3 && input[0] == b'=' && input[size - 1] == b'=' && input[size - 2] != b'\\' {
return normalize(&input[1..size]); match input {
Cow::Borrowed(input) => return normalize_bytes(&input[1..size]),
Cow::Owned(mut input) => {
input.pop();
input.remove(0);
return normalize_vec(input);
}
}
} }
let mut owned = vec![]; let mut owned = vec![];
@ -104,12 +113,30 @@ pub fn normalize(input: &[u8]) -> Cow<'_, [u8]> {
owned.extend(dbg!(&input[last_index..])); owned.extend(dbg!(&input[last_index..]));
if owned.is_empty() { if owned.is_empty() {
Cow::Borrowed(input) input
} else { } else {
Cow::Owned(owned) Cow::Owned(owned)
} }
} }
/// Byte-slice (`&[u8]`) entry point for [`normalize_cow`].
///
/// The slice is wrapped in [`Cow::Borrowed`] and forwarded unchanged, so the
/// result has whatever borrowed/owned shape [`normalize_cow`] produces.
#[inline]
pub fn normalize_bytes(input: &[u8]) -> Cow<'_, [u8]> {
    let borrowed = Cow::Borrowed(input);
    normalize_cow(borrowed)
}
/// Owned-buffer (`Vec<u8>`) entry point for [`normalize_cow`].
///
/// Ownership of `input` is handed over via [`Cow::Owned`], which is why the
/// returned value does not borrow from the caller (`'static`).
#[inline]
pub fn normalize_vec(input: Vec<u8>) -> Cow<'static, [u8]> {
    let owned: Cow<'static, [u8]> = Cow::Owned(input);
    normalize_cow(owned)
}
/// String-slice ([`str`]) entry point for [`normalize_cow`].
///
/// The string is viewed as its UTF-8 bytes and passed to [`normalize_bytes`];
/// the result is returned as bytes, not as a `str`.
#[inline]
pub fn normalize_str(input: &str) -> Cow<'_, [u8]> {
    let bytes = input.as_bytes();
    normalize_bytes(bytes)
}
/// Fully enumerated valid types that a `git-config` value can be. /// Fully enumerated valid types that a `git-config` value can be.
#[allow(missing_docs)] #[allow(missing_docs)]
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] #[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
@ -152,6 +179,35 @@ impl<'a> From<&'a [u8]> for Value<'a> {
} }
} }
/// Builds a [`Value`] from an owned string by reusing its byte buffer.
impl From<String> for Value<'_> {
    fn from(s: String) -> Self {
        // `into_bytes` hands over the allocation; classification of the
        // bytes is done by the `From<Vec<u8>>` conversion.
        let bytes: Vec<u8> = s.into_bytes();
        Self::from(bytes)
    }
}
/// Classifies an owned byte buffer as a [`Value`].
///
/// Interpretations are attempted in this order: integer, color, boolean.
/// Bytes that match none of them are kept verbatim as `Value::Other`,
/// owning the original buffer.
impl From<Vec<u8>> for Value<'_> {
    fn from(s: Vec<u8>) -> Self {
        // The integer and color parsers only need to look at the bytes, so
        // they are given a borrowed view and the buffer stays available.
        if let Ok(int) = Integer::try_from(s.as_slice()) {
            return Self::Integer(int);
        }

        if let Ok(color) = Color::try_from(s.as_slice()) {
            return Self::Color(color);
        }

        // The boolean conversion consumes the buffer and returns it in the
        // error case, so it can be stored without copying.
        match Boolean::try_from(s) {
            Ok(boolean) => Self::Boolean(boolean),
            Err(raw) => Self::Other(Cow::Owned(raw)),
        }
    }
}
/// Builds a [`Value`] from a [`Cow`] of bytes, dispatching to the borrowed
/// (`&[u8]`) or owned (`Vec<u8>`) conversion depending on the variant.
impl<'a> From<Cow<'a, [u8]>> for Value<'a> {
    fn from(c: Cow<'a, [u8]>) -> Self {
        match c {
            Cow::Borrowed(slice) => Self::from(slice),
            Cow::Owned(buffer) => Self::from(buffer),
        }
    }
}
// todo display for value // todo display for value
#[cfg(feature = "serde")] #[cfg(feature = "serde")]
@ -175,11 +231,11 @@ impl Serialize for Value<'_> {
/// documentation has a strict subset of values that may be interpreted as a /// documentation has a strict subset of values that may be interpreted as a
/// boolean value, all of which are ASCII and thus UTF-8 representable. /// boolean value, all of which are ASCII and thus UTF-8 representable.
/// Consequently, variants hold [`str`]s rather than [`[u8]`]s. /// Consequently, variants hold [`str`]s rather than [`[u8]`]s.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] #[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[allow(missing_docs)] #[allow(missing_docs)]
pub enum Boolean<'a> { pub enum Boolean<'a> {
True(TrueVariant<'a>), True(TrueVariant<'a>),
False(&'a str), False(Cow<'a, str>),
} }
impl<'a> TryFrom<&'a str> for Boolean<'a> { impl<'a> TryFrom<&'a str> for Boolean<'a> {
@ -204,13 +260,48 @@ impl<'a> TryFrom<&'a [u8]> for Boolean<'a> {
|| value.eq_ignore_ascii_case(b"zero") || value.eq_ignore_ascii_case(b"zero")
|| value == b"\"\"" || value == b"\"\""
{ {
return Ok(Self::False(std::str::from_utf8(value).unwrap())); return Ok(Self::False(std::str::from_utf8(value).unwrap().into()));
} }
Err(()) Err(())
} }
} }
/// Parses an owned string as a [`Boolean`].
///
/// On failure the original string is returned as the error.
impl TryFrom<String> for Boolean<'_> {
    type Error = String;

    fn try_from(value: String) -> Result<Self, Self::Error> {
        match Self::try_from(value.into_bytes()) {
            Ok(boolean) => Ok(boolean),
            // The rejected bytes are turned back into a `String`; this
            // panics only if they are not valid UTF-8.
            Err(bytes) => Err(String::from_utf8(bytes).unwrap()),
        }
    }
}
/// Parses an owned byte buffer as a [`Boolean`].
///
/// `no`, `off`, `false` and `zero` (compared ASCII case-insensitively) and
/// the literal two-byte value `""` yield `Boolean::False`, keeping the
/// original spelling. Everything else is handed to the [`TrueVariant`]
/// conversion; if that fails as well, the buffer is returned as the error.
impl TryFrom<Vec<u8>> for Boolean<'_> {
    type Error = Vec<u8>;

    fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
        const FALSE_WORDS: [&[u8]; 4] = [b"no", b"off", b"false", b"zero"];

        let is_false = value == b"\"\""
            || FALSE_WORDS
                .iter()
                .any(|word| value.eq_ignore_ascii_case(*word));

        if is_false {
            // Every accepted spelling is pure ASCII, so the bytes are valid
            // UTF-8 and the conversion cannot fail.
            let text = String::from_utf8(value).unwrap();
            Ok(Self::False(Cow::Owned(text)))
        } else {
            TrueVariant::try_from(value).map(Self::True)
        }
    }
}
/// Parses a [`Cow`] of bytes as a [`Boolean`], using the borrowed or owned
/// conversion depending on the variant.
///
/// The error carries no payload: a rejected owned buffer is dropped.
impl<'a> TryFrom<Cow<'a, [u8]>> for Boolean<'a> {
    type Error = ();

    fn try_from(c: Cow<'a, [u8]>) -> Result<Self, Self::Error> {
        match c {
            Cow::Borrowed(slice) => Self::try_from(slice),
            Cow::Owned(buffer) => match Self::try_from(buffer) {
                Ok(boolean) => Ok(boolean),
                // The owned conversion returns the buffer on failure; it is
                // discarded to match this impl's unit error type.
                Err(_rejected) => Err(()),
            },
        }
    }
}
impl Display for Boolean<'_> { impl Display for Boolean<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { match self {
@ -245,10 +336,10 @@ impl Serialize for Boolean<'_> {
/// Discriminating enum between implicit and explicit truthy values. /// Discriminating enum between implicit and explicit truthy values.
/// ///
/// This enum is part of the [`Boolean`] enum. /// This enum is part of the [`Boolean`] enum.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] #[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[allow(missing_docs)] #[allow(missing_docs)]
pub enum TrueVariant<'a> { pub enum TrueVariant<'a> {
Explicit(&'a str), Explicit(Cow<'a, str>),
/// For values defined without a `= <value>`. /// For values defined without a `= <value>`.
Implicit, Implicit,
} }
@ -270,7 +361,7 @@ impl<'a> TryFrom<&'a [u8]> for TrueVariant<'a> {
|| value.eq_ignore_ascii_case(b"true") || value.eq_ignore_ascii_case(b"true")
|| value.eq_ignore_ascii_case(b"one") || value.eq_ignore_ascii_case(b"one")
{ {
Ok(Self::Explicit(std::str::from_utf8(value).unwrap())) Ok(Self::Explicit(std::str::from_utf8(value).unwrap().into()))
} else if value.is_empty() { } else if value.is_empty() {
Ok(Self::Implicit) Ok(Self::Implicit)
} else { } else {
@ -279,6 +370,34 @@ impl<'a> TryFrom<&'a [u8]> for TrueVariant<'a> {
} }
} }
/// Parses an owned string as a [`TrueVariant`].
///
/// On failure the original string is returned as the error.
impl TryFrom<String> for TrueVariant<'_> {
    type Error = String;

    fn try_from(value: String) -> Result<Self, Self::Error> {
        match Self::try_from(value.into_bytes()) {
            Ok(variant) => Ok(variant),
            // The rejected bytes are turned back into a `String`; this
            // panics only if they are not valid UTF-8.
            Err(bytes) => Err(String::from_utf8(bytes).unwrap()),
        }
    }
}
/// Parses an owned byte buffer as a [`TrueVariant`].
///
/// `yes`, `on`, `true` and `one` (compared ASCII case-insensitively) yield
/// `Explicit`, keeping the original spelling; an empty buffer yields
/// `Implicit`. Any other input is returned unchanged as the error.
impl TryFrom<Vec<u8>> for TrueVariant<'_> {
    type Error = Vec<u8>;

    fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
        const TRUE_WORDS: [&[u8]; 4] = [b"yes", b"on", b"true", b"one"];

        let is_explicit = TRUE_WORDS
            .iter()
            .any(|word| value.eq_ignore_ascii_case(*word));

        if is_explicit {
            // Every accepted spelling is pure ASCII, so the bytes are valid
            // UTF-8 and the conversion cannot fail.
            let text = String::from_utf8(value).unwrap();
            return Ok(Self::Explicit(Cow::Owned(text)));
        }

        if value.is_empty() {
            return Ok(Self::Implicit);
        }

        Err(value)
    }
}
impl Display for TrueVariant<'_> { impl Display for TrueVariant<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if let Self::Explicit(v) = self { if let Self::Explicit(v) = self {
@ -378,6 +497,24 @@ impl TryFrom<&[u8]> for Integer {
} }
} }
/// Parses an owned byte buffer as an [`Integer`] by delegating to the
/// byte-slice conversion; the buffer itself is only read.
impl TryFrom<Vec<u8>> for Integer {
    type Error = ();

    fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
        let bytes: &[u8] = &value;
        Self::try_from(bytes)
    }
}
/// Parses a [`Cow`] of bytes as an [`Integer`].
///
/// Both the borrowed and the owned variant end up in the byte-slice
/// conversion, so the bytes are simply borrowed regardless of variant.
impl TryFrom<Cow<'_, [u8]>> for Integer {
    type Error = ();

    fn try_from(c: Cow<'_, [u8]>) -> Result<Self, Self::Error> {
        let bytes: &[u8] = &c;
        Self::try_from(bytes)
    }
}
/// Integer prefixes that are supported by `git-config`. /// Integer prefixes that are supported by `git-config`.
/// ///
/// These values are base-2 units of measurement, not the base-10 variants. /// These values are base-2 units of measurement, not the base-10 variants.
@ -445,6 +582,14 @@ impl TryFrom<&[u8]> for IntegerSuffix {
} }
} }
/// Parses an owned byte buffer as an [`IntegerSuffix`] by delegating to the
/// byte-slice conversion; the buffer itself is only read.
impl TryFrom<Vec<u8>> for IntegerSuffix {
    type Error = ();

    fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
        let bytes: &[u8] = &value;
        Self::try_from(bytes)
    }
}
/// Any value that may contain a foreground color, background color, a /// Any value that may contain a foreground color, background color, a
/// collection of color (text) modifiers, or a combination of any of the /// collection of color (text) modifiers, or a combination of any of the
/// aforementioned values. /// aforementioned values.
@ -565,6 +710,24 @@ impl TryFrom<&[u8]> for Color {
} }
} }
/// Parses an owned byte buffer as a [`Color`] by delegating to the
/// byte-slice conversion; the buffer itself is only read.
impl TryFrom<Vec<u8>> for Color {
    type Error = ();

    fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
        let bytes: &[u8] = &value;
        Self::try_from(bytes)
    }
}
/// Parses a [`Cow`] of bytes as a [`Color`].
///
/// Both the borrowed and the owned variant end up in the byte-slice
/// conversion, so the bytes are simply borrowed regardless of variant.
impl TryFrom<Cow<'_, [u8]>> for Color {
    type Error = ();

    fn try_from(c: Cow<'_, [u8]>) -> Result<Self, Self::Error> {
        let bytes: &[u8] = &c;
        Self::try_from(bytes)
    }
}
/// Discriminating enum for [`Color`] values. /// Discriminating enum for [`Color`] values.
/// ///
/// `git-config` supports the eight standard colors, their bright variants, an /// `git-config` supports the eight standard colors, their bright variants, an
@ -808,31 +971,34 @@ impl TryFrom<&[u8]> for ColorAttribute {
#[cfg(test)] #[cfg(test)]
mod normalize { mod normalize {
use super::normalize; use super::normalize_str;
use std::borrow::Cow; use std::borrow::Cow;
#[test] #[test]
fn not_modified_is_borrowed() { fn not_modified_is_borrowed() {
assert_eq!(normalize(b"hello world"), Cow::Borrowed(b"hello world")); assert_eq!(normalize_str("hello world"), Cow::Borrowed(b"hello world"));
} }
#[test] #[test]
fn modified_is_owned() { fn modified_is_owned() {
assert_eq!( assert_eq!(
normalize(b"hello \"world\""), normalize_str("hello \"world\""),
Cow::<[u8]>::Owned(b"hello world".to_vec()) Cow::<[u8]>::Owned(b"hello world".to_vec())
); );
} }
#[test] #[test]
fn all_quoted_is_optimized() { fn all_quoted_is_optimized() {
assert_eq!(normalize(b"\"hello world\""), Cow::Borrowed(b"hello world")); assert_eq!(
normalize_str("\"hello world\""),
Cow::Borrowed(b"hello world")
);
} }
#[test] #[test]
fn all_quote_optimization_is_correct() { fn all_quote_optimization_is_correct() {
assert_eq!( assert_eq!(
normalize(br#""hello" world\""#), normalize_str(r#""hello" world\""#),
Cow::Borrowed(b"hello world\"") Cow::Borrowed(b"hello world\"")
); );
} }
@ -840,7 +1006,7 @@ mod normalize {
#[test] #[test]
fn quotes_right_next_to_each_other() { fn quotes_right_next_to_each_other() {
assert_eq!( assert_eq!(
normalize(b"\"hello\"\" world\""), normalize_str("\"hello\"\" world\""),
Cow::<[u8]>::Owned(b"hello world".to_vec()) Cow::<[u8]>::Owned(b"hello world".to_vec())
); );
} }
@ -848,19 +1014,19 @@ mod normalize {
#[test] #[test]
fn escaped_quotes_are_kept() { fn escaped_quotes_are_kept() {
assert_eq!( assert_eq!(
normalize(br#""hello \"\" world""#), normalize_str(r#""hello \"\" world""#),
Cow::<[u8]>::Owned(b"hello \"\" world".to_vec()) Cow::<[u8]>::Owned(b"hello \"\" world".to_vec())
); );
} }
#[test] #[test]
fn empty_string() { fn empty_string() {
assert_eq!(normalize(b""), Cow::Borrowed(b"")); assert_eq!(normalize_str(""), Cow::Borrowed(b""));
} }
#[test] #[test]
fn empty_normalized_string_is_optimized() { fn empty_normalized_string_is_optimized() {
assert_eq!(normalize(b"\"\""), Cow::Borrowed(b"")); assert_eq!(normalize_str("\"\""), Cow::Borrowed(b""));
} }
} }
@ -870,30 +1036,33 @@ mod boolean {
#[test] #[test]
fn from_str_false() { fn from_str_false() {
assert_eq!(Boolean::try_from("no"), Ok(Boolean::False("no"))); assert_eq!(Boolean::try_from("no"), Ok(Boolean::False("no".into())));
assert_eq!(Boolean::try_from("off"), Ok(Boolean::False("off"))); assert_eq!(Boolean::try_from("off"), Ok(Boolean::False("off".into())));
assert_eq!(Boolean::try_from("false"), Ok(Boolean::False("false"))); assert_eq!(
assert_eq!(Boolean::try_from("zero"), Ok(Boolean::False("zero"))); Boolean::try_from("false"),
assert_eq!(Boolean::try_from("\"\""), Ok(Boolean::False("\"\""))); Ok(Boolean::False("false".into()))
);
assert_eq!(Boolean::try_from("zero"), Ok(Boolean::False("zero".into())));
assert_eq!(Boolean::try_from("\"\""), Ok(Boolean::False("\"\"".into())));
} }
#[test] #[test]
fn from_str_true() { fn from_str_true() {
assert_eq!( assert_eq!(
Boolean::try_from("yes"), Boolean::try_from("yes"),
Ok(Boolean::True(TrueVariant::Explicit("yes"))) Ok(Boolean::True(TrueVariant::Explicit("yes".into())))
); );
assert_eq!( assert_eq!(
Boolean::try_from("on"), Boolean::try_from("on"),
Ok(Boolean::True(TrueVariant::Explicit("on"))) Ok(Boolean::True(TrueVariant::Explicit("on".into())))
); );
assert_eq!( assert_eq!(
Boolean::try_from("true"), Boolean::try_from("true"),
Ok(Boolean::True(TrueVariant::Explicit("true"))) Ok(Boolean::True(TrueVariant::Explicit("true".into())))
); );
assert_eq!( assert_eq!(
Boolean::try_from("one"), Boolean::try_from("one"),
Ok(Boolean::True(TrueVariant::Explicit("one"))) Ok(Boolean::True(TrueVariant::Explicit("one".into())))
); );
} }