Compare commits

...

5 Commits

Author SHA1 Message Date
Edward Shen cd2f58c920
use memrchr 2021-03-01 16:01:47 -05:00
Edward Shen c975a2ec14
use drain instead 2021-03-01 15:39:18 -05:00
Edward Shen c9c8e70afb
use mutablevalue for mut entries 2021-02-28 22:47:40 -05:00
Edward Shen 493729cc3c
normalize get_raw_value 2021-02-28 21:07:02 -05:00
Edward Shen c01b61cb11
fix get_raw_value, fix returning refs to cows 2021-02-28 20:42:54 -05:00
4 changed files with 788 additions and 519 deletions

View File

@ -16,6 +16,7 @@ exclude = ["fuzz/**/*", ".vscode/**/*"]
serde = ["serde_crate"]
[dependencies]
memchr = "2"
nom = { version = "6", default_features = false, features = ["std"] }
serde_crate = { version = "1", package = "serde", optional = true }

File diff suppressed because it is too large Load Diff

View File

@ -700,7 +700,7 @@ fn section_header(i: &[u8]) -> IResult<&[u8], ParsedSectionHeader> {
if let Ok((i, _)) = char::<_, NomError<&[u8]>>(']')(i) {
// Either section does not have a subsection or using deprecated
// subsection syntax at this point.
let header = match find_legacy_subsection_separator(name) {
let header = match memchr::memrchr(b'.', name.as_bytes()) {
Some(index) => ParsedSectionHeader {
name: Cow::Borrowed(&name[..index]),
separator: name.get(index..=index).map(|slice| Cow::Borrowed(slice)),
@ -747,16 +747,6 @@ fn section_header(i: &[u8]) -> IResult<&[u8], ParsedSectionHeader> {
))
}
/// Returns the byte index of the last `.` in `input`, or `None` when the
/// input contains no `.` at all.
///
/// Section headers written in the deprecated `[section.subsection]` form are
/// split at this position into the section name and the subsection.
fn find_legacy_subsection_separator(input: &str) -> Option<usize> {
    // `.` is a single-byte ASCII character, so the byte offset reported by
    // `rfind` equals the index a reverse scan over `as_bytes()` would find.
    input.rfind('.')
}
fn section_body<'a, 'b, 'c>(
i: &'a [u8],
node: &'b mut ParserNode,

View File

@ -21,53 +21,62 @@ use std::str::FromStr;
/// need to call this yourself. However, if you're directly handling events
/// from the parser, you may want to use this to help with value interpretation.
///
/// Generally speaking, you'll want to use one of the variants of this function,
/// such as [`normalize_str`] or [`normalize_vec`].
///
/// # Examples
///
/// Values that don't need modification are returned borrowed, without allocation.
///
/// ```
/// # use std::borrow::Cow;
/// # use git_config::values::normalize;
/// assert_eq!(normalize(b"hello world"), Cow::Borrowed(b"hello world".into()));
/// # use git_config::values::normalize_str;
/// assert_eq!(normalize_str("hello world"), Cow::Borrowed(b"hello world".into()));
/// ```
///
/// Fully quoted values are optimized to not need allocations.
///
/// ```
/// # use std::borrow::Cow;
/// # use git_config::values::normalize;
/// assert_eq!(normalize(b"\"hello world\""), Cow::Borrowed(b"hello world".into()));
/// # use git_config::values::normalize_str;
/// assert_eq!(normalize_str("\"hello world\""), Cow::Borrowed(b"hello world".into()));
/// ```
///
/// Quoted values are unwrapped as an owned variant.
///
/// ```
/// # use std::borrow::Cow;
/// # use git_config::values::normalize;
/// assert_eq!(normalize(b"hello \"world\""), Cow::<[u8]>::Owned(b"hello world".to_vec()));
/// # use git_config::values::normalize_str;
/// assert_eq!(normalize_str("hello \"world\""), Cow::<[u8]>::Owned(b"hello world".to_vec()));
/// ```
///
/// Escaped quotes are unescaped.
///
/// ```
/// # use std::borrow::Cow;
/// # use git_config::values::normalize;
/// assert_eq!(normalize(br#"hello "world\"""#), Cow::<[u8]>::Owned(br#"hello world""#.to_vec()));
/// # use git_config::values::normalize_str;
/// assert_eq!(normalize_str(r#"hello "world\"""#), Cow::<[u8]>::Owned(br#"hello world""#.to_vec()));
/// ```
///
/// [`parser`]: crate::parser::Parser
pub fn normalize(input: &[u8]) -> Cow<'_, [u8]> {
pub fn normalize_cow(input: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
let mut first_index = 0;
let mut last_index = 0;
let size = input.len();
if input == b"\"\"" {
if &*input == b"\"\"" {
return Cow::Borrowed(&[]);
}
if size >= 3 && input[0] == b'=' && input[size - 1] == b'=' && input[size - 2] != b'\\' {
return normalize(&input[1..size]);
match input {
Cow::Borrowed(input) => return normalize_bytes(&input[1..size]),
Cow::Owned(mut input) => {
input.pop();
input.remove(0);
return normalize_vec(input);
}
}
}
let mut owned = vec![];
@ -104,12 +113,30 @@ pub fn normalize(input: &[u8]) -> Cow<'_, [u8]> {
owned.extend(dbg!(&input[last_index..]));
if owned.is_empty() {
Cow::Borrowed(input)
input
} else {
Cow::Owned(owned)
}
}
/// Byte-slice (`&[u8]`) entry point for [`normalize_cow`].
///
/// The slice is passed on as a borrowed [`Cow`], so the result can keep
/// borrowing from `input`.
#[inline]
pub fn normalize_bytes(input: &[u8]) -> Cow<'_, [u8]> {
    let borrowed: Cow<'_, [u8]> = Cow::from(input);
    normalize_cow(borrowed)
}
/// Owned (`Vec<u8>`) entry point for [`normalize_cow`].
///
/// The vector is moved into an owned [`Cow`] before normalization, so the
/// result does not borrow from the caller.
#[inline]
pub fn normalize_vec(input: Vec<u8>) -> Cow<'static, [u8]> {
    let owned: Cow<'static, [u8]> = Cow::from(input);
    normalize_cow(owned)
}
/// String-slice ([`str`]) entry point for [`normalize_cow`].
///
/// The string is viewed as raw bytes and forwarded to [`normalize_bytes`].
#[inline]
pub fn normalize_str(input: &str) -> Cow<'_, [u8]> {
    let raw: &[u8] = input.as_bytes();
    normalize_bytes(raw)
}
/// Fully enumerated valid types that a `git-config` value can be.
#[allow(missing_docs)]
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
@ -152,6 +179,35 @@ impl<'a> From<&'a [u8]> for Value<'a> {
}
}
/// Builds a [`Value`] from an owned string by reusing the `Vec<u8>`
/// conversion on the string's bytes.
impl From<String> for Value<'_> {
    fn from(s: String) -> Self {
        let bytes: Vec<u8> = s.into_bytes();
        bytes.into()
    }
}
/// Interprets an owned byte buffer as a [`Value`].
///
/// The interpretations are attempted in order: integer, color, boolean.
/// When none of them accepts the bytes, the buffer is stored untouched in
/// [`Value::Other`].
impl From<Vec<u8>> for Value<'_> {
    fn from(s: Vec<u8>) -> Self {
        // Integer and color parsing only need to look at the bytes.
        if let Ok(int) = Integer::try_from(s.as_slice()) {
            return Self::Integer(int);
        }

        if let Ok(color) = Color::try_from(s.as_slice()) {
            return Self::Color(color);
        }

        // The boolean conversion consumes the buffer and returns it in the
        // error, so the fallback can take ownership without copying.
        match Boolean::try_from(s) {
            Ok(boolean) => Self::Boolean(boolean),
            Err(raw) => Self::Other(Cow::Owned(raw)),
        }
    }
}
/// Converts a possibly-borrowed byte buffer into a [`Value`], dispatching to
/// the slice conversion for borrowed data and to the `Vec<u8>` conversion
/// for owned data.
impl<'a> From<Cow<'a, [u8]>> for Value<'a> {
    fn from(c: Cow<'a, [u8]>) -> Self {
        match c {
            Cow::Owned(owned) => owned.into(),
            Cow::Borrowed(borrowed) => borrowed.into(),
        }
    }
}
// todo display for value
#[cfg(feature = "serde")]
@ -175,11 +231,11 @@ impl Serialize for Value<'_> {
/// documentation has a strict subset of values that may be interpreted as a
/// boolean value, all of which are ASCII and thus UTF-8 representable.
/// Consequently, variants hold [`str`]s rather than [`[u8]`]s.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[allow(missing_docs)]
pub enum Boolean<'a> {
True(TrueVariant<'a>),
False(&'a str),
False(Cow<'a, str>),
}
impl<'a> TryFrom<&'a str> for Boolean<'a> {
@ -204,13 +260,48 @@ impl<'a> TryFrom<&'a [u8]> for Boolean<'a> {
|| value.eq_ignore_ascii_case(b"zero")
|| value == b"\"\""
{
return Ok(Self::False(std::str::from_utf8(value).unwrap()));
return Ok(Self::False(std::str::from_utf8(value).unwrap().into()));
}
Err(())
}
}
/// Parses an owned string as a [`Boolean`]; on failure the original string
/// is handed back as the error.
impl TryFrom<String> for Boolean<'_> {
    type Error = String;

    fn try_from(value: String) -> Result<Self, Self::Error> {
        match Self::try_from(value.into_bytes()) {
            Ok(boolean) => Ok(boolean),
            // The rejected bytes are the ones produced by `into_bytes`
            // above, so converting them back into a `String` is expected
            // to succeed.
            Err(bytes) => Err(String::from_utf8(bytes).unwrap()),
        }
    }
}
/// Parses an owned byte buffer as a [`Boolean`].
///
/// The falsy keywords (`no`, `off`, `false`, `zero`, compared
/// ASCII-case-insensitively) and the literal `""` yield [`Boolean::False`].
/// Everything else is delegated to [`TrueVariant`]; if that fails too, the
/// buffer is returned as the error.
impl TryFrom<Vec<u8>> for Boolean<'_> {
    type Error = Vec<u8>;

    fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
        const FALSY: [&[u8]; 4] = [b"no", b"off", b"false", b"zero"];

        let is_false =
            FALSY.iter().any(|&word| value.eq_ignore_ascii_case(word)) || value == b"\"\"";

        if is_false {
            // Only ASCII input can match the literals above, so the bytes
            // are valid UTF-8.
            let text = String::from_utf8(value).unwrap();
            return Ok(Self::False(Cow::Owned(text)));
        }

        TrueVariant::try_from(value).map(Self::True)
    }
}
/// Parses a possibly-borrowed byte buffer as a [`Boolean`].
///
/// Borrowed data goes through the slice conversion and owned data through
/// the `Vec<u8>` conversion; the rejected buffer of the owned path is
/// discarded so both paths report `()`.
impl<'a> TryFrom<Cow<'a, [u8]>> for Boolean<'a> {
    type Error = ();

    fn try_from(c: Cow<'a, [u8]>) -> Result<Self, Self::Error> {
        match c {
            Cow::Owned(owned) => Self::try_from(owned).map_err(drop),
            Cow::Borrowed(borrowed) => Self::try_from(borrowed),
        }
    }
}
impl Display for Boolean<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
@ -245,10 +336,10 @@ impl Serialize for Boolean<'_> {
/// Discriminating enum between implicit and explicit truthy values.
///
/// This enum is part of the [`Boolean`] enum.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[allow(missing_docs)]
pub enum TrueVariant<'a> {
Explicit(&'a str),
Explicit(Cow<'a, str>),
/// For values defined without a `= <value>`.
Implicit,
}
@ -270,7 +361,7 @@ impl<'a> TryFrom<&'a [u8]> for TrueVariant<'a> {
|| value.eq_ignore_ascii_case(b"true")
|| value.eq_ignore_ascii_case(b"one")
{
Ok(Self::Explicit(std::str::from_utf8(value).unwrap()))
Ok(Self::Explicit(std::str::from_utf8(value).unwrap().into()))
} else if value.is_empty() {
Ok(Self::Implicit)
} else {
@ -279,6 +370,34 @@ impl<'a> TryFrom<&'a [u8]> for TrueVariant<'a> {
}
}
/// Parses an owned string as a [`TrueVariant`]; on failure the original
/// string is handed back as the error.
impl TryFrom<String> for TrueVariant<'_> {
    type Error = String;

    fn try_from(value: String) -> Result<Self, Self::Error> {
        match Self::try_from(value.into_bytes()) {
            Ok(variant) => Ok(variant),
            // The rejected bytes are the ones produced by `into_bytes`
            // above, so converting them back into a `String` is expected
            // to succeed.
            Err(bytes) => Err(String::from_utf8(bytes).unwrap()),
        }
    }
}
/// Parses an owned byte buffer as a [`TrueVariant`].
///
/// The truthy keywords (`yes`, `on`, `true`, `one`, compared
/// ASCII-case-insensitively) yield [`TrueVariant::Explicit`] holding the
/// original spelling, and an empty buffer yields [`TrueVariant::Implicit`].
/// Any other input is returned unchanged as the error.
impl TryFrom<Vec<u8>> for TrueVariant<'_> {
    type Error = Vec<u8>;

    fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
        const TRUTHY: [&[u8]; 4] = [b"yes", b"on", b"true", b"one"];

        if TRUTHY.iter().any(|&word| value.eq_ignore_ascii_case(word)) {
            // Only ASCII input can match the literals above, so the bytes
            // are valid UTF-8.
            let text = String::from_utf8(value).unwrap();
            return Ok(Self::Explicit(Cow::Owned(text)));
        }

        if value.is_empty() {
            Ok(Self::Implicit)
        } else {
            Err(value)
        }
    }
}
impl Display for TrueVariant<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if let Self::Explicit(v) = self {
@ -378,6 +497,24 @@ impl TryFrom<&[u8]> for Integer {
}
}
/// Parses an owned byte buffer as an [`Integer`] by delegating to the
/// byte-slice conversion.
impl TryFrom<Vec<u8>> for Integer {
    type Error = ();

    fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
        let bytes: &[u8] = value.as_slice();
        Self::try_from(bytes)
    }
}
/// Parses a possibly-borrowed byte buffer as an [`Integer`].
///
/// Borrowed data goes through the slice conversion and owned data through
/// the `Vec<u8>` conversion; both report `()` on failure.
impl TryFrom<Cow<'_, [u8]>> for Integer {
    type Error = ();

    fn try_from(c: Cow<'_, [u8]>) -> Result<Self, Self::Error> {
        match c {
            Cow::Owned(owned) => Self::try_from(owned).map_err(drop),
            Cow::Borrowed(borrowed) => Self::try_from(borrowed),
        }
    }
}
/// Integer prefixes that are supported by `git-config`.
///
/// These values are base-2 units of measurement, not the base-10 variants.
@ -445,6 +582,14 @@ impl TryFrom<&[u8]> for IntegerSuffix {
}
}
/// Parses an owned byte buffer as an [`IntegerSuffix`] by delegating to the
/// byte-slice conversion.
impl TryFrom<Vec<u8>> for IntegerSuffix {
    type Error = ();

    fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
        let bytes: &[u8] = value.as_slice();
        Self::try_from(bytes)
    }
}
/// Any value that may contain a foreground color, background color, a
/// collection of color (text) modifiers, or a combination of any of the
/// aforementioned values.
@ -565,6 +710,24 @@ impl TryFrom<&[u8]> for Color {
}
}
/// Parses an owned byte buffer as a [`Color`] by delegating to the
/// byte-slice conversion.
impl TryFrom<Vec<u8>> for Color {
    type Error = ();

    fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
        let bytes: &[u8] = value.as_slice();
        Self::try_from(bytes)
    }
}
/// Parses a possibly-borrowed byte buffer as a [`Color`].
///
/// Borrowed data goes through the slice conversion and owned data through
/// the `Vec<u8>` conversion; both report `()` on failure.
impl TryFrom<Cow<'_, [u8]>> for Color {
    type Error = ();

    fn try_from(c: Cow<'_, [u8]>) -> Result<Self, Self::Error> {
        match c {
            Cow::Owned(owned) => Self::try_from(owned).map_err(drop),
            Cow::Borrowed(borrowed) => Self::try_from(borrowed),
        }
    }
}
/// Discriminating enum for [`Color`] values.
///
/// `git-config` supports the eight standard colors, their bright variants, an
@ -808,31 +971,34 @@ impl TryFrom<&[u8]> for ColorAttribute {
#[cfg(test)]
mod normalize {
use super::normalize;
use super::normalize_str;
use std::borrow::Cow;
#[test]
fn not_modified_is_borrowed() {
assert_eq!(normalize(b"hello world"), Cow::Borrowed(b"hello world"));
assert_eq!(normalize_str("hello world"), Cow::Borrowed(b"hello world"));
}
#[test]
fn modified_is_owned() {
assert_eq!(
normalize(b"hello \"world\""),
normalize_str("hello \"world\""),
Cow::<[u8]>::Owned(b"hello world".to_vec())
);
}
#[test]
fn all_quoted_is_optimized() {
assert_eq!(normalize(b"\"hello world\""), Cow::Borrowed(b"hello world"));
assert_eq!(
normalize_str("\"hello world\""),
Cow::Borrowed(b"hello world")
);
}
#[test]
fn all_quote_optimization_is_correct() {
assert_eq!(
normalize(br#""hello" world\""#),
normalize_str(r#""hello" world\""#),
Cow::Borrowed(b"hello world\"")
);
}
@ -840,7 +1006,7 @@ mod normalize {
#[test]
fn quotes_right_next_to_each_other() {
assert_eq!(
normalize(b"\"hello\"\" world\""),
normalize_str("\"hello\"\" world\""),
Cow::<[u8]>::Owned(b"hello world".to_vec())
);
}
@ -848,19 +1014,19 @@ mod normalize {
#[test]
fn escaped_quotes_are_kept() {
assert_eq!(
normalize(br#""hello \"\" world""#),
normalize_str(r#""hello \"\" world""#),
Cow::<[u8]>::Owned(b"hello \"\" world".to_vec())
);
}
#[test]
fn empty_string() {
assert_eq!(normalize(b""), Cow::Borrowed(b""));
assert_eq!(normalize_str(""), Cow::Borrowed(b""));
}
#[test]
fn empty_normalized_string_is_optimized() {
assert_eq!(normalize(b"\"\""), Cow::Borrowed(b""));
assert_eq!(normalize_str("\"\""), Cow::Borrowed(b""));
}
}
@ -870,30 +1036,33 @@ mod boolean {
#[test]
fn from_str_false() {
assert_eq!(Boolean::try_from("no"), Ok(Boolean::False("no")));
assert_eq!(Boolean::try_from("off"), Ok(Boolean::False("off")));
assert_eq!(Boolean::try_from("false"), Ok(Boolean::False("false")));
assert_eq!(Boolean::try_from("zero"), Ok(Boolean::False("zero")));
assert_eq!(Boolean::try_from("\"\""), Ok(Boolean::False("\"\"")));
assert_eq!(Boolean::try_from("no"), Ok(Boolean::False("no".into())));
assert_eq!(Boolean::try_from("off"), Ok(Boolean::False("off".into())));
assert_eq!(
Boolean::try_from("false"),
Ok(Boolean::False("false".into()))
);
assert_eq!(Boolean::try_from("zero"), Ok(Boolean::False("zero".into())));
assert_eq!(Boolean::try_from("\"\""), Ok(Boolean::False("\"\"".into())));
}
#[test]
fn from_str_true() {
assert_eq!(
Boolean::try_from("yes"),
Ok(Boolean::True(TrueVariant::Explicit("yes")))
Ok(Boolean::True(TrueVariant::Explicit("yes".into())))
);
assert_eq!(
Boolean::try_from("on"),
Ok(Boolean::True(TrueVariant::Explicit("on")))
Ok(Boolean::True(TrueVariant::Explicit("on".into())))
);
assert_eq!(
Boolean::try_from("true"),
Ok(Boolean::True(TrueVariant::Explicit("true")))
Ok(Boolean::True(TrueVariant::Explicit("true".into())))
);
assert_eq!(
Boolean::try_from("one"),
Ok(Boolean::True(TrueVariant::Explicit("one")))
Ok(Boolean::True(TrueVariant::Explicit("one".into())))
);
}