Compare commits

..

2 commits

Author SHA1 Message Date
99f0400118
implement unquoting in normalize 2021-02-27 18:48:05 -05:00
300fb6bbfb
add normalize 2021-02-27 15:40:11 -05:00

View file

@ -1,9 +1,110 @@
//! Rust containers for valid `git-config` types. //! Rust containers for valid `git-config` types.
use crate::parser::Event;
use bstr::{BStr, ByteSlice}; use bstr::{BStr, ByteSlice};
#[cfg(feature = "serde")] #[cfg(feature = "serde")]
use serde::{Serialize, Serializer}; use serde::{Serialize, Serializer};
use std::{borrow::Cow, convert::TryFrom, fmt::Display, str::FromStr}; use std::borrow::Cow;
use std::convert::TryFrom;
use std::fmt::Display;
use std::str::FromStr;
/// Removes quotes, if any, from the provided input and unescapes escaped
/// quotes (`\"` becomes `"`). The returned bytes should be safe for value
/// interpretation.
///
/// Only the `\"` escape sequence is rewritten; any other backslash sequence
/// is copied through unchanged (both the backslash and the byte after it).
///
/// The input is expected to contain an even number of unescaped quotes. This
/// is not validated: an unmatched quote is simply dropped like any other
/// unescaped quote.
///
/// The result is borrowed from `input` whenever it is a single contiguous
/// slice of it: when there is nothing to strip, when the whole value is
/// wrapped in one pair of quotes, or when nothing remains. An allocation only
/// happens when two or more pieces of the input have to be joined.
///
/// # Examples
///
/// Values that don't need modification are returned borrowed, without
/// allocation.
///
/// ```
/// # use std::borrow::Cow;
/// # use git_config::values::normalize;
/// assert_eq!(normalize(b"hello world"), Cow::Borrowed(&b"hello world"[..]));
/// assert!(matches!(normalize(b"hello world"), Cow::Borrowed(_)));
/// ```
///
/// Fully quoted values are optimized to not need allocations.
///
/// ```
/// # use std::borrow::Cow;
/// # use git_config::values::normalize;
/// assert_eq!(normalize(b"\"hello world\""), Cow::Borrowed(&b"hello world"[..]));
/// assert!(matches!(normalize(b"\"hello world\""), Cow::Borrowed(_)));
/// ```
///
/// Partially quoted values are unwrapped as an owned variant.
///
/// ```
/// # use std::borrow::Cow;
/// # use git_config::values::normalize;
/// assert_eq!(normalize(b"hello \"world\""), Cow::<[u8]>::Owned(b"hello world".to_vec()));
/// ```
///
/// Escaped quotes are kept as literal quotes.
///
/// ```
/// # use std::borrow::Cow;
/// # use git_config::values::normalize;
/// assert_eq!(normalize(br#"hello "world\"""#), Cow::<[u8]>::Owned(br#"hello world""#.to_vec()));
/// ```
pub fn normalize(input: &[u8]) -> Cow<'_, [u8]> {
    // Appends one kept slice of the input to the result. The result stays
    // borrowed while it consists of at most one non-empty slice; the first
    // time a second slice arrives it is promoted to an owned buffer.
    fn push_segment<'a>(result: &mut Cow<'a, [u8]>, segment: &'a [u8]) {
        if segment.is_empty() {
            return;
        }
        if result.is_empty() {
            // `result` can only be empty while it is still the initial
            // borrowed empty slice, so replacing it loses nothing.
            *result = Cow::Borrowed(segment);
        } else {
            result.to_mut().extend_from_slice(segment);
        }
    }

    let mut result: Cow<'_, [u8]> = Cow::Borrowed(&input[..0]);
    // Start of the run of bytes that has been scanned but not yet emitted.
    let mut segment_start = 0;
    let mut was_escaped = false;

    for (i, &byte) in input.iter().enumerate() {
        if was_escaped {
            was_escaped = false;
            if byte == b'"' {
                // Emit everything before the backslash at `i - 1`, drop the
                // backslash, and let the quote open the next run as a literal.
                push_segment(&mut result, &input[segment_start..i - 1]);
                segment_start = i;
            }
        } else if byte == b'\\' {
            was_escaped = true;
        } else if byte == b'"' {
            // An unescaped quote is a delimiter: emit the run before it and
            // resume right after it.
            push_segment(&mut result, &input[segment_start..i]);
            segment_start = i + 1;
        }
    }

    push_segment(&mut result, &input[segment_start..]);
    result
}
/// Fully enumerated valid types that a `git-config` value can be. /// Fully enumerated valid types that a `git-config` value can be.
#[allow(missing_docs)] #[allow(missing_docs)]
@ -24,6 +125,38 @@ impl<'a> Value<'a> {
} }
} }
/// Failure kinds for building a [`Value`] out of parser events.
///
/// NOTE(review): nothing in the visible code constructs these variants yet.
/// Their only use is in the commented-out `TryFrom<&[Event]>` draft that
/// follows this enum, so the variant descriptions below are read off that
/// draft and should be confirmed once the conversion is actually implemented.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub enum ValueEventConversionError {
/// Presumably: some value bytes were collected from the events, but the
/// conversion could not be completed (the draft returns this when its
/// accumulation buffer is non-empty after all events were scanned).
ValueNotDone,
/// Presumably: no value bytes were collected from the events at all (the
/// draft returns this when its accumulation buffer is still empty).
NoValue,
}
// impl<'a, 'b> TryFrom<&'b [Event<'a>]> for Value<'a> {
// type Error = ValueEventConversionError;
// fn try_from(events: &'b [Event<'a>]) -> Result<Self, Self::Error> {
// let mut v = vec![];
// for event in events {
// match event {
// Event::Value(v) => return Ok(Self::from(v.borrow())),
// Event::ValueNotDone(value) => v.extend(value.borrow()),
// Event::ValueDone(value) => {
// v.extend(value.borrow());
// // return Ok(Self::from(v));
// }
// _ => (),
// }
// }
// if v.is_empty() {
// Err(Self::Error::NoValue)
// } else {
// Err(Self::Error::ValueNotDone)
// }
// }
// }
impl<'a> From<&'a str> for Value<'a> { impl<'a> From<&'a str> for Value<'a> {
fn from(s: &'a str) -> Self { fn from(s: &'a str) -> Self {
if let Ok(bool) = Boolean::try_from(s) { if let Ok(bool) = Boolean::try_from(s) {
@ -46,23 +179,19 @@ impl<'a> From<&'a [u8]> for Value<'a> {
fn from(s: &'a [u8]) -> Self { fn from(s: &'a [u8]) -> Self {
// All parsable values must be utf-8 valid // All parsable values must be utf-8 valid
if let Ok(s) = std::str::from_utf8(s) { if let Ok(s) = std::str::from_utf8(s) {
if let Ok(bool) = Boolean::try_from(s) { Self::from(s)
return Self::Boolean(bool); } else {
}
if let Ok(int) = Integer::from_str(s) {
return Self::Integer(int);
}
if let Ok(color) = Color::from_str(s) {
return Self::Color(color);
}
}
Self::Other(Cow::Borrowed(s.as_bstr())) Self::Other(Cow::Borrowed(s.as_bstr()))
} }
}
} }
// impl From<Vec<u8>> for Value<'_> {
// fn from(_: Vec<u8>) -> Self {
// todo!()
// }
// }
// todo display for value // todo display for value
#[cfg(feature = "serde")] #[cfg(feature = "serde")]
@ -656,6 +785,64 @@ impl TryFrom<&[u8]> for ColorAttribute {
} }
} }
#[cfg(test)]
mod normalize {
    use super::normalize;
    use std::borrow::Cow;

    /// Asserts that normalizing `raw` yields exactly the bytes in `expected`.
    ///
    /// `Cow`'s `PartialEq` compares contents only, so this (like a direct
    /// `assert_eq!` against either variant) does not distinguish a borrowed
    /// result from an owned one.
    fn assert_normalized(raw: &[u8], expected: &[u8]) {
        assert_eq!(normalize(raw), Cow::Borrowed(expected));
    }

    /// Input without any quotes comes back with the same bytes.
    #[test]
    fn not_modified_is_borrowed() {
        assert_normalized(b"hello world", b"hello world");
    }

    /// A quoted section in the middle of a value is unwrapped.
    #[test]
    fn modified_is_owned() {
        assert_normalized(br#"hello "world""#, b"hello world");
    }

    /// A value wrapped in a single pair of quotes loses just that pair.
    #[test]
    fn all_quoted_is_optimized() {
        assert_normalized(br#""hello world""#, b"hello world");
    }

    /// A value that starts with a quote and ends with an *escaped* quote must
    /// not be mistaken for a fully quoted value.
    #[test]
    fn all_quote_optimization_is_correct() {
        assert_normalized(b"\"hello\" world\\\"", br#"hello world""#);
    }

    /// Two quoted sections that touch each other are joined seamlessly.
    #[test]
    fn quotes_right_next_to_each_other() {
        assert_normalized(br#""hello"" world""#, b"hello world");
    }

    /// Escaped quotes inside a quoted section survive as literal quotes.
    #[test]
    fn escaped_quotes_are_kept() {
        assert_normalized(b"\"hello \\\"\\\" world\"", br#"hello "" world"#);
    }

    /// Empty input stays empty.
    #[test]
    fn empty_string() {
        assert_normalized(b"", b"");
    }

    /// A lone pair of quotes normalizes to the empty value.
    #[test]
    fn empty_normalized_string_is_optimized() {
        assert_normalized(br#""""#, b"");
    }
}
#[cfg(test)] #[cfg(test)]
mod boolean { mod boolean {
use super::*; use super::*;