Compare commits

..

7 commits

3 changed files with 323 additions and 161 deletions

View file

@ -10,17 +10,18 @@
//! [`GitConfig`]: crate::config::GitConfig //! [`GitConfig`]: crate::config::GitConfig
use bstr::{BStr, ByteSlice}; use bstr::{BStr, ByteSlice};
use nom::branch::alt;
use nom::bytes::complete::{escaped, tag, take_till, take_while}; use nom::bytes::complete::{escaped, tag, take_till, take_while};
use nom::character::complete::{char, none_of, one_of}; use nom::character::complete::{char, none_of, one_of};
use nom::character::{is_newline, is_space}; use nom::character::{is_newline, is_space};
use nom::combinator::{map, opt}; use nom::combinator::{map, opt};
use nom::error::{Error as NomError, ErrorKind}; use nom::error::{Error as NomError, ErrorKind};
use nom::multi::{many0, many1};
use nom::sequence::delimited; use nom::sequence::delimited;
use nom::IResult; use nom::IResult;
use nom::{branch::alt, multi::many0};
use std::borrow::Cow;
use std::fmt::Display; use std::fmt::Display;
use std::iter::FusedIterator; use std::iter::FusedIterator;
use std::{borrow::Cow, error::Error};
/// Syntactic events that occur in the config. Despite all these variants /// Syntactic events that occur in the config. Despite all these variants
/// holding a [`Cow`] instead of a simple reference, the parser will only emit /// holding a [`Cow`] instead of a simple reference, the parser will only emit
@ -158,20 +159,77 @@ impl Display for ParsedComment<'_> {
} }
} }
/// The various parsing failure reasons. /// A parser error reports the one-indexed line number where the parsing error
/// occurred, as well as the last parser node and the remaining data to be
/// parsed.
#[derive(PartialEq, Debug)] #[derive(PartialEq, Debug)]
pub enum ParserError<'a> { pub struct ParserError<'a> {
/// A parsing error occurred. line_number: usize,
InvalidInput(nom::Err<NomError<&'a [u8]>>), last_attempted_parser: ParserNode,
/// The config was successfully parsed, but we had extraneous data after the parsed_until: &'a [u8],
/// config file.
ConfigHasExtraData(&'a BStr),
} }
#[doc(hidden)] impl ParserError<'_> {
impl<'a> From<nom::Err<NomError<&'a [u8]>>> for ParserError<'a> { /// The one-indexed line number where the error occurred. This is determined
fn from(e: nom::Err<NomError<&'a [u8]>>) -> Self { /// by the number of newlines that were successfully parsed.
Self::InvalidInput(e) pub fn line_number(&self) -> usize {
self.line_number + 1
}
/// The remaining data that was left unparsed.
pub fn remaining_data(&self) -> &[u8] {
self.parsed_until
}
}
impl Display for ParserError<'_> {
    /// Renders a human-readable description of the parse failure.
    ///
    /// The message names the one-indexed line the parser had reached and the
    /// kind of node it was last trying to parse, followed by a short preview
    /// of the unparsed data. Valid UTF-8 data is previewed as text, truncated
    /// after ten characters; anything else is previewed as hex bytes,
    /// truncated after ten bytes.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Maximum number of characters (or raw bytes, for non-UTF-8 data)
        // echoed back before the preview is truncated.
        const PREVIEW_LEN: usize = 10;

        write!(
            f,
            "Got an unexpected token on line {} while trying to parse a {}: ",
            self.line_number + 1,
            self.last_attempted_parser,
        )?;

        match std::str::from_utf8(self.parsed_until) {
            // Find the byte offset of the first character past the preview.
            // Cutting there (instead of at a fixed byte index) guarantees the
            // slice lands on a char boundary, so multi-byte input cannot
            // cause a panic while formatting an error.
            Ok(data) => match data.char_indices().nth(PREVIEW_LEN) {
                Some((cut, _)) => write!(
                    f,
                    "'{}' ... ({} characters omitted)",
                    &data[..cut],
                    data[cut..].chars().count()
                ),
                None => write!(f, "'{}'", data),
            },
            // Not valid UTF-8: fall back to a hex dump of the raw bytes.
            Err(_) => {
                let data_size = self.parsed_until.len();
                if data_size > PREVIEW_LEN {
                    write!(
                        f,
                        "'{:02x?}' ... ({} characters omitted)",
                        &self.parsed_until[..PREVIEW_LEN],
                        data_size - PREVIEW_LEN
                    )
                } else {
                    write!(f, "'{:02x?}'", self.parsed_until)
                }
            }
        }
    }
}
// Empty impl: no methods are overridden, so `ParserError` relies entirely on
// the default `std::error::Error` method implementations.
impl Error for ParserError<'_> {}
/// The kind of syntactic element the parser was most recently attempting to
/// parse. It is stored in `ParserError` as the last attempted parser and is
/// rendered into the error message through its `Display` implementation.
#[derive(PartialEq, Debug, Clone, Copy)]
enum ParserNode {
/// A section header; the initial state before any section is parsed.
SectionHeader,
/// A config name; set by `section_body` right before parsing the name.
ConfigName,
/// A config value; set by `section_body` right before parsing the value.
ConfigValue,
/// A comment.
Comment,
}
impl Display for ParserNode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::SectionHeader => write!(f, "section header"),
Self::ConfigName => write!(f, "config name"),
Self::ConfigValue => write!(f, "config value"),
Self::Comment => write!(f, "comment"),
}
} }
} }
@ -499,19 +557,56 @@ pub fn parse_from_str(input: &str) -> Result<Parser<'_>, ParserError> {
/// This generally is due to either invalid names or if there's extraneous /// This generally is due to either invalid names or if there's extraneous
/// data succeeding valid `git-config` data. /// data succeeding valid `git-config` data.
pub fn parse_from_bytes(input: &[u8]) -> Result<Parser<'_>, ParserError> { pub fn parse_from_bytes(input: &[u8]) -> Result<Parser<'_>, ParserError> {
let mut newlines = 0;
let (i, frontmatter) = many0(alt(( let (i, frontmatter) = many0(alt((
map(comment, Event::Comment), map(comment, Event::Comment),
map(take_spaces, |whitespace| { map(take_spaces, |whitespace| {
Event::Whitespace(Cow::Borrowed(whitespace.into())) Event::Whitespace(Cow::Borrowed(whitespace.into()))
}), }),
map(take_newline, |newline| { map(take_newline, |(newline, counter)| {
newlines += counter;
Event::Newline(Cow::Borrowed(newline.into())) Event::Newline(Cow::Borrowed(newline.into()))
}), }),
)))(input.as_bytes())?; )))(input.as_bytes())
let (i, sections) = many0(section)(i)?; // I don't think this can panic. many0 errors if the child parser returns
// a success where the input was not consumed, but alt will only return Ok
// if one of its children succeeds. However, all of its children are
// guaranteed to consume something if they succeed, so the Ok(i) == i case
// can never occur.
.expect("many0(alt(...)) panicked. Likely a bug in one of the children parser.");
if i.is_empty() {
return Ok(Parser {
frontmatter,
sections: vec![],
});
}
let mut node = ParserNode::SectionHeader;
let maybe_sections = many1(|i| section(i, &mut node))(i);
let (i, sections) = maybe_sections.map_err(|_| ParserError {
line_number: newlines,
last_attempted_parser: node,
parsed_until: i,
})?;
let sections = sections
.into_iter()
.map(|(section, additional_newlines)| {
newlines += additional_newlines;
section
})
.collect();
// This needs to happen after we collect sections, otherwise the line number
// will be off.
if !i.is_empty() { if !i.is_empty() {
return Err(ParserError::ConfigHasExtraData(i.into())); return Err(ParserError {
line_number: newlines,
last_attempted_parser: node,
parsed_until: i,
});
} }
Ok(Parser { Ok(Parser {
@ -532,28 +627,43 @@ fn comment(i: &[u8]) -> IResult<&[u8], ParsedComment> {
)) ))
} }
fn section(i: &[u8]) -> IResult<&[u8], ParsedSection> { fn section<'a, 'b>(
i: &'a [u8],
node: &'b mut ParserNode,
) -> IResult<&'a [u8], (ParsedSection<'a>, usize)> {
let (i, section_header) = section_header(i)?; let (i, section_header) = section_header(i)?;
let mut newlines = 0;
// todo: unhack this (manually implement many0 and alt to avoid closure moves)
let node = std::sync::Mutex::new(node);
let (i, items) = many0(alt(( let (i, items) = many0(alt((
map(take_spaces, |space| { map(take_spaces, |space| {
vec![Event::Whitespace(Cow::Borrowed(space.into()))] vec![Event::Whitespace(Cow::Borrowed(space.into()))]
}), }),
map(take_newline, |newline| { map(take_newline, |(newline, counter)| {
newlines += counter;
vec![Event::Newline(Cow::Borrowed(newline.into()))] vec![Event::Newline(Cow::Borrowed(newline.into()))]
}), }),
map(section_body, |(key, values)| { map(
|i| section_body(i, *node.lock().unwrap()),
|(key, values)| {
let mut vec = vec![Event::Key(Cow::Borrowed(key.into()))]; let mut vec = vec![Event::Key(Cow::Borrowed(key.into()))];
vec.extend(values); vec.extend(values);
vec vec
}), },
),
map(comment, |comment| vec![Event::Comment(comment)]), map(comment, |comment| vec![Event::Comment(comment)]),
)))(i)?; )))(i)?;
Ok(( Ok((
i, i,
(
ParsedSection { ParsedSection {
section_header, section_header,
events: items.into_iter().flatten().collect(), events: items.into_iter().flatten().collect(),
}, },
newlines,
),
)) ))
} }
@ -588,7 +698,6 @@ fn section_header(i: &[u8]) -> IResult<&[u8], ParsedSectionHeader> {
// Section header must be using modern subsection syntax at this point. // Section header must be using modern subsection syntax at this point.
let (i, whitespace) = take_spaces(i)?; let (i, whitespace) = take_spaces(i)?;
let (i, subsection_name) = delimited( let (i, subsection_name) = delimited(
char('"'), char('"'),
opt(escaped(none_of("\"\\\n\0"), '\\', one_of(r#""\"#))), opt(escaped(none_of("\"\\\n\0"), '\\', one_of(r#""\"#))),
@ -609,11 +718,19 @@ fn section_header(i: &[u8]) -> IResult<&[u8], ParsedSectionHeader> {
)) ))
} }
fn section_body(i: &[u8]) -> IResult<&[u8], (&[u8], Vec<Event>)> { fn section_body<'a, 'b>(
i: &'a [u8],
node: &'b mut ParserNode,
) -> IResult<&'a [u8], (&'a [u8], Vec<Event<'a>>)> {
// maybe need to check for [ here // maybe need to check for [ here
*node = ParserNode::ConfigName;
let (i, name) = config_name(i)?; let (i, name) = config_name(i)?;
let (i, whitespace) = opt(take_spaces)(i)?; let (i, whitespace) = opt(take_spaces)(i)?;
*node = ParserNode::ConfigValue;
let (i, value) = config_value(i)?; let (i, value) = config_value(i)?;
if let Some(whitespace) = whitespace { if let Some(whitespace) = whitespace {
let mut events = vec![Event::Whitespace(Cow::Borrowed(whitespace.into()))]; let mut events = vec![Event::Whitespace(Cow::Borrowed(whitespace.into()))];
events.extend(value); events.extend(value);
@ -639,7 +756,6 @@ fn config_name(i: &[u8]) -> IResult<&[u8], &[u8]> {
code: ErrorKind::Alpha, code: ErrorKind::Alpha,
})); }));
} }
take_while(|c: u8| (c as char).is_alphanumeric() || c == b'-')(i) take_while(|c: u8| (c as char).is_alphanumeric() || c == b'-')(i)
} }
@ -751,19 +867,7 @@ fn value_impl(i: &[u8]) -> IResult<&[u8], Vec<Event>> {
} }
fn take_spaces(i: &[u8]) -> IResult<&[u8], &[u8]> { fn take_spaces(i: &[u8]) -> IResult<&[u8], &[u8]> {
take_common(i, |c| (c as char).is_ascii() && is_space(c)) let (i, v) = take_while(|c| (c as char).is_ascii() && is_space(c))(i)?;
}
fn take_newline(i: &[u8]) -> IResult<&[u8], &[u8]> {
take_common(i, is_char_newline)
}
fn is_char_newline(c: u8) -> bool {
(c as char).is_ascii() && is_newline(c)
}
fn take_common<F: Fn(u8) -> bool>(i: &[u8], f: F) -> IResult<&[u8], &[u8]> {
let (i, v) = take_while(f)(i)?;
if v.is_empty() { if v.is_empty() {
Err(nom::Err::Error(NomError { Err(nom::Err::Error(NomError {
input: i, input: i,
@ -774,15 +878,24 @@ fn take_common<F: Fn(u8) -> bool>(i: &[u8], f: F) -> IResult<&[u8], &[u8]> {
} }
} }
#[cfg(test)] fn take_newline(i: &[u8]) -> IResult<&[u8], (&[u8], usize)> {
fn fully_consumed<T>(t: T) -> (&'static [u8], T) { let mut counter = 0;
(&[], t) let (i, v) = take_while(|c| (c as char).is_ascii() && is_newline(c))(i)?;
counter += v.len();
if v.is_empty() {
Err(nom::Err::Error(NomError {
input: i,
code: ErrorKind::Eof,
}))
} else {
Ok((i, (v, counter)))
}
} }
#[cfg(test)] #[cfg(test)]
mod comments { mod comments {
use super::*; use super::*;
use crate::test_util::comment as parsed_comment; use crate::test_util::{comment as parsed_comment, fully_consumed};
#[test] #[test]
fn semicolon() { fn semicolon() {
@ -812,7 +925,7 @@ mod comments {
#[cfg(test)] #[cfg(test)]
mod section_headers { mod section_headers {
use super::*; use super::*;
use crate::test_util::section_header as parsed_section_header; use crate::test_util::{fully_consumed, section_header as parsed_section_header};
#[test] #[test]
fn no_subsection() { fn no_subsection() {
@ -884,6 +997,7 @@ mod section_headers {
#[cfg(test)] #[cfg(test)]
mod config_name { mod config_name {
use super::*; use super::*;
use crate::test_util::fully_consumed;
#[test] #[test]
fn just_name() { fn just_name() {
@ -912,8 +1026,9 @@ mod section_body {
#[test] #[test]
fn whitespace_is_not_ambigious() { fn whitespace_is_not_ambigious() {
let mut node = ParserNode::SectionHeader;
assert_eq!( assert_eq!(
section_body(b"a =b").unwrap().1, section_body(b"a =b", &mut node).unwrap().1,
( (
"a".as_bytes(), "a".as_bytes(),
vec![ vec![
@ -924,7 +1039,7 @@ mod section_body {
) )
); );
assert_eq!( assert_eq!(
section_body(b"a= b").unwrap().1, section_body(b"a= b", &mut node).unwrap().1,
( (
"a".as_bytes(), "a".as_bytes(),
vec![ vec![
@ -940,7 +1055,7 @@ mod section_body {
#[cfg(test)] #[cfg(test)]
mod value_no_continuation { mod value_no_continuation {
use super::*; use super::*;
use crate::test_util::value_event; use crate::test_util::{fully_consumed, value_event};
#[test] #[test]
fn no_comment() { fn no_comment() {
@ -1019,7 +1134,7 @@ mod value_no_continuation {
#[cfg(test)] #[cfg(test)]
mod value_continuation { mod value_continuation {
use super::*; use super::*;
use crate::test_util::{newline_event, value_done_event, value_not_done_event}; use crate::test_util::{fully_consumed, newline_event, value_done_event, value_not_done_event};
#[test] #[test]
fn simple_continuation() { fn simple_continuation() {
@ -1082,30 +1197,37 @@ mod value_continuation {
mod section { mod section {
use super::*; use super::*;
use crate::test_util::{ use crate::test_util::{
comment_event, name_event, newline_event, section_header as parsed_section_header, comment_event, fully_consumed, name_event, newline_event,
value_done_event, value_event, value_not_done_event, whitespace_event, section_header as parsed_section_header, value_done_event, value_event,
value_not_done_event, whitespace_event,
}; };
#[test] #[test]
fn empty_section() { fn empty_section() {
let mut node = ParserNode::SectionHeader;
assert_eq!( assert_eq!(
section(b"[test]").unwrap(), section(b"[test]", &mut node).unwrap(),
fully_consumed(ParsedSection { fully_consumed((
ParsedSection {
section_header: parsed_section_header("test", None), section_header: parsed_section_header("test", None),
events: vec![] events: vec![]
}) },
0
)),
); );
} }
#[test] #[test]
fn simple_section() { fn simple_section() {
let mut node = ParserNode::SectionHeader;
let section_data = br#"[hello] let section_data = br#"[hello]
a = b a = b
c c
d = "lol""#; d = "lol""#;
assert_eq!( assert_eq!(
section(section_data).unwrap(), section(section_data, &mut node).unwrap(),
fully_consumed(ParsedSection { fully_consumed((
ParsedSection {
section_header: parsed_section_header("hello", None), section_header: parsed_section_header("hello", None),
events: vec![ events: vec![
newline_event(), newline_event(),
@ -1127,31 +1249,39 @@ mod section {
whitespace_event(" "), whitespace_event(" "),
value_event("\"lol\"") value_event("\"lol\"")
] ]
}) },
3
))
) )
} }
#[test] #[test]
fn section_single_line() { fn section_single_line() {
let mut node = ParserNode::SectionHeader;
assert_eq!( assert_eq!(
section(b"[hello] c").unwrap(), section(b"[hello] c", &mut node).unwrap(),
fully_consumed(ParsedSection { fully_consumed((
ParsedSection {
section_header: parsed_section_header("hello", None), section_header: parsed_section_header("hello", None),
events: vec![whitespace_event(" "), name_event("c"), value_event("")] events: vec![whitespace_event(" "), name_event("c"), value_event("")]
}) },
0
))
); );
} }
#[test] #[test]
fn section_very_commented() { fn section_very_commented() {
let mut node = ParserNode::SectionHeader;
let section_data = br#"[hello] ; commentA let section_data = br#"[hello] ; commentA
a = b # commentB a = b # commentB
; commentC ; commentC
; commentD ; commentD
c = d"#; c = d"#;
assert_eq!( assert_eq!(
section(section_data).unwrap(), section(section_data, &mut node).unwrap(),
fully_consumed(ParsedSection { fully_consumed((
ParsedSection {
section_header: parsed_section_header("hello", None), section_header: parsed_section_header("hello", None),
events: vec![ events: vec![
whitespace_event(" "), whitespace_event(" "),
@ -1179,16 +1309,24 @@ mod section {
whitespace_event(" "), whitespace_event(" "),
value_event("d"), value_event("d"),
] ]
}) },
4
))
); );
} }
#[test] #[test]
fn complex_continuation() { fn complex_continuation() {
let mut node = ParserNode::SectionHeader;
// This test is absolute hell. Good luck if this fails. // This test is absolute hell. Good luck if this fails.
assert_eq!( assert_eq!(
section(b"[section] a = 1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c").unwrap(), section(
fully_consumed(ParsedSection { b"[section] a = 1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c",
&mut node
)
.unwrap(),
fully_consumed((
ParsedSection {
section_header: parsed_section_header("section", None), section_header: parsed_section_header("section", None),
events: vec![ events: vec![
whitespace_event(" "), whitespace_event(" "),
@ -1204,15 +1342,19 @@ mod section {
whitespace_event(" "), whitespace_event(" "),
comment_event('#', " \"b\t ; c"), comment_event('#', " \"b\t ; c"),
] ]
}) },
0
))
); );
} }
#[test] #[test]
fn quote_split_over_two_lines() { fn quote_split_over_two_lines() {
let mut node = ParserNode::SectionHeader;
assert_eq!( assert_eq!(
section(b"[section \"a\"] b =\"\\\n;\";a").unwrap(), section(b"[section \"a\"] b =\"\\\n;\";a", &mut node).unwrap(),
fully_consumed(ParsedSection { fully_consumed((
ParsedSection {
section_header: parsed_section_header("section", (" ", "a")), section_header: parsed_section_header("section", (" ", "a")),
events: vec![ events: vec![
whitespace_event(" "), whitespace_event(" "),
@ -1224,15 +1366,19 @@ mod section {
value_done_event(";\""), value_done_event(";\""),
comment_event(';', "a"), comment_event(';', "a"),
] ]
}) },
0
))
) )
} }
#[test] #[test]
fn section_handles_extranous_whitespace_before_comment() { fn section_handles_extranous_whitespace_before_comment() {
let mut node = ParserNode::SectionHeader;
assert_eq!( assert_eq!(
section(b"[s]hello #world").unwrap(), section(b"[s]hello #world", &mut node).unwrap(),
fully_consumed(ParsedSection { fully_consumed((
ParsedSection {
section_header: parsed_section_header("s", None), section_header: parsed_section_header("s", None),
events: vec![ events: vec![
name_event("hello"), name_event("hello"),
@ -1240,7 +1386,9 @@ mod section {
value_event(""), value_event(""),
comment_event('#', "world"), comment_event('#', "world"),
] ]
}) },
0
))
); );
} }
} }

View file

@ -67,3 +67,7 @@ pub(crate) fn comment(comment_tag: char, comment: &'static str) -> ParsedComment
comment: Cow::Borrowed(comment.into()), comment: Cow::Borrowed(comment.into()),
} }
} }
/// Test helper that pairs `t` with an empty "remaining input" slice.
///
/// The resulting `(remaining, value)` tuple has the shape of a successful
/// parser result in which every byte of input was consumed, so it can be
/// compared directly against an unwrapped parser return value.
pub(crate) fn fully_consumed<T>(t: T) -> (&'static [u8], T) {
    const NO_REMAINING_INPUT: &[u8] = &[];
    (NO_REMAINING_INPUT, t)
}

View file

@ -216,3 +216,13 @@ fn newline_events_are_merged() {
vec![newline_custom("\n\n\n\n\n")] vec![newline_custom("\n\n\n\n\n")]
); );
} }
/// Smoke test for error reporting: every input below is expected to be
/// rejected by `parse_from_str`, and the resulting error is printed so its
/// `Display` output can be inspected (e.g. with `cargo test -- --nocapture`).
#[test]
fn error() {
    let rejected_inputs = ["[core] a=b\n 4a=3", "[core] a=b\n =3", "[core"];
    for input in &rejected_inputs {
        // `unwrap_err` panics (failing the test) if parsing unexpectedly succeeds.
        println!("{}", parse_from_str(input).unwrap_err());
    }
}