Compare commits
No commits in common. "531b28ed2be4fce3bef1e8db1051fe27f2a3d1b3" and "19e18df973e9bce69f32ca347b64fc6c696c5ed5" have entirely different histories.
531b28ed2b
...
19e18df973
3 changed files with 161 additions and 323 deletions
286
src/parser.rs
286
src/parser.rs
|
@ -10,18 +10,17 @@
|
|||
//! [`GitConfig`]: crate::config::GitConfig
|
||||
|
||||
use bstr::{BStr, ByteSlice};
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::{escaped, tag, take_till, take_while};
|
||||
use nom::character::complete::{char, none_of, one_of};
|
||||
use nom::character::{is_newline, is_space};
|
||||
use nom::combinator::{map, opt};
|
||||
use nom::error::{Error as NomError, ErrorKind};
|
||||
use nom::multi::{many0, many1};
|
||||
use nom::sequence::delimited;
|
||||
use nom::IResult;
|
||||
use nom::{branch::alt, multi::many0};
|
||||
use std::borrow::Cow;
|
||||
use std::fmt::Display;
|
||||
use std::iter::FusedIterator;
|
||||
use std::{borrow::Cow, error::Error};
|
||||
|
||||
/// Syntactic events that occur in the config. Despite all these variants
|
||||
/// holding a [`Cow`] instead of a simple reference, the parser will only emit
|
||||
|
@ -159,77 +158,20 @@ impl Display for ParsedComment<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
/// A parser error reports the one-indexed line number where the parsing error
|
||||
/// occurred, as well as the last parser node and the remaining data to be
|
||||
/// parsed.
|
||||
/// The various parsing failure reasons.
|
||||
#[derive(PartialEq, Debug)]
|
||||
pub struct ParserError<'a> {
|
||||
line_number: usize,
|
||||
last_attempted_parser: ParserNode,
|
||||
parsed_until: &'a [u8],
|
||||
pub enum ParserError<'a> {
|
||||
/// A parsing error occurred.
|
||||
InvalidInput(nom::Err<NomError<&'a [u8]>>),
|
||||
/// The config was successfully parsed, but we had extraneous data after the
|
||||
/// config file.
|
||||
ConfigHasExtraData(&'a BStr),
|
||||
}
|
||||
|
||||
impl ParserError<'_> {
|
||||
/// The one-indexed line number where the error occurred. This is determined
|
||||
/// by the number of newlines that were successfully parsed.
|
||||
pub fn line_number(&self) -> usize {
|
||||
self.line_number + 1
|
||||
}
|
||||
|
||||
/// The remaining data that was left unparsed.
|
||||
pub fn remaining_data(&self) -> &[u8] {
|
||||
self.parsed_until
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for ParserError<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let data_size = self.parsed_until.len();
|
||||
let data = std::str::from_utf8(self.parsed_until);
|
||||
write!(
|
||||
f,
|
||||
"Got an unexpected token on line {} while trying to parse a {}: ",
|
||||
self.line_number + 1,
|
||||
self.last_attempted_parser,
|
||||
)?;
|
||||
|
||||
match (data, data_size) {
|
||||
(Ok(data), _) if data_size > 10 => write!(
|
||||
f,
|
||||
"'{}' ... ({} characters omitted)",
|
||||
&data[..10],
|
||||
data_size - 10
|
||||
),
|
||||
(Ok(data), _) => write!(f, "'{}'", data),
|
||||
(Err(_), _) if data_size > 10 => write!(
|
||||
f,
|
||||
"'{:02x?}' ... ({} characters omitted)",
|
||||
&self.parsed_until[..10],
|
||||
data_size - 10
|
||||
),
|
||||
(Err(_), _) => write!(f, "'{:02x?}'", self.parsed_until),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for ParserError<'_> {}
|
||||
|
||||
#[derive(PartialEq, Debug, Clone, Copy)]
|
||||
enum ParserNode {
|
||||
SectionHeader,
|
||||
ConfigName,
|
||||
ConfigValue,
|
||||
Comment,
|
||||
}
|
||||
|
||||
impl Display for ParserNode {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::SectionHeader => write!(f, "section header"),
|
||||
Self::ConfigName => write!(f, "config name"),
|
||||
Self::ConfigValue => write!(f, "config value"),
|
||||
Self::Comment => write!(f, "comment"),
|
||||
}
|
||||
#[doc(hidden)]
|
||||
impl<'a> From<nom::Err<NomError<&'a [u8]>>> for ParserError<'a> {
|
||||
fn from(e: nom::Err<NomError<&'a [u8]>>) -> Self {
|
||||
Self::InvalidInput(e)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -557,56 +499,19 @@ pub fn parse_from_str(input: &str) -> Result<Parser<'_>, ParserError> {
|
|||
/// This is generally due to either invalid names or extraneous
|
||||
/// data succeeding valid `git-config` data.
|
||||
pub fn parse_from_bytes(input: &[u8]) -> Result<Parser<'_>, ParserError> {
|
||||
let mut newlines = 0;
|
||||
let (i, frontmatter) = many0(alt((
|
||||
map(comment, Event::Comment),
|
||||
map(take_spaces, |whitespace| {
|
||||
Event::Whitespace(Cow::Borrowed(whitespace.into()))
|
||||
}),
|
||||
map(take_newline, |(newline, counter)| {
|
||||
newlines += counter;
|
||||
map(take_newline, |newline| {
|
||||
Event::Newline(Cow::Borrowed(newline.into()))
|
||||
}),
|
||||
)))(input.as_bytes())
|
||||
// I don't think this can panic. many0 errors if the child parser returns
|
||||
// a success where the input was not consumed, but alt will only return Ok
|
||||
// if one of its children succeeds. However, all of its children are
|
||||
// guaranteed to consume something if they succeed, so the Ok(i) == i case
|
||||
// can never occur.
|
||||
.expect("many0(alt(...)) panicked. Likely a bug in one of the children parser.");
|
||||
)))(input.as_bytes())?;
|
||||
let (i, sections) = many0(section)(i)?;
|
||||
|
||||
if i.is_empty() {
|
||||
return Ok(Parser {
|
||||
frontmatter,
|
||||
sections: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
let mut node = ParserNode::SectionHeader;
|
||||
|
||||
let maybe_sections = many1(|i| section(i, &mut node))(i);
|
||||
let (i, sections) = maybe_sections.map_err(|_| ParserError {
|
||||
line_number: newlines,
|
||||
last_attempted_parser: node,
|
||||
parsed_until: i,
|
||||
})?;
|
||||
|
||||
let sections = sections
|
||||
.into_iter()
|
||||
.map(|(section, additional_newlines)| {
|
||||
newlines += additional_newlines;
|
||||
section
|
||||
})
|
||||
.collect();
|
||||
|
||||
// This needs to happen after we collect sections, otherwise the line number
|
||||
// will be off.
|
||||
if !i.is_empty() {
|
||||
return Err(ParserError {
|
||||
line_number: newlines,
|
||||
last_attempted_parser: node,
|
||||
parsed_until: i,
|
||||
});
|
||||
return Err(ParserError::ConfigHasExtraData(i.into()));
|
||||
}
|
||||
|
||||
Ok(Parser {
|
||||
|
@ -627,43 +532,28 @@ fn comment(i: &[u8]) -> IResult<&[u8], ParsedComment> {
|
|||
))
|
||||
}
|
||||
|
||||
fn section<'a, 'b>(
|
||||
i: &'a [u8],
|
||||
node: &'b mut ParserNode,
|
||||
) -> IResult<&'a [u8], (ParsedSection<'a>, usize)> {
|
||||
fn section(i: &[u8]) -> IResult<&[u8], ParsedSection> {
|
||||
let (i, section_header) = section_header(i)?;
|
||||
|
||||
let mut newlines = 0;
|
||||
// todo: unhack this (manually implement many0 and alt to avoid closure moves)
|
||||
let node = std::sync::Mutex::new(node);
|
||||
let (i, items) = many0(alt((
|
||||
map(take_spaces, |space| {
|
||||
vec![Event::Whitespace(Cow::Borrowed(space.into()))]
|
||||
}),
|
||||
map(take_newline, |(newline, counter)| {
|
||||
newlines += counter;
|
||||
map(take_newline, |newline| {
|
||||
vec![Event::Newline(Cow::Borrowed(newline.into()))]
|
||||
}),
|
||||
map(
|
||||
|i| section_body(i, *node.lock().unwrap()),
|
||||
|(key, values)| {
|
||||
map(section_body, |(key, values)| {
|
||||
let mut vec = vec![Event::Key(Cow::Borrowed(key.into()))];
|
||||
vec.extend(values);
|
||||
vec
|
||||
},
|
||||
),
|
||||
}),
|
||||
map(comment, |comment| vec![Event::Comment(comment)]),
|
||||
)))(i)?;
|
||||
|
||||
Ok((
|
||||
i,
|
||||
(
|
||||
ParsedSection {
|
||||
section_header,
|
||||
events: items.into_iter().flatten().collect(),
|
||||
},
|
||||
newlines,
|
||||
),
|
||||
))
|
||||
}
|
||||
|
||||
|
@ -698,6 +588,7 @@ fn section_header(i: &[u8]) -> IResult<&[u8], ParsedSectionHeader> {
|
|||
// Section header must be using modern subsection syntax at this point.
|
||||
|
||||
let (i, whitespace) = take_spaces(i)?;
|
||||
|
||||
let (i, subsection_name) = delimited(
|
||||
char('"'),
|
||||
opt(escaped(none_of("\"\\\n\0"), '\\', one_of(r#""\"#))),
|
||||
|
@ -718,19 +609,11 @@ fn section_header(i: &[u8]) -> IResult<&[u8], ParsedSectionHeader> {
|
|||
))
|
||||
}
|
||||
|
||||
fn section_body<'a, 'b>(
|
||||
i: &'a [u8],
|
||||
node: &'b mut ParserNode,
|
||||
) -> IResult<&'a [u8], (&'a [u8], Vec<Event<'a>>)> {
|
||||
fn section_body(i: &[u8]) -> IResult<&[u8], (&[u8], Vec<Event>)> {
|
||||
// maybe need to check for [ here
|
||||
*node = ParserNode::ConfigName;
|
||||
let (i, name) = config_name(i)?;
|
||||
|
||||
let (i, whitespace) = opt(take_spaces)(i)?;
|
||||
|
||||
*node = ParserNode::ConfigValue;
|
||||
let (i, value) = config_value(i)?;
|
||||
|
||||
if let Some(whitespace) = whitespace {
|
||||
let mut events = vec![Event::Whitespace(Cow::Borrowed(whitespace.into()))];
|
||||
events.extend(value);
|
||||
|
@ -756,6 +639,7 @@ fn config_name(i: &[u8]) -> IResult<&[u8], &[u8]> {
|
|||
code: ErrorKind::Alpha,
|
||||
}));
|
||||
}
|
||||
|
||||
take_while(|c: u8| (c as char).is_alphanumeric() || c == b'-')(i)
|
||||
}
|
||||
|
||||
|
@ -867,7 +751,19 @@ fn value_impl(i: &[u8]) -> IResult<&[u8], Vec<Event>> {
|
|||
}
|
||||
|
||||
fn take_spaces(i: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||
let (i, v) = take_while(|c| (c as char).is_ascii() && is_space(c))(i)?;
|
||||
take_common(i, |c| (c as char).is_ascii() && is_space(c))
|
||||
}
|
||||
|
||||
fn take_newline(i: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||
take_common(i, is_char_newline)
|
||||
}
|
||||
|
||||
fn is_char_newline(c: u8) -> bool {
|
||||
(c as char).is_ascii() && is_newline(c)
|
||||
}
|
||||
|
||||
fn take_common<F: Fn(u8) -> bool>(i: &[u8], f: F) -> IResult<&[u8], &[u8]> {
|
||||
let (i, v) = take_while(f)(i)?;
|
||||
if v.is_empty() {
|
||||
Err(nom::Err::Error(NomError {
|
||||
input: i,
|
||||
|
@ -878,24 +774,15 @@ fn take_spaces(i: &[u8]) -> IResult<&[u8], &[u8]> {
|
|||
}
|
||||
}
|
||||
|
||||
fn take_newline(i: &[u8]) -> IResult<&[u8], (&[u8], usize)> {
|
||||
let mut counter = 0;
|
||||
let (i, v) = take_while(|c| (c as char).is_ascii() && is_newline(c))(i)?;
|
||||
counter += v.len();
|
||||
if v.is_empty() {
|
||||
Err(nom::Err::Error(NomError {
|
||||
input: i,
|
||||
code: ErrorKind::Eof,
|
||||
}))
|
||||
} else {
|
||||
Ok((i, (v, counter)))
|
||||
}
|
||||
#[cfg(test)]
|
||||
fn fully_consumed<T>(t: T) -> (&'static [u8], T) {
|
||||
(&[], t)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod comments {
|
||||
use super::*;
|
||||
use crate::test_util::{comment as parsed_comment, fully_consumed};
|
||||
use crate::test_util::comment as parsed_comment;
|
||||
|
||||
#[test]
|
||||
fn semicolon() {
|
||||
|
@ -925,7 +812,7 @@ mod comments {
|
|||
#[cfg(test)]
|
||||
mod section_headers {
|
||||
use super::*;
|
||||
use crate::test_util::{fully_consumed, section_header as parsed_section_header};
|
||||
use crate::test_util::section_header as parsed_section_header;
|
||||
|
||||
#[test]
|
||||
fn no_subsection() {
|
||||
|
@ -997,7 +884,6 @@ mod section_headers {
|
|||
#[cfg(test)]
|
||||
mod config_name {
|
||||
use super::*;
|
||||
use crate::test_util::fully_consumed;
|
||||
|
||||
#[test]
|
||||
fn just_name() {
|
||||
|
@ -1026,9 +912,8 @@ mod section_body {
|
|||
|
||||
#[test]
|
||||
fn whitespace_is_not_ambigious() {
|
||||
let mut node = ParserNode::SectionHeader;
|
||||
assert_eq!(
|
||||
section_body(b"a =b", &mut node).unwrap().1,
|
||||
section_body(b"a =b").unwrap().1,
|
||||
(
|
||||
"a".as_bytes(),
|
||||
vec![
|
||||
|
@ -1039,7 +924,7 @@ mod section_body {
|
|||
)
|
||||
);
|
||||
assert_eq!(
|
||||
section_body(b"a= b", &mut node).unwrap().1,
|
||||
section_body(b"a= b").unwrap().1,
|
||||
(
|
||||
"a".as_bytes(),
|
||||
vec![
|
||||
|
@ -1055,7 +940,7 @@ mod section_body {
|
|||
#[cfg(test)]
|
||||
mod value_no_continuation {
|
||||
use super::*;
|
||||
use crate::test_util::{fully_consumed, value_event};
|
||||
use crate::test_util::value_event;
|
||||
|
||||
#[test]
|
||||
fn no_comment() {
|
||||
|
@ -1134,7 +1019,7 @@ mod value_no_continuation {
|
|||
#[cfg(test)]
|
||||
mod value_continuation {
|
||||
use super::*;
|
||||
use crate::test_util::{fully_consumed, newline_event, value_done_event, value_not_done_event};
|
||||
use crate::test_util::{newline_event, value_done_event, value_not_done_event};
|
||||
|
||||
#[test]
|
||||
fn simple_continuation() {
|
||||
|
@ -1197,37 +1082,30 @@ mod value_continuation {
|
|||
mod section {
|
||||
use super::*;
|
||||
use crate::test_util::{
|
||||
comment_event, fully_consumed, name_event, newline_event,
|
||||
section_header as parsed_section_header, value_done_event, value_event,
|
||||
value_not_done_event, whitespace_event,
|
||||
comment_event, name_event, newline_event, section_header as parsed_section_header,
|
||||
value_done_event, value_event, value_not_done_event, whitespace_event,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn empty_section() {
|
||||
let mut node = ParserNode::SectionHeader;
|
||||
assert_eq!(
|
||||
section(b"[test]", &mut node).unwrap(),
|
||||
fully_consumed((
|
||||
ParsedSection {
|
||||
section(b"[test]").unwrap(),
|
||||
fully_consumed(ParsedSection {
|
||||
section_header: parsed_section_header("test", None),
|
||||
events: vec![]
|
||||
},
|
||||
0
|
||||
)),
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_section() {
|
||||
let mut node = ParserNode::SectionHeader;
|
||||
let section_data = br#"[hello]
|
||||
a = b
|
||||
c
|
||||
d = "lol""#;
|
||||
assert_eq!(
|
||||
section(section_data, &mut node).unwrap(),
|
||||
fully_consumed((
|
||||
ParsedSection {
|
||||
section(section_data).unwrap(),
|
||||
fully_consumed(ParsedSection {
|
||||
section_header: parsed_section_header("hello", None),
|
||||
events: vec![
|
||||
newline_event(),
|
||||
|
@ -1249,39 +1127,31 @@ mod section {
|
|||
whitespace_event(" "),
|
||||
value_event("\"lol\"")
|
||||
]
|
||||
},
|
||||
3
|
||||
))
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn section_single_line() {
|
||||
let mut node = ParserNode::SectionHeader;
|
||||
assert_eq!(
|
||||
section(b"[hello] c", &mut node).unwrap(),
|
||||
fully_consumed((
|
||||
ParsedSection {
|
||||
section(b"[hello] c").unwrap(),
|
||||
fully_consumed(ParsedSection {
|
||||
section_header: parsed_section_header("hello", None),
|
||||
events: vec![whitespace_event(" "), name_event("c"), value_event("")]
|
||||
},
|
||||
0
|
||||
))
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn section_very_commented() {
|
||||
let mut node = ParserNode::SectionHeader;
|
||||
let section_data = br#"[hello] ; commentA
|
||||
a = b # commentB
|
||||
; commentC
|
||||
; commentD
|
||||
c = d"#;
|
||||
assert_eq!(
|
||||
section(section_data, &mut node).unwrap(),
|
||||
fully_consumed((
|
||||
ParsedSection {
|
||||
section(section_data).unwrap(),
|
||||
fully_consumed(ParsedSection {
|
||||
section_header: parsed_section_header("hello", None),
|
||||
events: vec![
|
||||
whitespace_event(" "),
|
||||
|
@ -1309,24 +1179,16 @@ mod section {
|
|||
whitespace_event(" "),
|
||||
value_event("d"),
|
||||
]
|
||||
},
|
||||
4
|
||||
))
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn complex_continuation() {
|
||||
let mut node = ParserNode::SectionHeader;
|
||||
// This test is absolute hell. Good luck if this fails.
|
||||
assert_eq!(
|
||||
section(
|
||||
b"[section] a = 1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c",
|
||||
&mut node
|
||||
)
|
||||
.unwrap(),
|
||||
fully_consumed((
|
||||
ParsedSection {
|
||||
section(b"[section] a = 1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c").unwrap(),
|
||||
fully_consumed(ParsedSection {
|
||||
section_header: parsed_section_header("section", None),
|
||||
events: vec![
|
||||
whitespace_event(" "),
|
||||
|
@ -1342,19 +1204,15 @@ mod section {
|
|||
whitespace_event(" "),
|
||||
comment_event('#', " \"b\t ; c"),
|
||||
]
|
||||
},
|
||||
0
|
||||
))
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn quote_split_over_two_lines() {
|
||||
let mut node = ParserNode::SectionHeader;
|
||||
assert_eq!(
|
||||
section(b"[section \"a\"] b =\"\\\n;\";a", &mut node).unwrap(),
|
||||
fully_consumed((
|
||||
ParsedSection {
|
||||
section(b"[section \"a\"] b =\"\\\n;\";a").unwrap(),
|
||||
fully_consumed(ParsedSection {
|
||||
section_header: parsed_section_header("section", (" ", "a")),
|
||||
events: vec![
|
||||
whitespace_event(" "),
|
||||
|
@ -1366,19 +1224,15 @@ mod section {
|
|||
value_done_event(";\""),
|
||||
comment_event(';', "a"),
|
||||
]
|
||||
},
|
||||
0
|
||||
))
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn section_handles_extranous_whitespace_before_comment() {
|
||||
let mut node = ParserNode::SectionHeader;
|
||||
assert_eq!(
|
||||
section(b"[s]hello #world", &mut node).unwrap(),
|
||||
fully_consumed((
|
||||
ParsedSection {
|
||||
section(b"[s]hello #world").unwrap(),
|
||||
fully_consumed(ParsedSection {
|
||||
section_header: parsed_section_header("s", None),
|
||||
events: vec![
|
||||
name_event("hello"),
|
||||
|
@ -1386,9 +1240,7 @@ mod section {
|
|||
value_event(""),
|
||||
comment_event('#', "world"),
|
||||
]
|
||||
},
|
||||
0
|
||||
))
|
||||
})
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -67,7 +67,3 @@ pub(crate) fn comment(comment_tag: char, comment: &'static str) -> ParsedComment
|
|||
comment: Cow::Borrowed(comment.into()),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn fully_consumed<T>(t: T) -> (&'static [u8], T) {
|
||||
(&[], t)
|
||||
}
|
||||
|
|
|
@ -216,13 +216,3 @@ fn newline_events_are_merged() {
|
|||
vec![newline_custom("\n\n\n\n\n")]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn error() {
|
||||
let input = "[core] a=b\n 4a=3";
|
||||
println!("{}", parse_from_str(input).unwrap_err());
|
||||
let input = "[core] a=b\n =3";
|
||||
println!("{}", parse_from_str(input).unwrap_err());
|
||||
let input = "[core";
|
||||
println!("{}", parse_from_str(input).unwrap_err());
|
||||
}
|
||||
|
|
Reference in a new issue