more work on parser

This commit is contained in:
Edward Shen 2021-02-19 12:08:42 -05:00
parent d63b1f7ab3
commit eacb903dfd
Signed by: edward
GPG key ID: 19182661E818369F
5 changed files with 39 additions and 34 deletions

View file

@ -7,7 +7,8 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
serde = { version = "1.0", features = ["derive"] } serde = "1.0"
nom = "6" nom = "6"
[dev-dependencies] [dev-dependencies]
serde_derive = "1.0"

View file

@ -8,7 +8,7 @@ type SectionConfig<'a> = HashMap<&'a str, Value<'a>>;
/// This struct provides a high level wrapper to access `git-config` file. This /// This struct provides a high level wrapper to access `git-config` file. This
/// struct exists primarily for reading a config rather than modifying it, as /// struct exists primarily for reading a config rather than modifying it, as
/// it discards comments and unnecessary whitespace. /// it discards comments and unnecessary whitespace.
#[derive(Clone, Eq, PartialEq, Debug, Default, Serialize)] #[derive(Clone, Eq, PartialEq, Debug, Default)]
pub struct GitConfig<'a>(HashMap<&'a str, HashMap<&'a str, SectionConfig<'a>>>); pub struct GitConfig<'a>(HashMap<&'a str, HashMap<&'a str, SectionConfig<'a>>>);
const EMPTY_MARKER: &str = "@"; // Guaranteed to not be a {sub,}section or name. const EMPTY_MARKER: &str = "@"; // Guaranteed to not be a {sub,}section or name.

View file

@ -1,3 +1,12 @@
//! This module handles parsing a `git-config`. Generally speaking, you want to
//! use a higher abstraction unless you have some explicit reason to work with
//! events instead.
//!
//! The general workflow for interacting with this is to use one of the
//! `parse_from_*` function variants. These will return a [`Parser`] on success,
//! which can be converted into an [`Event`] iterator. The [`Parser`] also has
//! additional methods for accessing leading comments or events by section.
use crate::values::{Boolean, TrueVariant, Value}; use crate::values::{Boolean, TrueVariant, Value};
use nom::bytes::complete::{escaped, tag, take_till, take_while}; use nom::bytes::complete::{escaped, tag, take_till, take_while};
use nom::character::complete::{char, none_of, one_of}; use nom::character::complete::{char, none_of, one_of};
@ -10,7 +19,7 @@ use nom::IResult;
use nom::{branch::alt, multi::many0}; use nom::{branch::alt, multi::many0};
use std::iter::FusedIterator; use std::iter::FusedIterator;
/// Syntactic event that occurs in the config. /// Syntactic events that occurs in the config.
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] #[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub enum Event<'a> { pub enum Event<'a> {
Comment(ParsedComment<'a>), Comment(ParsedComment<'a>),
@ -32,34 +41,22 @@ pub enum Event<'a> {
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)] #[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)]
pub struct ParsedSection<'a> { pub struct ParsedSection<'a> {
section_header: ParsedSectionHeader<'a>, pub section_header: ParsedSectionHeader<'a>,
items: Vec<Event<'a>>, pub events: Vec<Event<'a>>,
}
impl ParsedSection<'_> {
pub fn header(&self) -> &ParsedSectionHeader<'_> {
&self.section_header
}
pub fn take_header(&mut self) -> ParsedSectionHeader<'_> {
self.section_header
}
pub fn events(&self) -> &[Event<'_>] {
&self.items
}
} }
/// A parsed section header, containing a name and optionally a subsection name.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)] #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)]
pub struct ParsedSectionHeader<'a> { pub struct ParsedSectionHeader<'a> {
pub name: &'a str, pub name: &'a str,
pub subsection_name: Option<&'a str>, pub subsection_name: Option<&'a str>,
} }
/// A parsed comment event containing the comment marker and comment.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)] #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)]
pub struct ParsedComment<'a> { pub struct ParsedComment<'a> {
comment_tag: char, pub comment_tag: char,
comment: &'a str, pub comment: &'a str,
} }
#[derive(PartialEq, Debug)] #[derive(PartialEq, Debug)]
@ -116,7 +113,12 @@ impl<'a> From<nom::Err<NomError<&'a str>>> for ParserError<'a> {
/// Note that that things such as case-sensitivity or duplicate sections are /// Note that that things such as case-sensitivity or duplicate sections are
/// _not_ handled. This parser is a low level _syntactic_ interpreter (as a /// _not_ handled. This parser is a low level _syntactic_ interpreter (as a
/// parser should be), and higher level wrappers around this parser (which may /// parser should be), and higher level wrappers around this parser (which may
/// or may not be zero-copy) should handle _semantic_ values. /// or may not be zero-copy) should handle _semantic_ values. This also means
/// that string-like values are not interpreted. For example, `hello"world"`
/// would be read at a high level as `helloworld` but this parser will return
/// the former instead, with the extra quotes. This is because it is not the
/// responsibility of the parser to interpret these values, and doing so would
/// necessarily require a copy, which this parser avoids.
/// ///
/// # Trait Implementations /// # Trait Implementations
/// ///
@ -139,7 +141,7 @@ impl<'a> Parser<'a> {
/// of a `git-config` file and can be converted into an iterator of [`Event`] /// of a `git-config` file and can be converted into an iterator of [`Event`]
/// for higher level processing. /// for higher level processing.
/// ///
/// This function is identical to [`parse`]. /// This function is identical to [`parse_from_str`].
/// ///
/// # Errors /// # Errors
/// ///
@ -186,7 +188,7 @@ impl<'a> Parser<'a> {
.map(|section| { .map(|section| {
vec![Event::SectionHeader(section.section_header)] vec![Event::SectionHeader(section.section_header)]
.into_iter() .into_iter()
.chain(section.items) .chain(section.events)
}) })
.flatten(); .flatten();
self.init_comments self.init_comments
@ -247,7 +249,7 @@ fn section<'a>(i: &'a str) -> IResult<&'a str, ParsedSection<'a>> {
i, i,
ParsedSection { ParsedSection {
section_header, section_header,
items: items.into_iter().flatten().collect(), events: items.into_iter().flatten().collect(),
}, },
)) ))
} }
@ -738,7 +740,7 @@ mod parse {
name: "hello", name: "hello",
subsection_name: None, subsection_name: None,
}, },
items: vec![ events: vec![
Event::Key("a"), Event::Key("a"),
Event::Value(Value::from_str("b")), Event::Value(Value::from_str("b")),
Event::Key("c"), Event::Key("c"),
@ -759,7 +761,7 @@ mod parse {
name: "hello", name: "hello",
subsection_name: None, subsection_name: None,
}, },
items: vec![ events: vec![
Event::Key("c"), Event::Key("c"),
Event::Value(Value::Boolean(Boolean::True(TrueVariant::Implicit))) Event::Value(Value::Boolean(Boolean::True(TrueVariant::Implicit)))
] ]
@ -781,7 +783,7 @@ mod parse {
name: "hello", name: "hello",
subsection_name: None, subsection_name: None,
}, },
items: vec![ events: vec![
Event::Comment(ParsedComment { Event::Comment(ParsedComment {
comment_tag: ';', comment_tag: ';',
comment: " commentA", comment: " commentA",
@ -817,7 +819,7 @@ mod parse {
name: "section", name: "section",
subsection_name: None, subsection_name: None,
}, },
items: vec![ events: vec![
Event::Key("a"), Event::Key("a"),
Event::ValueNotDone(r#"1 "\""#), Event::ValueNotDone(r#"1 "\""#),
Event::Newline("\n"), Event::Newline("\n"),
@ -842,7 +844,7 @@ mod parse {
name: "section", name: "section",
subsection_name: Some("a") subsection_name: Some("a")
}, },
items: vec![ events: vec![
Event::Key("b"), Event::Key("b"),
Event::ValueNotDone("\""), Event::ValueNotDone("\""),
Event::Newline("\n"), Event::Newline("\n"),

View file

@ -80,7 +80,9 @@ pub enum TrueVariant {
True, True,
One, One,
/// For variables defined without a `= <value>`. This can never be created /// For variables defined without a `= <value>`. This can never be created
/// from the FromStr trait, as an empty string is false without context. /// from the [`FromStr`] trait, as an empty string is false without context.
/// If directly serializing this struct (instead of using a higher level
/// wrapper), then this variant is serialized as if it was [`Self::True`].
Implicit, Implicit,
} }

View file

@ -67,10 +67,10 @@ fn personal_config() {
value("gpg"), value("gpg"),
section_header("url", "ssh://git@github.com/"), section_header("url", "ssh://git@github.com/"),
name("insteadOf"), name("insteadOf"),
value("github://"), value("\"github://\""),
section_header("url", "ssh://git@git.eddie.sh/edward/"), section_header("url", "ssh://git@git.eddie.sh/edward/"),
name("insteadOf"), name("insteadOf"),
value("gitea://"), value("\"gitea://\""),
section_header("pull", None), section_header("pull", None),
name("ff"), name("ff"),
value("only"), value("only"),