commit 30b054df31598fd9aff4aebb8739cdba553f2c3f Author: Edward Shen Date: Thu Feb 18 12:49:47 2021 -0500 Complete initial parser diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..96ef6c0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..f931197 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "serde-git-config" +version = "0.1.0" +authors = ["Edward Shen "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +serde = "1.0" +nom = "6" + +[dev-dependencies] +serde_derive = "1.0" \ No newline at end of file diff --git a/src/de.rs b/src/de.rs new file mode 100644 index 0000000..9b85af0 --- /dev/null +++ b/src/de.rs @@ -0,0 +1,410 @@ +use std::ops::{AddAssign, MulAssign, Neg}; + +use crate::values::Boolean; +use crate::{ + error::{Error, Result}, + values::PeekParse, +}; +use serde::de::{ + self, DeserializeSeed, EnumAccess, IntoDeserializer, MapAccess, SeqAccess, VariantAccess, + Visitor, +}; + +use serde::Deserialize; + +pub struct Deserializer<'de> { + // This string starts with the input data and characters are truncated off + // the beginning as data is parsed. + input: &'de str, +} + +impl<'de> Deserializer<'de> { + // By convention, `Deserializer` constructors are named like `from_xyz`. + // That way basic use cases are satisfied by something like + // `serde_json::from_str(...)` while advanced use cases that require a + // deserializer can make one with `serde_json::Deserializer::from_str(...)`. + pub fn from_str(input: &'de str) -> Self { + Deserializer { input } + } +} + +// By convention, the public API of a Serde deserializer is one or more +// `from_xyz` methods such as `from_str`, `from_bytes`, or `from_reader` +// depending on what Rust types the deserializer is able to consume as input. 
+// +// This basic deserializer supports only `from_str`. +pub fn from_str<'a, T>(s: &'a str) -> Result +where + T: Deserialize<'a>, +{ + let mut deserializer = Deserializer::from_str(s); + let t = T::deserialize(&mut deserializer)?; + if deserializer.input.is_empty() { + Ok(t) + } else { + todo!() + } +} + +impl<'de> Deserializer<'de> { + fn peek(&mut self) -> Result { + self.input.chars().next().ok_or(Error::Eof) + } + + fn next(&mut self) -> Result { + let ch = self.peek()?; + self.input = &self.input[ch.len_utf8()..]; + Ok(ch) + } + + fn parse_bool(&mut self) -> Result { + let (value, size) = Boolean::peek_parse(self.input)?; + self.input = &self.input[size..]; + Ok(value) + } + + fn parse_unsigned(&mut self) -> Result { + self.parse_int(true) + } + + fn parse_signed(&mut self) -> Result { + self.parse_int(false) + } + + fn parse_int(&mut self, positive_only: bool) -> Result { + self.consume_whitespace()?; + + match self.next()? { + c @ '0'..='9' => { + let mut significand = (c as u8 - b'0') as u64; + + loop { + match self.peek()? { + c @ '0'..='9' => { + let digit = (c as u8 - b'0') as u64; + + if significand.wrapping_mul(10).wrapping_add(digit) < u64::MAX {} + + let _ = self.next(); + significand = significand * 10 + digit; + } + _ => { + // return self.parse_number(positive, significand); + todo!() + } + } + } + } + _ => Err(Error::InvalidInteger), + } + } + + fn consume_whitespace(&mut self) -> Result<()> { + loop { + match self.peek()? { + ' ' | '\n' | '\t' | '\r' => { + let _ = self.next(); + } + _ => { + return Ok(()); + } + } + } + } +} + +impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self.peek()? { + _ => todo!(), + } + } + + fn deserialize_bool(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_bool(self.parse_bool()?) 
+ } + + fn deserialize_i8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_i8(self.parse_signed()?) + } + + fn deserialize_i16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_i16(self.parse_signed()?) + } + + fn deserialize_i32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_i32(self.parse_signed()?) + } + + fn deserialize_i64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_i64(self.parse_signed()?) + } + + fn deserialize_u8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_u8(self.parse_unsigned()?) + } + + fn deserialize_u16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_u16(self.parse_unsigned()?) + } + + fn deserialize_u32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_u32(self.parse_unsigned()?) + } + + fn deserialize_u64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_u64(self.parse_unsigned()?) + } + + fn deserialize_f32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_f64(visitor) + } + + fn deserialize_f64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + unimplemented!("Cannot deserialize into a float value! 
Use a integer variant instead.") + } + + fn deserialize_char(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + fn deserialize_str(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + todo!() + } + + fn deserialize_string(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + fn deserialize_bytes(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + todo!() + } + + fn deserialize_byte_buf(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + todo!() + } + + fn deserialize_option(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + todo!() + } + + fn deserialize_unit(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_unit() + } + + fn deserialize_unit_struct(self, _name: &'static str, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_unit() + } + + fn deserialize_newtype_struct(self, name: &'static str, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_newtype_struct(self) + } + + fn deserialize_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + todo!() + } + + fn deserialize_tuple(self, _len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + fn deserialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + fn deserialize_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.consume_whitespace()?; + Ok(visitor.visit_map(self)?) 
+ } + + fn deserialize_struct( + self, + _name: &'static str, + _fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + visitor.visit_map(self) + } + + fn deserialize_enum( + self, + name: &'static str, + variants: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + todo!() + } + + fn deserialize_identifier(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + todo!() + } + + fn deserialize_ignored_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + todo!() + } +} + +impl<'de, 'a> MapAccess<'de> for Deserializer<'de> { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result> + where + K: DeserializeSeed<'de>, + { + // A map section is ended when another section begins or we hit EOL. + // Therefore, we only check if a next section begins or in the case of + // EOL indicate that we're done. + if self.peek().unwrap_or('[') == '[' { + return Ok(None); + } + + seed.deserialize(self).map(Some) + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: DeserializeSeed<'de>, + { + seed.deserialize(self) + } +} + +#[cfg(test)] +mod deserialize { + use crate::from_str; + use serde_derive::Deserialize; + + #[test] + fn unit() { + #[derive(Deserialize, PartialEq, Debug)] + struct Test; + assert_eq!(Test, from_str("").unwrap()); + assert_eq!((), from_str("").unwrap()); + } + + #[test] + #[should_panic] + fn float() { + from_str::("").unwrap(); + } + + #[test] + fn basic() { + #[derive(Deserialize, PartialEq, Debug)] + struct Config { + user: User, + } + + #[derive(Deserialize, PartialEq, Debug)] + struct User { + email: String, + name: String, + } + + let expected = Config { + user: User { + email: "code@eddie.sh".to_string(), + name: "Edward Shen".to_string(), + }, + }; + + assert_eq!( + expected, + from_str("[user]\nemail=code@eddie.sh\nname=Edward Shen\n").unwrap() + ); + } +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 
0000000..74f15b3 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,42 @@ +use std::fmt::{self, Display}; + +use serde::{de, ser}; + +pub type Result = std::result::Result; + +// This is a bare-bones implementation. A real library would provide additional +// information in its error type, for example the line and column at which the +// error occurred, the byte offset into the input, or the current key being +// processed. +#[derive(Clone, Debug, PartialEq)] +pub enum Error { + Message(String), + Eof, + InvalidInteger, + InvalidBoolean(String), +} + +impl ser::Error for Error { + fn custom(msg: T) -> Self { + Error::Message(msg.to_string()) + } +} + +impl de::Error for Error { + fn custom(msg: T) -> Self { + Error::Message(msg.to_string()) + } +} + +impl Display for Error { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + match self { + Error::Message(msg) => formatter.write_str(msg), + Error::Eof => formatter.write_str("unexpected end of input"), + Error::InvalidInteger => formatter.write_str("invalid integer given"), + Error::InvalidBoolean(_) => formatter.write_str("invalid boolean given"), + } + } +} + +impl std::error::Error for Error {} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..beb7fee --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,9 @@ +// mod de; +mod error; +// mod ser; +pub mod parser; +mod values; + +// pub use de::{from_str, Deserializer}; +pub use error::{Error, Result}; +// pub use ser::{to_string, Serializer}; diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..7e0124f --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,698 @@ +use crate::values::{Boolean, TrueVariant, Value}; +use nom::bytes::complete::{escaped, tag, take_till, take_while}; +use nom::character::complete::{char, none_of, one_of}; +use nom::character::{is_newline, is_space}; +use nom::combinator::{map, opt}; +use nom::error::{Error as NomError, ErrorKind}; +use nom::multi::many1; +use nom::sequence::delimited; +use 
nom::IResult; +use nom::{branch::alt, multi::many0}; + +/// An event is any syntactic event that occurs in the config. +#[derive(PartialEq, Debug)] +pub enum Event<'a> { + Comment(Comment<'a>), + Key(&'a str), + /// Represents any token used to signify a new line character. On Unix + /// platforms, this is typically just `\n`, but can be any valid newline + /// sequence. + Newline(&'a str), + /// + Value(Value<'a>), + /// Any value that isn't completed. This occurs when the value is continued + /// onto the next line. A Newline event is guaranteed after, followed by + /// either another ValueNotDone or a ValueDone. + ValueNotDone(&'a str), + /// The last line of a value which was continued onto another line. + ValueDone(&'a str), +} + +#[derive(PartialEq, Debug)] +pub struct Section<'a> { + section_header: SectionHeader<'a>, + items: Vec>, +} + +#[derive(PartialEq, Debug)] +pub struct SectionHeader<'a> { + name: &'a str, + subsection_name: Option<&'a str>, +} + +#[derive(PartialEq, Debug)] +pub struct Comment<'a> { + comment_tag: char, + comment: &'a str, +} + +pub struct Parser<'a> { + init_comments: Vec>, + sections: Vec>, +} + +pub fn parse(input: &str) -> Result, ()> { + let (i, comments) = many0(comment)(input).unwrap(); + let (i, sections) = many1(section)(i).unwrap(); + + Ok(Parser { + init_comments: comments, + sections, + }) +} + +fn comment<'a>(i: &'a str) -> IResult<&'a str, Comment<'a>> { + let i = i.trim_start(); + let (i, comment_tag) = one_of(";#")(i)?; + let (i, comment) = take_till(is_char_newline)(i)?; + Ok(( + i, + Comment { + comment_tag, + comment, + }, + )) +} + +fn section<'a>(i: &'a str) -> IResult<&'a str, Section<'a>> { + let (i, section_header) = section_header(i)?; + // need alt here for eof? 
+ let (i, items) = many1(alt(( + map(section_body, |(key, values)| { + let mut vec = vec![Event::Key(key)]; + vec.extend(values); + vec + }), + map(comment, |comment| vec![Event::Comment(comment)]), + )))(i)?; + Ok(( + i, + Section { + section_header, + items: items.into_iter().flatten().collect(), + }, + )) +} + +fn section_header<'a>(i: &'a str) -> IResult<&'a str, SectionHeader<'a>> { + let (i, _) = char('[')(i)?; + // No spaces must be between section name and section start + let (i, name) = take_while(|c: char| c.is_ascii_alphanumeric() || c == '-' || c == '.')(i)?; + + if let Ok((i, _)) = char::<_, NomError<&str>>(']')(i) { + // Either section does not have a subsection or using deprecated + // subsection syntax at this point. + let header = match name.rfind('.') { + Some(index) => SectionHeader { + name: &name[..index], + subsection_name: Some(&name[index + 1..]), + }, + None => SectionHeader { + name: name, + subsection_name: None, + }, + }; + + return Ok((i, header)); + } + + // Section header must be using modern subsection syntax at this point. + + let (i, _) = take_spaces(i)?; + + let (i, subsection_name) = delimited( + char('"'), + opt(escaped(none_of("\"\\\n\0"), '\\', one_of(r#""\"#))), + tag("\"]"), + )(i)?; + + Ok(( + i, + SectionHeader { + name: name, + // We know that there's some section name here, so if we get an + // empty vec here then we actually parsed an empty section name. + subsection_name: subsection_name.or(Some("")), + }, + )) +} + +fn take_spaces<'a>(i: &'a str) -> IResult<&'a str, &'a str> { + take_while(|c: char| c.is_ascii() && is_space(c as u8))(i) +} + +fn section_body<'a>(i: &'a str) -> IResult<&'a str, (&'a str, Vec>)> { + let i = i.trim_start(); + // maybe need to check for [ here + let (i, name) = config_name(i)?; + let (i, _) = take_spaces(i)?; + let (i, value) = config_value(i)?; + + Ok((i, (name, value))) +} + +/// Parses the config name of a config pair. 
Assumes the input has already been +/// trimmed of any leading whitespace. +fn config_name<'a>(i: &'a str) -> IResult<&'a str, &'a str> { + if i.is_empty() { + return Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::NonEmpty, + })); + } + + if !i.chars().nth(0).unwrap().is_alphabetic() { + return Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::Alpha, + })); + } + + take_while(|c: char| c.is_alphanumeric() || c == '-')(i) +} + +fn config_value<'a>(i: &'a str) -> IResult<&'a str, Vec>> { + if let (i, Some(_)) = opt(char('='))(i)? { + let (i, _) = take_spaces(i)?; + value_impl(i) + } else { + Ok(( + i, + vec![Event::Value(Value::Boolean(Boolean::True( + TrueVariant::Implicit, + )))], + )) + } +} + +fn value_impl<'a>(i: &'a str) -> IResult<&'a str, Vec>> { + // I wrote this code and don't know how it works. + // + // Even after sleeping on it I still don't know how it works. + + let mut events = vec![]; + let mut parsed_index: usize = 0; + let mut offset: usize = 0; + + let mut was_prev_char_escape_char = false; + // This is required to ignore comment markers if they're in a quote. + let mut is_in_quotes = false; + // Used to determine if we return a Value or Value{Not,}Done + let mut partial_value_found = false; + + for (index, c) in i.as_bytes().iter().enumerate() { + if was_prev_char_escape_char { + was_prev_char_escape_char = false; + match c { + // We're escaping a newline, which means we've found a + // continuation. 
+ b'\n' => { + partial_value_found = true; + events.push(Event::ValueNotDone(&i[offset..index - 1])); + events.push(Event::Newline(&i[index..index + 1])); + offset = index + 1; + parsed_index = 0; + } + b't' | b'\\' | b'n' | b'"' => (), + _ => { + return Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::Escaped, + })); + } + } + } else { + match c { + b'\n' => { + parsed_index = index; + break; + } + b';' | b'#' if !is_in_quotes => { + parsed_index = index; + break; + } + b'\\' => was_prev_char_escape_char = true, + b'"' => is_in_quotes = !is_in_quotes, + _ => {} + } + } + } + + if parsed_index == 0 { + parsed_index = i.len(); + } + + // Handle incomplete escape + if was_prev_char_escape_char { + return Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::Escaped, + })); + } + + // Handle incomplete quotes + if is_in_quotes { + return Err(nom::Err::Error(NomError { + input: i, + code: ErrorKind::Tag, + })); + } + + let remainder_value = &i[offset..parsed_index].trim_end(); + if partial_value_found { + events.push(Event::ValueDone(remainder_value)); + } else { + events.push(Event::Value(Value::from_str(remainder_value))); + } + + Ok((&i[parsed_index..], events)) +} + +fn is_char_newline(c: char) -> bool { + c.is_ascii() && is_newline(c as u8) +} + +#[cfg(test)] +mod parse { + use super::*; + + fn fully_consumed(t: T) -> (&'static str, T) { + ("", t) + } + + mod comments { + use super::super::*; + use super::*; + + #[test] + fn semicolon() { + assert_eq!( + comment("; this is a semicolon comment").unwrap(), + fully_consumed(Comment { + comment_tag: ';', + comment: " this is a semicolon comment", + }) + ); + } + + #[test] + fn octothorpe() { + assert_eq!( + comment("# this is an octothorpe comment").unwrap(), + fully_consumed(Comment { + comment_tag: '#', + comment: " this is an octothorpe comment", + }) + ); + } + + #[test] + fn multiple_markers() { + assert_eq!( + comment("###### this is an octothorpe comment").unwrap(), + fully_consumed(Comment 
{ + comment_tag: '#', + comment: "##### this is an octothorpe comment", + }) + ); + } + } + + mod section_headers { + use super::super::*; + use super::*; + + #[test] + fn no_subsection() { + assert_eq!( + section_header("[hello]").unwrap(), + fully_consumed(SectionHeader { + name: "hello", + subsection_name: None + }) + ); + } + + #[test] + fn modern_subsection() { + assert_eq!( + section_header(r#"[hello "world"]"#).unwrap(), + fully_consumed(SectionHeader { + name: "hello", + subsection_name: Some("world") + }) + ); + } + + #[test] + fn escaped_subsection() { + assert_eq!( + section_header(r#"[hello "foo\\bar\""]"#).unwrap(), + fully_consumed(SectionHeader { + name: "hello", + subsection_name: Some(r#"foo\\bar\""#) + }) + ); + } + + #[test] + fn deprecated_subsection() { + assert_eq!( + section_header(r#"[hello.world]"#).unwrap(), + fully_consumed(SectionHeader { + name: "hello", + subsection_name: Some("world") + }) + ); + } + + #[test] + fn empty_legacy_subsection_name() { + assert_eq!( + section_header(r#"[hello.]"#).unwrap(), + fully_consumed(SectionHeader { + name: "hello", + subsection_name: Some("") + }) + ); + } + + #[test] + fn empty_modern_subsection_name() { + assert_eq!( + section_header(r#"[hello ""]"#).unwrap(), + fully_consumed(SectionHeader { + name: "hello", + subsection_name: Some("") + }) + ); + } + + #[test] + fn newline_in_header() { + assert!(section_header("[hello\n]").is_err()) + } + + #[test] + fn null_byte_in_header() { + assert!(section_header("[hello\0]").is_err()) + } + + #[test] + fn right_brace_in_subsection_name() { + assert_eq!( + section_header(r#"[hello "]"]"#).unwrap(), + fully_consumed(SectionHeader { + name: "hello", + subsection_name: Some("]") + }) + ); + } + } + + mod config_name { + use super::super::*; + use super::*; + + #[test] + fn just_name() { + assert_eq!(config_name("name").unwrap(), fully_consumed("name")); + } + + #[test] + fn must_start_with_alphabetic() { + assert!(config_name("4aaa").is_err()); + 
assert!(config_name("-aaa").is_err()); + } + + #[test] + fn cannot_be_empty() { + assert!(config_name("").is_err()) + } + } + + mod value_no_continuation { + use super::super::*; + use super::*; + + #[test] + fn no_comment() { + assert_eq!( + value_impl("hello").unwrap(), + fully_consumed(vec![Event::Value(Value::Other("hello"))]) + ); + } + + #[test] + fn no_comment_newline() { + assert_eq!( + value_impl("hello\na").unwrap(), + ("\na", vec![Event::Value(Value::Other("hello"))]) + ) + } + + #[test] + fn no_comment_is_trimmed() { + assert_eq!( + value_impl("hello").unwrap(), + value_impl("hello ").unwrap() + ); + } + + #[test] + fn semicolon_comment_not_consumed() { + assert_eq!( + value_impl("hello;world").unwrap(), + (";world", vec![Event::Value(Value::Other("hello")),]) + ); + } + + #[test] + fn octothorpe_comment_not_consumed() { + assert_eq!( + value_impl("hello#world").unwrap(), + ("#world", vec![Event::Value(Value::Other("hello")),]) + ); + } + + #[test] + fn values_with_comments_are_trimmed() { + assert_eq!( + value_impl("hello#world").unwrap(), + value_impl("hello #world").unwrap(), + ); + assert_eq!( + value_impl("hello;world").unwrap(), + value_impl("hello ;world").unwrap(), + ); + } + + #[test] + fn trans_escaped_comment_marker_not_consumed() { + assert_eq!( + value_impl(r##"hello"#"world; a"##).unwrap(), + ( + "; a", + vec![Event::Value(Value::Other(r##"hello"#"world"##)),] + ) + ); + } + + #[test] + fn complex_test() { + assert_eq!( + value_impl(r#"value";";ahhhh"#).unwrap(), + (";ahhhh", vec![Event::Value(Value::Other(r#"value";""#)),]) + ); + } + + #[test] + fn garbage_after_continution_is_err() { + assert!(value_impl("hello \\afwjdls").is_err()); + } + } + + mod value_continuation { + use super::super::*; + use super::*; + + #[test] + fn simple_continuation() { + assert_eq!( + value_impl("hello\\\nworld").unwrap(), + fully_consumed(vec![ + Event::ValueNotDone("hello"), + Event::Newline("\n"), + Event::ValueDone("world") + ]) + ); + } + #[test] + fn 
complex_continuation_with_leftover_comment() { + assert_eq!( + value_impl("1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c").unwrap(), + ( + "# \"b\t ; c", + vec![ + Event::ValueNotDone(r#"1 "\""#), + Event::Newline("\n"), + Event::ValueNotDone(r#"a ; e "\""#), + Event::Newline("\n"), + Event::ValueDone("d"), + ] + ) + ); + } + + #[test] + fn quote_split_over_two_lines_with_leftover_comment() { + assert_eq!( + value_impl("\"\\\n;\";a").unwrap(), + ( + ";a", + vec![ + Event::ValueNotDone("\""), + Event::Newline("\n"), + Event::ValueDone(";\""), + ] + ) + ) + } + } + + mod section { + use super::super::*; + use super::*; + + #[test] + fn simple_section() { + let section_data = r#"[hello] + a = b + c + d = "lol""#; + assert_eq!( + section(section_data).unwrap(), + fully_consumed(Section { + section_header: SectionHeader { + name: "hello", + subsection_name: None, + }, + items: vec![ + Event::Key("a"), + Event::Value(Value::from_str("b")), + Event::Key("c"), + Event::Value(Value::Boolean(Boolean::True(TrueVariant::Implicit))), + Event::Key("d"), + Event::Value(Value::from_str("\"lol\"")) + ] + }) + ) + } + + #[test] + fn section_single_line() { + assert_eq!( + section("[hello] c").unwrap(), + fully_consumed(Section { + section_header: SectionHeader { + name: "hello", + subsection_name: None, + }, + items: vec![ + Event::Key("c"), + Event::Value(Value::Boolean(Boolean::True(TrueVariant::Implicit))) + ] + }) + ); + } + + #[test] + fn section_very_commented() { + let section_data = r#"[hello] ; commentA + a = b # commentB + ; commentC + ; commentD + c = d"#; + assert_eq!( + section(section_data).unwrap(), + fully_consumed(Section { + section_header: SectionHeader { + name: "hello", + subsection_name: None, + }, + items: vec![ + Event::Comment(Comment { + comment_tag: ';', + comment: " commentA", + }), + Event::Key("a"), + Event::Value(Value::from_str("b")), + Event::Comment(Comment { + comment_tag: '#', + comment: " commentB", + }), + Event::Comment(Comment { + comment_tag: 
';', + comment: " commentC", + }), + Event::Comment(Comment { + comment_tag: ';', + comment: " commentD", + }), + Event::Key("c"), + Event::Value(Value::from_str("d")), + ] + }) + ); + } + + #[test] + fn complex_continuation() { + // This test is absolute hell. Good luck if this fails. + assert_eq!( + section("[section] a = 1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c").unwrap(), + fully_consumed(Section { + section_header: SectionHeader { + name: "section", + subsection_name: None, + }, + items: vec![ + Event::Key("a"), + Event::ValueNotDone(r#"1 "\""#), + Event::Newline("\n"), + Event::ValueNotDone(r#"a ; e "\""#), + Event::Newline("\n"), + Event::ValueDone("d"), + Event::Comment(Comment { + comment_tag: '#', + comment: " \"b\t ; c" + }) + ] + }) + ); + } + + #[test] + fn quote_split_over_two_lines() { + assert_eq!( + section("[section \"a\"] b =\"\\\n;\";a").unwrap(), + fully_consumed(Section { + section_header: SectionHeader { + name: "section", + subsection_name: Some("a") + }, + items: vec![ + Event::Key("b"), + Event::ValueNotDone("\""), + Event::Newline("\n"), + Event::ValueDone(";\""), + Event::Comment(Comment { + comment: "a", + comment_tag: ';' + }) + ] + }) + ) + } + } +} diff --git a/src/ser.rs b/src/ser.rs new file mode 100644 index 0000000..8aeeca9 --- /dev/null +++ b/src/ser.rs @@ -0,0 +1,184 @@ +use crate::error::{Error, Result}; +use ser::SerializeSeq; +use serde::{de::MapAccess, ser, Serialize}; + +pub struct Serializer { + output: String, +} + +pub fn to_string(value: &T) -> Result +where + T: Serialize, +{ + let mut serializer = Serializer { + output: String::new(), + }; + value.serialize(&mut serializer)?; + Ok(serializer.output) +} + +impl<'a> ser::Serializer for &'a mut Serializer { + type Ok = (); + type Error = Error; + type SerializeSeq = Self; + type SerializeTuple = Self; + type SerializeTupleStruct = Self; + type SerializeTupleVariant = Self; + type SerializeMap = Self; + type SerializeStruct = Self; + type SerializeStructVariant = Self; 
+ + fn serialize_bool(self, v: bool) -> Result { + self.output += if v { "true" } else { "false" }; + Ok(()) + } + + fn serialize_i8(self, v: i8) -> Result { + self.serialize_i64(i64::from(v)) + } + + fn serialize_i16(self, v: i16) -> Result { + self.serialize_i64(i64::from(v)) + } + + fn serialize_i32(self, v: i32) -> Result { + self.serialize_i64(i64::from(v)) + } + + fn serialize_i64(self, v: i64) -> Result { + self.output += &v.to_string(); + Ok(()) + } + + fn serialize_u8(self, v: u8) -> Result { + self.serialize_u64(u64::from(v)) + } + + fn serialize_u16(self, v: u16) -> Result { + self.serialize_u64(u64::from(v)) + } + + fn serialize_u32(self, v: u32) -> Result { + self.serialize_u64(u64::from(v)) + } + + fn serialize_u64(self, v: u64) -> Result { + self.output += &v.to_string(); + Ok(()) + } + + fn serialize_f32(self, v: f32) -> Result { + self.serialize_f64(f64::from(v)) + } + + fn serialize_f64(self, v: f64) -> Result { + self.output += &v.to_string(); + Ok(()) + } + + fn serialize_char(self, v: char) -> Result { + self.output += &v.to_string(); + Ok(()) + } + + fn serialize_str(self, v: &str) -> Result { + self.output += v; + Ok(()) + } + + fn serialize_bytes(self, v: &[u8]) -> Result { + todo!() + } + + fn serialize_none(self) -> Result { + todo!() + } + + fn serialize_some(self, value: &T) -> Result + where + T: Serialize, + { + todo!() + } + + fn serialize_unit(self) -> Result { + todo!() + } + + fn serialize_unit_struct(self, name: &'static str) -> Result { + todo!() + } + + fn serialize_unit_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + ) -> Result { + todo!() + } + + fn serialize_newtype_struct(self, name: &'static str, value: &T) -> Result + where + T: Serialize, + { + todo!() + } + + fn serialize_newtype_variant( + self, + name: &'static str, + variant_index: u32, + variant: &'static str, + value: &T, + ) -> Result + where + T: Serialize, + { + todo!() + } + + fn serialize_seq(self, len: Option) -> Result 
/// A git-config boolean that remembers which literal spelling produced it.
#[derive(PartialEq, Debug)]
pub enum Boolean {
    True(TrueVariant),
    False(FalseVariant),
}

/// The spellings that denote a true value.
#[derive(PartialEq, Debug)]
pub enum TrueVariant {
    Yes,
    On,
    True,
    /// The literal `1`.
    One,
    /// For variables defined without a `= <value>`.
    Implicit,
}

impl TryFrom<&str> for TrueVariant {
    type Error = ();

    /// Recognizes `yes`, `on` and `true` (ASCII case-insensitively) and the
    /// digit `1`. `Implicit` is never produced here because it has no textual
    /// form. Any other input yields `Err(())`.
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        if value.eq_ignore_ascii_case("yes") {
            Ok(Self::Yes)
        } else if value.eq_ignore_ascii_case("on") {
            Ok(Self::On)
        } else if value.eq_ignore_ascii_case("true") {
            Ok(Self::True)
        } else if value == "1" {
            Ok(Self::One)
        } else {
            Err(())
        }
    }
}

/// The spellings that denote a false value.
#[derive(PartialEq, Debug)]
pub enum FalseVariant {
    No,
    Off,
    False,
    /// The literal `0`.
    Zero,
    EmptyString,
}

impl TryFrom<&str> for FalseVariant {
    type Error = ();

    /// Recognizes `no`, `off` and `false` (ASCII case-insensitively), the
    /// digit `0` and the empty string. Any other input yields `Err(())`.
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        if value.eq_ignore_ascii_case("no") {
            Ok(Self::No)
        } else if value.eq_ignore_ascii_case("off") {
            Ok(Self::Off)
        } else if value.eq_ignore_ascii_case("false") {
            Ok(Self::False)
        } else if value == "0" {
            Ok(Self::Zero)
        } else if value.is_empty() {
            Ok(Self::EmptyString)
        } else {
            Err(())
        }
    }
}

impl TryFrom<&str> for Boolean {
    type Error = ();

    /// Tries the true spellings first and the false spellings second;
    /// `Err(())` means the text is not a boolean literal at all.
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        TrueVariant::try_from(value)
            .map(Boolean::True)
            .or_else(|()| FalseVariant::try_from(value).map(Boolean::False))
    }
}