11use nom:: {
22 branch:: alt,
3- bytes:: complete:: { tag, take_till, take_until, take_until1 , take_while, take_while1} ,
3+ bytes:: complete:: { tag, take_till, take_until, take_while, take_while1} ,
44 character:: complete:: { char, newline} ,
5- combinator:: { consumed, flat_map, map , opt, peek, recognize, rest} ,
6- complete :: take ,
5+ combinator:: { consumed, flat_map, opt, peek, recognize, rest} ,
6+ error :: ErrorKind ,
77 multi:: { many0, many_till} ,
88 sequence:: { delimited, pair, preceded, terminated, tuple} ,
99 IResult , Parser ,
@@ -33,7 +33,7 @@ fn is_ascii_whitespace(char: char) -> bool {
3333 char. is_ascii_whitespace ( )
3434}
3535
36- // https://html.spec.whatwg.org/multipage/syntax.html#attributes-2
36+ /// See <https://html.spec.whatwg.org/multipage/syntax.html#attributes-2>.
3737fn parse_attribute_name ( input : & str ) -> IResult < & str , & str > {
3838 take_while1 ( |char : char | {
3939 !matches ! ( char ,
@@ -99,10 +99,12 @@ fn parse_attribute(input: &str) -> IResult<&str, (&str, Option<&str>)> {
9999 ) ( input)
100100}
101101
102+ /// See <https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-name>.
102103fn parse_tag_name ( input : & str ) -> IResult < & str , & str > {
103104 take_till ( |char : char | char. is_ascii_whitespace ( ) || char == '/' || char == '>' ) ( input)
104105}
105106
107+ /// See <https://html.spec.whatwg.org/multipage/syntax.html#start-tags>.
106108fn parse_start_tag ( input : & str ) -> IResult < & str , StartTag > {
107109 let ( input, _) = char ( '<' ) ( input) ?;
108110
@@ -123,13 +125,56 @@ fn parse_start_tag(input: &str) -> IResult<&str, StartTag> {
123125 Ok ( ( input, StartTag { name, lang } ) )
124126}
125127
128+ fn take_until_next < ' a > ( pat : & ' a str ) -> impl FnMut ( & ' a str ) -> IResult < & ' a str , & ' a str > {
129+ preceded ( opt ( tag ( pat) ) , take_until ( pat) )
130+ }
131+
126132fn parse_tag_content < ' a > ( tag_name : & ' a str ) -> impl FnMut ( & ' a str ) -> IResult < & ' a str , & ' a str > {
127- recognize ( many_till (
128- preceded ( opt ( char ( '<' ) ) , take_until ( "<" ) ) ,
129- peek ( parse_end_tag ( tag_name) ) ,
130- ) )
133+ move |input : & str | {
134+ let mut nesting_level = 0u16 ;
135+ let mut index = match input. find ( '<' ) {
136+ Some ( index) => index,
137+ None => {
138+ return Err ( nom:: Err :: Error ( nom:: error:: Error :: new (
139+ input,
140+ ErrorKind :: TakeUntil ,
141+ ) ) )
142+ }
143+ } ;
144+
145+ while !input[ index..] . is_empty ( ) {
146+ if let Ok ( ( _, start_tag) ) = parse_start_tag ( & input[ index..] ) {
147+ if start_tag. name == tag_name {
148+ nesting_level += 1 ;
149+ }
150+ } else if let Ok ( ( _, _) ) = parse_end_tag ( tag_name) ( & input[ index..] ) {
151+ if nesting_level == 0 {
152+ return Ok ( ( & input[ index..] , & input[ ..index] ) ) ;
153+ }
154+
155+ nesting_level -= 1 ;
156+ }
157+
158+ index += match input. get ( ( index + 1 ) ..) . and_then ( |input| input. find ( '<' ) ) {
159+ Some ( index) => index + 1 ,
160+ None => {
161+ return Err ( nom:: Err :: Error ( nom:: error:: Error :: new (
162+ input,
163+ ErrorKind :: TakeUntil ,
164+ ) ) )
165+ }
166+ } ;
167+ }
168+
169+ Err ( nom:: Err :: Error ( nom:: error:: Error :: new (
170+ input,
171+ ErrorKind :: TakeUntil ,
172+ ) ) )
173+ }
131174}
132175
176+ /// Parse an end tag with the given `tag_name`.
177+ /// See <https://html.spec.whatwg.org/multipage/syntax.html#end-tags>.
133178fn parse_end_tag < ' a > ( tag_name : & ' a str ) -> impl FnMut ( & ' a str ) -> IResult < & ' a str , & ' a str > {
134179 delimited (
135180 tag ( "</" ) ,
@@ -150,9 +195,9 @@ fn parse_block(input: &str) -> IResult<&str, Block> {
150195 ) )
151196 . map ( move |( content, raw_end_tag) | Block {
152197 start_tag,
153- content,
154198 raw_start_tag,
155199 raw_end_tag,
200+ content,
156201 } )
157202 } ,
158203 )
@@ -163,10 +208,7 @@ fn parse_section(input: &str) -> IResult<&str, Section> {
163208 alt ( (
164209 parse_block. map ( Section :: Block ) ,
165210 alt ( (
166- recognize ( many_till (
167- preceded ( opt ( char ( '<' ) ) , take_until ( "<" ) ) ,
168- peek ( parse_block) ,
169- ) ) ,
211+ recognize ( many_till ( take_until_next ( "<" ) , peek ( parse_block) ) ) ,
170212 rest,
171213 ) )
172214 . map ( Section :: Root ) ,
@@ -177,7 +219,8 @@ pub fn parse_file(mut input: &str) -> Result<Vec<Section>, anyhow::Error> {
177219 let mut buffer = Vec :: new ( ) ;
178220
179221 loop {
180- let ( remaining, section) = parse_section ( input) . map_err ( |err| err. to_owned ( ) ) ?;
222+ let ( remaining, section) =
223+ parse_section ( input) . map_err ( |err| anyhow:: Error :: from ( err. to_owned ( ) ) ) ?;
181224
182225 buffer. push ( section) ;
183226
@@ -271,6 +314,11 @@ mod test {
271314 "let value = Math.random();\n console.log(value < 0.5);\n "
272315 ) )
273316 ) ;
317+
318+ assert_eq ! (
319+ parse_tag_content( "template" ) ( "<template></template></template>" ) ,
320+ Ok ( ( "</template>" , "<template></template>" ) )
321+ ) ;
274322 }
275323
276324 #[ test]
0 commit comments