Skip to content

Commit 4d5441d

Browse files
committed
Refactor parser utils to return the RcDomWithLineNumbers
This gives the various processors access not just to the Document but also to the RcDomWithLineNumbers instance. That is useful for various operations; for example, subsequent commits will store the line numbers and use them when displaying error messages.
1 parent 8a7c99c commit 4d5441d

10 files changed

+114
-66
lines changed

src/anchor_permanence.rs

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -87,73 +87,82 @@ mod tests {
8787
use super::*;
8888
use crate::dom_utils;
8989
use crate::parser::{parse_document_async, tests::serialize_for_test};
90+
use std::io;
9091

9192
#[tokio::test]
92-
async fn removes_script_from_head() {
93-
let document = parse_document_async(r#"<!DOCTYPE html>
93+
async fn removes_script_from_head() -> io::Result<()> {
94+
let parsed = parse_document_async(r#"<!DOCTYPE html>
9495
<html><head><script type="text/required-ids">a b c</script></head><body><div id="a"></div><p id="b"></p><section id="c"></section></body></html>
95-
"#.as_bytes()).await.unwrap();
96+
"#.as_bytes()).await?;
97+
let document = parsed.document().clone();
9698
let mut processor = Processor::new();
9799
dom_utils::scan_dom(&document, &mut |h| processor.visit(h));
98100
processor.apply().unwrap();
99101
let serialized = serialize_for_test(&[document]);
100102
assert!(!serialized.contains("text/required-ids"));
103+
Ok(())
101104
}
102105

103106
#[tokio::test]
104-
async fn no_script_present_noop() {
105-
let document = parse_document_async(
107+
async fn no_script_present_noop() -> io::Result<()> {
108+
let parsed = parse_document_async(
106109
r#"<!DOCTYPE html>
107110
<html><head></head><body></body></html>
108111
"#
109112
.as_bytes(),
110113
)
111-
.await
112-
.unwrap();
114+
.await?;
115+
let document = parsed.document().clone();
113116
let before = serialize_for_test(&[document.clone()]);
114117
let mut processor = Processor::new();
115118
dom_utils::scan_dom(&document, &mut |h| processor.visit(h));
116119
processor.apply().unwrap();
117120
assert_eq!(before, serialize_for_test(&[document]));
121+
Ok(())
118122
}
119123

120124
#[tokio::test]
121-
async fn whitespace_splitting() {
125+
async fn whitespace_splitting() -> io::Result<()> {
122126
// Includes indentation, multiple spaces, and newlines in the script content.
123-
let document = parse_document_async(r#"<!DOCTYPE html><html><head><script type="text/required-ids">
127+
let parsed = parse_document_async(r#"<!DOCTYPE html><html><head><script type="text/required-ids">
124128
foo bar
125129
baz
126130
qux
127131
</script></head><body><div id="foo"></div><div id="bar"></div><div id="baz"></div><div id="qux"></div></body></html>
128-
"#.as_bytes()).await.unwrap();
132+
"#.as_bytes()).await?;
133+
let document = parsed.document().clone();
129134
let mut processor = Processor::new();
130135
dom_utils::scan_dom(&document, &mut |h| processor.visit(h));
131136
processor.apply().unwrap();
132137
let serialized = serialize_for_test(&[document]);
133138
assert!(!serialized.contains("text/required-ids"));
139+
Ok(())
134140
}
135141

136142
#[tokio::test]
137-
async fn errors_on_missing_ids() {
138-
let document = parse_document_async(r#"<!DOCTYPE html>
143+
async fn errors_on_missing_ids() -> io::Result<()> {
144+
let parsed = parse_document_async(r#"<!DOCTYPE html>
139145
<html><head><script type="text/required-ids">foo bar baz</script></head><body><div id="foo"></div></body></html>
140-
"#.as_bytes()).await.unwrap();
146+
"#.as_bytes()).await?;
147+
let document = parsed.document().clone();
141148
let mut processor = Processor::new();
142149
dom_utils::scan_dom(&document, &mut |h| processor.visit(h));
143150
let err = processor.apply().expect_err("expected missing IDs error");
144151
assert!(
145152
err.to_string()
146153
.contains("Missing required IDs for anchor permanence: bar, baz")
147154
);
155+
Ok(())
148156
}
149157

150158
#[tokio::test]
151159
#[should_panic(expected = "multiple required-ids scripts encountered")]
152160
async fn panics_on_multiple_required_ids_scripts() {
153-
let document = parse_document_async(r#"<!DOCTYPE html><html><head>
161+
let parsed = parse_document_async(r#"<!DOCTYPE html><html><head>
154162
<script type="text/required-ids">a b</script>
155163
<script type="text/required-ids">c d</script>
156164
</head><body><div id="a"></div><div id="b"></div><div id="c"></div><div id="d"></div></body></html>"#.as_bytes()).await.unwrap();
165+
let document = parsed.document().clone();
157166
let mut processor = Processor::new();
158167
dom_utils::scan_dom(&document, &mut |h| processor.visit(h));
159168
}

src/annotate_attributes.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ mod tests {
311311
// before and after the attributes table, to demonstrate that this is
312312
// not sensitive to which order they occur in (i.e., these could be
313313
// reordered in the HTML spec).
314-
let document = parse_document_async(
314+
let parsed = parse_document_async(
315315
r#"
316316
<!DOCTYPE html>
317317
<h3>The a element</h3>
@@ -333,6 +333,7 @@ mod tests {
333333
<dd><code data-x="attr-area-href">href</code>
334334
</dl>
335335
"#.trim().as_bytes()).await?;
336+
let document = parsed.document().clone();
336337
let mut proc = Processor::new();
337338
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
338339
proc.apply().await?;
@@ -368,7 +369,7 @@ mod tests {
368369
async fn test_variant() -> io::Result<()> {
369370
// This checks that <!-- variant --> and <!-- or: --> work correctly.
370371
// i.e., the variant description is used where requested
371-
let document = parse_document_async(
372+
let parsed = parse_document_async(
372373
r#"
373374
<!DOCTYPE html>
374375
<h3>The a element</h3>
@@ -386,6 +387,7 @@ mod tests {
386387
<dd><code data-x="attr-area-href">href</code><!-- variant -->
387388
</dl>
388389
"#.trim().as_bytes()).await?;
390+
let document = parsed.document().clone();
389391
let mut proc = Processor::new();
390392
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
391393
proc.apply().await?;
@@ -415,7 +417,7 @@ mod tests {
415417
#[tokio::test]
416418
async fn test_special_semantics() -> io::Result<()> {
417419
// Checks that the special rules for using : instead of an em dash work.
418-
let document = parse_document_async(
420+
let parsed = parse_document_async(
419421
r#"
420422
<!DOCTYPE html>
421423
<h3>The a element</h3>
@@ -428,6 +430,7 @@ mod tests {
428430
<tr><th><code data-x>name</code><td><code data-x="attr-a-name">a</code><td>Anchor name
429431
</tbody></table>
430432
"#.trim().as_bytes()).await?;
433+
let document = parsed.document().clone();
431434
let mut proc = Processor::new();
432435
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
433436
proc.apply().await?;
@@ -451,7 +454,7 @@ mod tests {
451454
#[tokio::test]
452455
async fn test_special_semantics_multiple() -> io::Result<()> {
453456
// Checks that the special rules for joining any special semantics with a ; work.
454-
let document = parse_document_async(
457+
let parsed = parse_document_async(
455458
r#"
456459
<!DOCTYPE html>
457460
<h3>The a element</h3>
@@ -465,6 +468,7 @@ mod tests {
465468
<tr><th><code data-x>name</code><td><code data-x="attr-a-name">a</code><td>Name of the anchor
466469
</tbody></table>
467470
"#.trim().as_bytes()).await?;
471+
let document = parsed.document().clone();
468472
let mut proc = Processor::new();
469473
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
470474
proc.apply().await?;
@@ -490,7 +494,7 @@ mod tests {
490494
async fn test_identical_links() -> io::Result<()> {
491495
// This checks the same identifier can be linked multiple times without
492496
// repeating the description.
493-
let document = parse_document_async(
497+
let parsed = parse_document_async(
494498
r#"
495499
<!DOCTYPE html>
496500
<h3>The img element</h3>
@@ -508,6 +512,7 @@ mod tests {
508512
<tr><th><code data-x>width</code><td><code data-x="attr-dim-width">img</code>; <code data-x="attr-dim-width">video</code><td>Horizontal dimension
509513
</tbody></table>
510514
"#.trim().as_bytes()).await?;
515+
let document = parsed.document().clone();
511516
let mut proc = Processor::new();
512517
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
513518
proc.apply().await?;

src/boilerplate.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -166,10 +166,11 @@ mod tests {
166166
"<tr><td>en<td>English",
167167
)
168168
.await?;
169-
let document = parse_document_async(
169+
let parsed = parse_document_async(
170170
"<!DOCTYPE html><table><!--BOILERPLATE languages--></table>".as_bytes(),
171171
)
172172
.await?;
173+
let document = parsed.document().clone();
173174
let mut proc = Processor::new(boilerplate_dir.path(), Path::new("."));
174175
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
175176
proc.apply().await?;
@@ -188,10 +189,11 @@ mod tests {
188189
"data:text/html,Hello, world!",
189190
)
190191
.await?;
191-
let document = parse_document_async(
192+
let parsed = parse_document_async(
192193
"<!DOCTYPE html><a href=\"<!--BOILERPLATE data.url-->\">hello</a>".as_bytes(),
193194
)
194195
.await?;
196+
let document = parsed.document().clone();
195197
let mut proc = Processor::new(boilerplate_dir.path(), Path::new("."));
196198
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
197199
proc.apply().await?;
@@ -208,9 +210,10 @@ mod tests {
208210
tokio::fs::write(example_dir.path().join("ex1"), "first").await?;
209211
tokio::fs::write(example_dir.path().join("ex2"), "second").await?;
210212
tokio::fs::write(example_dir.path().join("ignored"), "bad").await?;
211-
let document =
213+
let parsed =
212214
parse_document_async("<!DOCTYPE html><pre>EXAMPLE ex1</pre><pre><code class=html>\nEXAMPLE ex2 </code></pre><p>EXAMPLE ignored</p>".as_bytes())
213215
.await?;
216+
let document = parsed.document().clone();
214217
let mut proc = Processor::new(Path::new("."), example_dir.path());
215218
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
216219
proc.apply().await?;
@@ -229,7 +232,8 @@ mod tests {
229232
"<!DOCTYPE html><body><pre>EXAMPLE ../foo</pre>",
230233
];
231234
for example in bad_path_examples {
232-
let document = parse_document_async(example.as_bytes()).await?;
235+
let parsed = parse_document_async(example.as_bytes()).await?;
236+
let document = parsed.document().clone();
233237
let mut proc = Processor::new(Path::new("."), Path::new("."));
234238
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
235239
let result = proc.apply().await;

src/interface_index.rs

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ mod tests {
186186

187187
#[tokio::test]
188188
async fn test_two_interfaces_in_one_block() -> io::Result<()> {
189-
let document = parse_document_async(
189+
let parsed = parse_document_async(
190190
r#"
191191
<!DOCTYPE html>
192192
<pre><code class=idl>
@@ -199,6 +199,7 @@ INSERT INTERFACES HERE
199199
.as_bytes(),
200200
)
201201
.await?;
202+
let document = parsed.document().clone();
202203
let mut proc = Processor::new();
203204
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
204205
proc.apply()?;
@@ -216,7 +217,7 @@ interface <dfn interface="">HTMLBlinkElement</dfn> { ... }
216217

217218
#[tokio::test]
218219
async fn test_two_interfaces_in_separate_blocks() -> io::Result<()> {
219-
let document = parse_document_async(
220+
let parsed = parse_document_async(
220221
r#"
221222
<!DOCTYPE html>
222223
<pre><code class=idl>
@@ -231,6 +232,7 @@ INSERT INTERFACES HERE
231232
.as_bytes(),
232233
)
233234
.await?;
235+
let document = parsed.document().clone();
234236
let mut proc = Processor::new();
235237
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
236238
proc.apply()?;
@@ -250,7 +252,7 @@ interface <dfn interface="">HTMLBlinkElement</dfn> { ... }
250252

251253
#[tokio::test]
252254
async fn interface_with_partial() -> io::Result<()> {
253-
let document = parse_document_async(
255+
let parsed = parse_document_async(
254256
r#"
255257
<!DOCTYPE html>
256258
<pre><code class=idl>
@@ -265,6 +267,7 @@ INSERT INTERFACES HERE
265267
.as_bytes(),
266268
)
267269
.await?;
270+
let document = parsed.document().clone();
268271
let mut proc = Processor::new();
269272
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
270273
proc.apply()?;
@@ -284,7 +287,7 @@ partial interface <span id="HTMLMarqueeElement-partial">HTMLMarqueeElement</span
284287

285288
#[tokio::test]
286289
async fn interface_with_two_partials() -> io::Result<()> {
287-
let document = parse_document_async(
290+
let parsed = parse_document_async(
288291
r#"
289292
<!DOCTYPE html>
290293
<pre><code class=idl>
@@ -298,6 +301,7 @@ INSERT INTERFACES HERE
298301
.as_bytes(),
299302
)
300303
.await?;
304+
let document = parsed.document().clone();
301305
let mut proc = Processor::new();
302306
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
303307
proc.apply()?;
@@ -316,7 +320,7 @@ partial interface <span id="HTMLMarqueeElement-partial-2">HTMLMarqueeElement</sp
316320

317321
#[tokio::test]
318322
async fn only_partials() -> io::Result<()> {
319-
let document = parse_document_async(
323+
let parsed = parse_document_async(
320324
r#"
321325
<!DOCTYPE html>
322326
<pre><code class=idl>
@@ -329,6 +333,7 @@ INSERT INTERFACES HERE
329333
.as_bytes(),
330334
)
331335
.await?;
336+
let document = parsed.document().clone();
332337
let mut proc = Processor::new();
333338
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
334339
proc.apply()?;
@@ -346,7 +351,7 @@ partial interface <span id="HTMLMarqueeElement-partial-2">HTMLMarqueeElement</sp
346351

347352
#[tokio::test]
348353
async fn marker_before() -> io::Result<()> {
349-
let document = parse_document_async(
354+
let parsed = parse_document_async(
350355
r#"
351356
<!DOCTYPE html>
352357
INSERT INTERFACES HERE
@@ -358,6 +363,7 @@ interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
358363
.as_bytes(),
359364
)
360365
.await?;
366+
let document = parsed.document().clone();
361367
let mut proc = Processor::new();
362368
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
363369
proc.apply()?;
@@ -376,7 +382,8 @@ interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
376382

377383
#[tokio::test]
378384
async fn no_marker() -> io::Result<()> {
379-
let document = parse_document_async("<!DOCTYPE html>".as_bytes()).await?;
385+
let parsed = parse_document_async("<!DOCTYPE html>".as_bytes()).await?;
386+
let document = parsed.document().clone();
380387
let mut proc = Processor::new();
381388
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
382389
let result = proc.apply();
@@ -386,11 +393,12 @@ interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
386393

387394
#[tokio::test]
388395
async fn duplicate_marker() -> io::Result<()> {
389-
let document = parse_document_async(
396+
let parsed = parse_document_async(
390397
"<!DOCTYPE html><div>INSERT INTERFACES HERE</div><div>INSERT INTERFACES HERE</div>"
391398
.as_bytes(),
392399
)
393400
.await?;
401+
let document = parsed.document().clone();
394402
let mut proc = Processor::new();
395403
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
396404
let result = proc.apply();
@@ -400,7 +408,7 @@ interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
400408

401409
#[tokio::test]
402410
async fn duplicate_dfn() -> io::Result<()> {
403-
let document = parse_document_async(
411+
let parsed = parse_document_async(
404412
r#"
405413
<!DOCTYPE html>
406414
<pre><code class=idl>
@@ -411,6 +419,7 @@ interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
411419
.as_bytes(),
412420
)
413421
.await?;
422+
let document = parsed.document().clone();
414423
let mut proc = Processor::new();
415424
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
416425
let result = proc.apply();

src/main.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ async fn run_preprocess() -> io::Result<()> {
5151
// Because parsing can jump around the tree a little, it's most reasonable
5252
// to just parse the whole document before doing any processing. Even for
5353
// the HTML standard, this doesn't take too long.
54-
let document = parser::parse_document_async(tokio::io::stdin()).await?;
54+
let parsed = parser::parse_document_async(tokio::io::stdin()).await?;
55+
let document = parsed.document().clone();
5556

5657
let mut boilerplate = boilerplate::Processor::new(cache_dir.clone(), source_dir.join("demos"));
5758
let mut represents = represents::Processor::new();
@@ -92,7 +93,8 @@ async fn run_preprocess() -> io::Result<()> {
9293

9394
// The steps and considerations here are similar to run_preprocess.
9495
async fn run_postprocess() -> io::Result<()> {
95-
let document = parser::parse_document_async(tokio::io::stdin()).await?;
96+
let parsed = parser::parse_document_async(tokio::io::stdin()).await?;
97+
let document = parsed.document().clone();
9698

9799
let mut anchor_permanence = anchor_permanence::Processor::new();
98100

0 commit comments

Comments
 (0)