Skip to content

Commit e68b23d

Browse files
author
vidy
committed
Add WordBuilder struct to build word
1 parent c4cccba commit e68b23d

File tree

2 files changed

+122
-111
lines changed

2 files changed

+122
-111
lines changed

src/flow.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ pub struct Word {
2020

2121
#[derive(Serialize, Deserialize, Debug)]
2222
pub struct Char {
23-
pub offset: i32,
23+
pub offset: usize,
2424
pub pos: f32,
2525
pub width: f32,
2626
}

src/text.rs

Lines changed: 121 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -9,141 +9,152 @@ use crate::{flow::{Char, Rect, Word}, util::avg};
99

1010
pub fn concat_text<'a, E: Encoder + 'a>(out: &mut String, items: impl Iterator<Item=&'a TextSpan<E>> + Clone) -> Vec<Word> {
1111
let word_gap = analyze_word_gap(items.clone());
12-
let mut words: Vec<Word> = vec![];
13-
14-
let mut end = 0.; // trailing edge of the last char
15-
12+
let mut words = Vec::new();
13+
let mut current_word = WordBuilder::new(out.len());
14+
1615
// Whether the last processed TextChar is whitespace
1716
// ' ' Space
1817
// '\t' Tab
1918
// '\n' Line feed
2019
// '\r' Carriage return
2120
// '\u{00A0}' Non-breaking space
22-
let mut trailing_space = out.chars().last().map(|c| c.is_whitespace()).unwrap_or(true);
23-
24-
let mut word_start_idx = out.len();
25-
26-
// For calculating the layout(position, width , height) of a word
27-
let mut word_start_pos = 0.0;
28-
let mut word_end_pos = 0.0;
29-
let mut y_min = f32::INFINITY;
30-
let mut y_max = -f32::INFINITY;
31-
32-
let mut word_start = true;
33-
let mut word_chars = vec![];
34-
let mut word_char_idx = 0;
21+
let mut trailing_space = out.chars().last().map_or(true, |c| c.is_whitespace());
3522

3623
for span in items {
37-
let mut offset = 0; // byte index of last char into span.text
24+
let mut offset = 0;
3825
let tr_inv = span.transform.matrix.inverse();
3926
let x_off = (tr_inv * span.transform.vector).x();
40-
27+
4128
let mut chars = span.chars.iter().peekable();
4229
while let Some(current) = chars.next() {
43-
let s;
44-
if let Some(next) = chars.peek() {
45-
s = &span.text[offset..next.offset];
30+
// Get text for current char
31+
let text = if let Some(next) = chars.peek() {
32+
let s = &span.text[offset..next.offset];
4633
offset = next.offset;
34+
s
4735
} else {
48-
s = &span.text[offset..];
36+
&span.text[offset..]
37+
};
38+
39+
// Calculate char positions
40+
let char_start = (span.transform.matrix * Vector2F::new(current.pos + x_off, 0.0)).x();
41+
let char_end = (span.transform.matrix * Vector2F::new(current.pos + x_off + current.width, 0.0)).x();
42+
43+
let is_whitespace = text.chars().all(|c| c.is_whitespace());
44+
45+
// Handle word boundaries
46+
if trailing_space && !is_whitespace {
47+
// Start new word after space
48+
current_word.start_new(out.len(), char_start);
49+
current_word.add_char(0, char_start, char_end);
50+
out.extend(text.nfkc());
51+
} else if !trailing_space {
52+
if is_whitespace {
53+
// End word at space
54+
words.push(current_word.build(out, char_end));
55+
current_word = WordBuilder::new(out.len());
56+
out.push(' ');
57+
} else if current.pos + x_off > current_word.end_pos + word_gap {
58+
// End word at large gap
59+
words.push(current_word.build(out, char_end));
60+
61+
current_word = WordBuilder::new(out.len());
62+
current_word.start_new(out.len(), char_start);
63+
current_word.add_char(0, char_start, char_end);
64+
out.extend(text.nfkc());
65+
} else {
66+
// Continue current word
67+
current_word.add_char(current_word.char_count, char_start, char_end);
68+
out.extend(text.nfkc());
69+
}
4970
}
50-
end = current.pos + x_off + current.width;
5171

52-
let char_start_pos = (span.transform.matrix * Vector2F::new(current.pos + x_off, 0.0)).x();
53-
let char_end_pos = (span.transform.matrix * Vector2F::new(end, 0.0)).x();
72+
trailing_space = is_whitespace;
73+
current_word.update_bounds(span.rect.min_y(), span.rect.max_y());
74+
}
75+
}
5476

55-
let is_whitespace = s.chars().all(|c| c.is_whitespace());
77+
// Add final word if any
78+
if !current_word.is_empty() {
79+
let end_pos = current_word.end_pos;
80+
words.push(current_word.build(out, end_pos));
81+
}
5682

57-
if trailing_space {
58-
if !is_whitespace {
59-
word_start = true;
60-
word_start_idx = out.len();
83+
words
84+
}
6185

62-
word_chars.push(Char {
63-
offset: 0,
64-
pos: char_start_pos,
65-
width: char_end_pos - char_start_pos,
66-
});
67-
out.extend(s.nfkc());
86+
// Helper struct to build up words
87+
struct WordBuilder {
88+
word_start_idx: usize,
6889

69-
word_char_idx += 1;
70-
}
71-
} else {
72-
if is_whitespace {
73-
words.push(Word {
74-
text: out[word_start_idx..].into(),
75-
rect: Rect {
76-
x: word_start_pos,
77-
y: y_min,
78-
h: y_max - y_min,
79-
w: word_end_pos - word_start_pos
80-
},
81-
chars: take(&mut word_chars)
82-
});
83-
out.push_str(" ");
84-
word_start_idx = out.len();
85-
word_char_idx = 0;
86-
} else if current.pos + x_off > end + word_gap {
87-
words.push(Word {
88-
text: out[word_start_idx..].into(),
89-
rect: Rect {
90-
x: word_start_pos,
91-
y: y_min,
92-
h: y_max - y_min,
93-
w: word_end_pos - word_start_pos
94-
},
95-
chars: take(&mut word_chars)
96-
});
97-
98-
word_start = true;
99-
word_start_idx = out.len();
100-
word_chars.push(Char {
101-
offset: 0,
102-
pos: char_start_pos,
103-
width: char_end_pos - char_start_pos,
104-
});
105-
word_char_idx += 1;
106-
107-
out.extend(s.nfkc());
108-
} else {
109-
word_chars.push(Char {
110-
offset: word_char_idx,
111-
pos: char_start_pos,
112-
width: char_end_pos - char_start_pos,
113-
});
114-
115-
word_char_idx += 1;
116-
out.extend(s.nfkc());
117-
}
118-
}
119-
trailing_space = is_whitespace;
90+
// For calculating the layout (position, width, height) of a word
91+
start_pos: f32,
92+
end_pos: f32, // trailing edge of the last char
93+
y_min: f32,
94+
y_max: f32,
95+
96+
chars: Vec<Char>,
97+
char_count: usize,
98+
started: bool,
99+
}
120100

121-
word_end_pos = char_end_pos;
101+
impl WordBuilder {
102+
fn new(word_start_idx: usize) -> Self {
103+
Self {
104+
word_start_idx,
105+
start_pos: 0.0,
106+
end_pos: 0.0,
107+
y_min: f32::INFINITY,
108+
y_max: -f32::INFINITY,
109+
chars: Vec::new(),
110+
char_count: 0,
111+
started: false,
112+
}
113+
}
122114

123-
if word_start {
124-
y_min = span.rect.min_y();
125-
y_max = span.rect.max_y();
126-
word_start_pos = char_start_pos;
127-
word_start = false;
128-
} else {
129-
y_min = y_min.min(span.rect.min_y());
130-
y_max = y_max.max(span.rect.max_y());
131-
}
115+
fn start_new(&mut self, word_start_idx: usize, start_pos: f32) {
116+
self.word_start_idx = word_start_idx;
117+
self.start_pos = start_pos;
118+
self.started = true;
119+
}
120+
121+
fn add_char(&mut self, offset: usize, start: f32, end: f32) {
122+
self.chars.push(Char {
123+
offset,
124+
pos: start,
125+
width: end - start,
126+
});
127+
self.end_pos = end;
128+
self.char_count += 1;
129+
}
130+
131+
fn update_bounds(&mut self, min_y: f32, max_y: f32) {
132+
if !self.started {
133+
self.y_min = min_y;
134+
self.y_max = max_y;
135+
self.started = true;
136+
} else {
137+
self.y_min = self.y_min.min(min_y);
138+
self.y_max = self.y_max.max(max_y);
132139
}
133140
}
134141

135-
words.push(Word {
136-
text: out[word_start_idx..].into(),
137-
rect: Rect {
138-
x: word_start_pos,
139-
y: y_min,
140-
h: y_max - y_min,
141-
w: word_end_pos - word_start_pos
142-
},
143-
chars: take(&mut word_chars)
144-
});
145-
146-
words
142+
fn is_empty(&self) -> bool {
143+
self.chars.is_empty()
144+
}
145+
146+
fn build(mut self, out: &str, end_pos: f32) -> Word {
147+
Word {
148+
text: out[self.word_start_idx..].into(),
149+
rect: Rect {
150+
x: self.start_pos,
151+
y: self.y_min,
152+
h: self.y_max - self.y_min,
153+
w: end_pos - self.start_pos
154+
},
155+
chars: take(&mut self.chars)
156+
}
157+
}
147158
}
148159

149160
/// Calculate gaps between each char,

0 commit comments

Comments
 (0)