@@ -7,12 +7,15 @@ use crate::{util::avg, flow::{Word, Rect}};
7
7
8
8
pub fn concat_text < ' a , E : Encoder + ' a > ( out : & mut String , items : impl Iterator < Item =& ' a TextSpan < E > > + Clone ) -> Vec < Word > {
9
9
let mut words: Vec < Word > = vec ! [ ] ;
10
- // dbg!(items.clone().map(|s| s).collect::<Vec<_>>());
11
- // gaps between each char
10
+
11
+ // Compute the gaps between consecutive chars, in em units (relative to the font size).
12
12
let gaps = items. clone ( )
13
13
. flat_map ( |s| {
14
+ // the transform matrix is from em space to device space
15
+ // so we need to invert it
14
16
let tr_inv = s. transform . matrix . inverse ( ) ;
15
17
let pos = ( tr_inv * s. transform . vector ) . x ( ) ;
18
+
16
19
s. chars . iter ( )
17
20
. filter ( |c| !s. text [ c. offset ..] . chars ( ) . next ( ) . unwrap ( ) . is_whitespace ( ) )
18
21
. map ( move |c| ( c. pos + pos, c. pos + pos + c. width , s. font_size ) )
@@ -26,6 +29,7 @@ pub fn concat_text<'a, E: Encoder + 'a>(out: &mut String, items: impl Iterator<I
26
29
let space_gap = ( 0.5 * font_size) . min ( 2.0 * avg ( gaps) . unwrap_or ( 0.0 ) ) ; //2.0 * gaps[gaps.len()/2];
27
30
28
31
let mut end = 0. ; // trailing edge of the last char
32
+ // whether the last character of out is whitespace (true when out is empty)
29
33
let mut trailing_space = out. chars ( ) . last ( ) . map ( |c| c. is_whitespace ( ) ) . unwrap_or ( true ) ;
30
34
let mut word_start_pos = 0.0 ;
31
35
let mut word_start_idx = out. len ( ) ;
@@ -38,16 +42,19 @@ pub fn concat_text<'a, E: Encoder + 'a>(out: &mut String, items: impl Iterator<I
38
42
let mut pos = 0 ; // byte index of last char into span.text
39
43
let tr_inv = span. transform . matrix . inverse ( ) ;
40
44
let x_off = ( tr_inv * span. transform . vector ) . x ( ) ;
45
+
41
46
for c in span. chars . iter ( ) {
42
-
47
+ // slice of span.text from pos up to this char's byte offset
43
48
let s = & span. text [ pos..c. offset ] ;
44
49
if c. offset > 0 {
45
50
let is_whitespace = s. chars ( ) . all ( |c| c. is_whitespace ( ) ) ;
51
+ // write s to out, unless s is whitespace and out already ends in whitespace
46
52
if !trailing_space || !is_whitespace {
47
53
out. extend ( s. nfkc ( ) ) ;
48
54
}
49
55
trailing_space = is_whitespace;
50
56
}
57
+ // when s is not whitespace and this char starts more than space_gap past end, record a word
51
58
if !trailing_space && c. pos + x_off > end + space_gap {
52
59
words. push ( Word {
53
60
text : out[ word_start_idx..] . into ( ) ,
@@ -80,6 +87,7 @@ pub fn concat_text<'a, E: Encoder + 'a>(out: &mut String, items: impl Iterator<I
80
87
y_max = y_max. max ( span. rect . max_y ( ) ) ;
81
88
}
82
89
}
90
+
83
91
trailing_space = span. text [ pos..] . chars ( ) . all ( |c| c. is_whitespace ( ) ) ;
84
92
85
93
out. extend ( span. text [ pos..] . nfkc ( ) ) ;
@@ -95,4 +103,51 @@ pub fn concat_text<'a, E: Encoder + 'a>(out: &mut String, items: impl Iterator<I
95
103
} ) ;
96
104
97
105
words
106
+ }
107
+
108
+ #[ cfg( test) ]
109
+ mod tests { // Unit tests for concat_text.
110
+ use pathfinder_geometry:: { rect:: RectF , transform2d:: Transform2F } ;
111
+ use pdf_render:: { font:: OutlineBuilder , Fill , TextChar } ;
112
+
113
+ use super :: * ;
114
+
115
+ #[ test]
116
+ fn test_concat_text ( ) { // Feeds one "hello world" span to concat_text and checks the appended text and the word count.
117
+ let text_span: TextSpan < OutlineBuilder > = TextSpan { // the only span passed in; built by hand with literal values
118
+ rect : RectF :: from_points ( Vector2F :: new ( 56.8 , 55.85077 ) , Vector2F :: new ( 136.26399 , 67.85077 ) ) ,
119
+ width : 79.464 , // equals 11 chars x 7.224 per char
120
+ bbox : None ,
121
+ font_size : 12.0 ,
122
+ font : None ,
123
+ text : "hello world" . to_string ( ) ,
124
+ chars : vec ! [ // one entry per byte of the text; every char has the same width and pos advances by roughly that width
125
+ TextChar { offset: 0 , pos: 0.0 , width: 7.224001 } ,
126
+ TextChar { offset: 1 , pos: 7.224001 , width: 7.224001 } ,
127
+ TextChar { offset: 2 , pos: 14.448002 , width: 7.224001 } ,
128
+ TextChar { offset: 3 , pos: 21.672003 , width: 7.224001 } ,
129
+ TextChar { offset: 4 , pos: 28.896004 , width: 7.224001 } ,
130
+ TextChar { offset: 5 , pos: 36.120003 , width: 7.224001 } , // offset 5 is the space separating the two words
131
+ TextChar { offset: 6 , pos: 43.344 , width: 7.224001 } ,
132
+ TextChar { offset: 7 , pos: 50.568 , width: 7.224001 } ,
133
+ TextChar { offset: 8 , pos: 57.792 , width: 7.224001 } ,
134
+ TextChar { offset: 9 , pos: 65.016 , width: 7.224001 } ,
135
+ TextChar { offset: 10 , pos: 72.24 , width: 7.224001 } ,
136
+ ] ,
137
+ color : Fill :: Solid ( 0.0 , 0.5019608 , 0.0 ) ,
138
+ alpha : 1.0 ,
139
+ transform : Transform2F :: row_major ( 1.0 , 0.0 , 56.8 , 0.0 , 1.0 , 67.85077 ) , // NOTE(review): presumably an identity matrix translated by (56.8, 67.85077) - confirm row_major argument order
140
+ mode : pdf:: content:: TextMode :: Fill ,
141
+ op_nr : 18 ,
142
+ } ;
143
+
144
+ let mut output = String :: new ( ) ; // starts empty, so anything in it afterwards was appended by concat_text
145
+ let words = concat_text ( & mut output, vec ! [ & text_span] . into_iter ( ) ) ; // iterator yields the single span by reference
146
+
147
+ // The text appended to output must equal the span's text
148
+ assert_eq ! ( output, "hello world" ) ;
149
+
150
+ // Only the number of returned words is checked, not their text
151
+ assert_eq ! ( words. len( ) , 2 ) ; // Expect two words: "hello" and "world"
152
+ }
98
153
}
0 commit comments