@@ -6,76 +6,74 @@ use unicode_normalization::UnicodeNormalization;
6
6
use crate :: { util:: avg, flow:: { Word , Rect } } ;
7
7
8
8
pub fn concat_text < ' a , E : Encoder + ' a > ( out : & mut String , items : impl Iterator < Item =& ' a TextSpan < E > > + Clone ) -> Vec < Word > {
9
+ let word_gap = analyze_word_gap ( items. clone ( ) ) ;
9
10
let mut words: Vec < Word > = vec ! [ ] ;
10
-
11
- // Calculate gaps between each char, the unit is em, relative to the font size.
12
- let gaps = items. clone ( )
13
- . flat_map ( |s| {
14
- // the transform matrix is from em space to device space
15
- // so we need to invert it
16
- let tr_inv = s. transform . matrix . inverse ( ) ;
17
- let pos = ( tr_inv * s. transform . vector ) . x ( ) ;
18
11
19
- s. chars . iter ( )
20
- . filter ( |c| !s. text [ c. offset ..] . chars ( ) . next ( ) . unwrap ( ) . is_whitespace ( ) )
21
- . map ( move |c| ( c. pos + pos, c. pos + pos + c. width , s. font_size ) )
22
- } )
23
- . tuple_windows ( )
24
- . filter ( |( a, b) | b. 0 > a. 0 )
25
- . map ( |( a, b) | ( b. 0 - a. 1 ) . max ( 0.01 ) . min ( 0.25 * ( a. 2 + b. 2 ) ) ) ;
26
-
27
- let font_size = avg ( items. clone ( ) . map ( |s| s. font_size ) ) . unwrap ( ) ;
28
- //gaps.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
29
- let space_gap = ( 0.5 * font_size) . min ( 2.0 * avg ( gaps) . unwrap_or ( 0.0 ) ) ; //2.0 * gaps[gaps.len()/2];
30
-
31
12
let mut end = 0. ; // trailing edge of the last char
32
- // out中最后一个字符是否是空格
13
+
14
+ // Whether the last processed TextChar is a space
33
15
let mut trailing_space = out. chars ( ) . last ( ) . map ( |c| c. is_whitespace ( ) ) . unwrap_or ( true ) ;
16
+
34
17
let mut word_start_pos = 0.0 ;
18
+ let mut word_end_pos = 0.0 ;
19
+
35
20
let mut word_start_idx = out. len ( ) ;
36
21
let mut y_min = f32:: INFINITY ;
37
22
let mut y_max = -f32:: INFINITY ;
38
23
let mut word_start = true ;
39
- let mut word_end = 0.0 ;
40
24
41
25
for span in items {
42
- let mut pos = 0 ; // byte index of last char into span.text
26
+ let mut offset = 0 ; // byte index of last char into span.text
43
27
let tr_inv = span. transform . matrix . inverse ( ) ;
44
28
let x_off = ( tr_inv * span. transform . vector ) . x ( ) ;
45
29
46
- for c in span. chars . iter ( ) {
47
- // current string of TextChar
48
- let s = & span. text [ pos..c. offset ] ;
49
- if c. offset > 0 {
50
- let is_whitespace = s. chars ( ) . all ( |c| c. is_whitespace ( ) ) ;
51
- // 在不为空格的时候, 将 s 写入 out.
52
- if !trailing_space || !is_whitespace {
53
- out. extend ( s. nfkc ( ) ) ;
30
+ let chars = span. chars . as_slice ( ) ;
31
+ for ( i, c) in chars. iter ( ) . enumerate ( ) {
32
+ let next_offset = chars. get ( i + 1 ) . map_or ( span. text . len ( ) , |next| next. offset ) ;
33
+ let s: & str = & span. text [ offset..next_offset] ;
34
+
35
+ out. extend ( s. nfkc ( ) ) ;
36
+
37
+ let is_whitespace = s. chars ( ) . all ( |c| c. is_whitespace ( ) ) ;
38
+ let len = s. chars ( ) . count ( ) ;
39
+ if trailing_space {
40
+ if !is_whitespace {
41
+ word_start = true ;
42
+ word_start_idx = out. len ( ) - len;
54
43
}
55
44
trailing_space = is_whitespace;
45
+ } else {
46
+ trailing_space = is_whitespace;
47
+ if is_whitespace {
48
+ words. push ( Word {
49
+ text : out[ word_start_idx..out. len ( ) -len] . into ( ) ,
50
+ rect : Rect {
51
+ x : word_start_pos,
52
+ y : y_min,
53
+ h : y_max - y_min,
54
+ w : word_end_pos - word_start_pos
55
+ }
56
+ } ) ;
57
+ } else if c. pos + x_off > end + word_gap {
58
+ words. push ( Word {
59
+ text : out[ word_start_idx..] . into ( ) ,
60
+ rect : Rect {
61
+ x : word_start_pos,
62
+ y : y_min,
63
+ h : y_max - y_min,
64
+ w : word_end_pos - word_start_pos
65
+ }
66
+ } ) ;
67
+
68
+ out. push ( ' ' ) ;
69
+ trailing_space = true ;
70
+ word_start = true ;
71
+ word_start_idx = out. len ( ) - 1 ;
72
+ }
56
73
}
57
- // 在 s 不为空格,且有gap 的时候,记录一个 word.
58
- if !trailing_space && c. pos + x_off > end + space_gap {
59
- words. push ( Word {
60
- text : out[ word_start_idx..] . into ( ) ,
61
- rect : Rect {
62
- x : word_start_pos,
63
- y : y_min,
64
- h : y_max - y_min,
65
- w : word_end - word_start_pos
66
- }
67
- } ) ;
68
-
69
- out. push ( ' ' ) ;
70
- trailing_space = true ;
71
- word_start = true ;
72
- word_start_idx = out. len ( ) ;
73
- }
74
- pos = c. offset ;
74
+
75
75
end = c. pos + x_off + c. width ;
76
- if c. offset == 0 || !trailing_space {
77
- word_end = ( span. transform . matrix * Vector2F :: new ( end, 0.0 ) ) . x ( ) ;
78
- }
76
+ word_end_pos = ( span. transform . matrix * Vector2F :: new ( end, 0.0 ) ) . x ( ) ;
79
77
80
78
if word_start {
81
79
y_min = span. rect . min_y ( ) ;
@@ -86,25 +84,68 @@ pub fn concat_text<'a, E: Encoder + 'a>(out: &mut String, items: impl Iterator<I
86
84
y_min = y_min. min ( span. rect . min_y ( ) ) ;
87
85
y_max = y_max. max ( span. rect . max_y ( ) ) ;
88
86
}
89
- }
90
-
91
- trailing_space = span. text [ pos..] . chars ( ) . all ( |c| c. is_whitespace ( ) ) ;
92
87
93
- out. extend ( span. text [ pos..] . nfkc ( ) ) ;
88
+ offset = next_offset;
89
+ }
94
90
}
91
+
95
92
words. push ( Word {
96
93
text : out[ word_start_idx..] . into ( ) ,
97
94
rect : Rect {
98
95
x : word_start_pos,
99
96
y : y_min,
100
97
h : y_max - y_min,
101
- w : word_end - word_start_pos
98
+ w : word_end_pos - word_start_pos
102
99
}
103
100
} ) ;
104
-
101
+
105
102
words
106
103
}
107
104
105
+ /// Calculate gaps between each char,
106
+ /// The most important thing here is to make sure the gap is bigger than char gap, and less than word gap.
107
+ ///
108
+ /// for example:
109
+ /// think of something like "ab ____________c de"
110
+ ///
111
+ /// a-b has a zero space (or 0.01)
112
+ /// b-c has a huge space of 10
113
+ /// c-d has 0.2
114
+ /// d-e has 0.01
115
+ /// if we just take the average = 10.2 and divide that by 4 we get 2.5
116
+ /// and now c-d is smaller than that and not classified as a space
117
+ /// but if b-c is capped by the threshold of 0.5, the sum is 0.7, and the avg is 0.7/4 ~ 0.18
118
+ /// and everything is fine.
119
+
120
+ /// 0 + min(0.5, 10) + 0.2 + 0
121
+ /// 10 capped at 0.5 is0.5
122
+ /// min(0, 0.5) + min(10, 0.5) + min(0.2, 0.5) + min(0, 0.5)
123
+ /// 0 + 0.5 + 0.2 + 0
124
+ /// every value is limited to be at least 0.01 and not more than 0.5.
125
+ /// the 0.5 is 0.25 * font size of the left char and 0.25 * font size of the right char
126
+ /// if they are the same font size it is 0.5
127
+ fn analyze_word_gap < ' a , E : Encoder + ' a > ( items : impl Iterator < Item =& ' a TextSpan < E > > + Clone ) -> f32 {
128
+ let gaps = items. clone ( )
129
+ . flat_map ( |s| {
130
+ // the transform matrix is from em space to device space
131
+ // so we need to invert it
132
+ let tr_inv = s. transform . matrix . inverse ( ) ;
133
+ let pos = ( tr_inv * s. transform . vector ) . x ( ) ;
134
+
135
+ s. chars . iter ( )
136
+ . filter ( |c| !s. text [ c. offset ..] . chars ( ) . next ( ) . unwrap ( ) . is_whitespace ( ) )
137
+ . map ( move |c| ( c. pos + pos, c. pos + pos + c. width , s. font_size ) )
138
+ } )
139
+ . tuple_windows ( )
140
+ . filter ( |( a, b) | b. 0 > a. 0 )
141
+ . map ( |( a, b) | ( b. 0 - a. 1 ) . max ( 0.01 ) . min ( 0.25 * ( a. 2 + b. 2 ) ) ) ;
142
+
143
+ let avg_font_size = avg ( items. clone ( ) . map ( |s| s. font_size ) ) . unwrap ( ) ;
144
+ //gaps.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
145
+
146
+ ( 0.5 * avg_font_size) . min ( 2.0 * avg ( gaps) . unwrap_or ( 0.0 ) ) //2.0 * gaps[gaps.len()/2];
147
+ }
148
+
108
149
#[ cfg( test) ]
109
150
mod tests {
110
151
use pathfinder_geometry:: { rect:: RectF , transform2d:: Transform2F } ;
0 commit comments