4
4
// file that was distributed with this source code.
5
5
6
6
use divan:: { Bencher , black_box} ;
7
- use std:: path:: PathBuf ;
8
7
use uu_sort:: uumain;
9
- use uucore:: benchmark:: { create_test_file, run_util_function} ;
10
-
11
- /// Helper function to generate test data from a list of words
12
- fn generate_data_from_words ( words : & [ & str ] , num_lines : usize ) -> Vec < u8 > {
13
- let mut data = Vec :: new ( ) ;
14
- for i in 0 ..num_lines {
15
- let word = words[ i % words. len ( ) ] ;
16
- let number = i % 1000 ;
17
- data. extend_from_slice ( format ! ( "{word}_{number:03}\n " ) . as_bytes ( ) ) ;
18
- }
19
- data
20
- }
21
-
22
- /// Helper function to generate test data from a list of words without number suffix
23
- fn generate_data_from_words_simple ( words : & [ & str ] , num_lines : usize ) -> Vec < u8 > {
24
- let mut data = Vec :: new ( ) ;
25
- for i in 0 ..num_lines {
26
- let word = words[ i % words. len ( ) ] ;
27
- data. extend_from_slice ( format ! ( "{word}\n " ) . as_bytes ( ) ) ;
28
- }
29
- data
30
- }
31
-
32
- /// Generate test data with ASCII-only text
33
- fn generate_ascii_data ( num_lines : usize ) -> Vec < u8 > {
34
- let words = [
35
- "apple" ,
36
- "banana" ,
37
- "cherry" ,
38
- "date" ,
39
- "elderberry" ,
40
- "fig" ,
41
- "grape" ,
42
- "honeydew" ,
43
- "kiwi" ,
44
- "lemon" ,
45
- "mango" ,
46
- "nectarine" ,
47
- "orange" ,
48
- "papaya" ,
49
- "quince" ,
50
- "raspberry" ,
51
- "strawberry" ,
52
- "tangerine" ,
53
- "ugli" ,
54
- "vanilla" ,
55
- "watermelon" ,
56
- "xigua" ,
57
- "yellow" ,
58
- "zucchini" ,
59
- "avocado" ,
60
- ] ;
61
-
62
- generate_data_from_words ( & words, num_lines)
63
- }
64
-
65
- /// Generate test data with accented characters that require locale-aware sorting
66
- fn generate_accented_data ( num_lines : usize ) -> Vec < u8 > {
67
- let words = [
68
- // French words with accents
69
- "café" ,
70
- "naïve" ,
71
- "résumé" ,
72
- "fiancé" ,
73
- "crème" ,
74
- "déjà" ,
75
- "façade" ,
76
- "château" ,
77
- "élève" ,
78
- "côte" ,
79
- // German words with umlauts
80
- "über" ,
81
- "Müller" ,
82
- "schön" ,
83
- "Köln" ,
84
- "Düsseldorf" ,
85
- "Österreich" ,
86
- "Zürich" ,
87
- "Mädchen" ,
88
- "Bär" ,
89
- "größer" ,
90
- // Spanish words with tildes and accents
91
- "niño" ,
92
- "señor" ,
93
- "año" ,
94
- "mañana" ,
95
- "español" ,
96
- "corazón" ,
97
- "María" ,
98
- "José" ,
99
- "más" ,
100
- "también" ,
101
- ] ;
102
-
103
- generate_data_from_words ( & words, num_lines)
104
- }
105
-
106
- /// Generate test data with mixed ASCII and non-ASCII characters
107
- fn generate_mixed_data ( num_lines : usize ) -> Vec < u8 > {
108
- let words = [
109
- // Mix of ASCII and accented words
110
- "apple" ,
111
- "café" ,
112
- "banana" ,
113
- "naïve" ,
114
- "cherry" ,
115
- "résumé" ,
116
- "date" ,
117
- "fiancé" ,
118
- "elderberry" ,
119
- "crème" ,
120
- "über" ,
121
- "grape" ,
122
- "Müller" ,
123
- "honeydew" ,
124
- "schön" ,
125
- "niño" ,
126
- "kiwi" ,
127
- "señor" ,
128
- "lemon" ,
129
- "año" ,
130
- "mango" ,
131
- "María" ,
132
- "orange" ,
133
- "José" ,
134
- "papaya" ,
135
- ] ;
136
-
137
- generate_data_from_words ( & words, num_lines)
138
- }
139
-
140
- /// Generate test data with uppercase/lowercase variations
141
- fn generate_case_sensitive_data ( num_lines : usize ) -> Vec < u8 > {
142
- let base_words = [
143
- "apple" , "Apple" , "APPLE" , "banana" , "Banana" , "BANANA" , "café" , "Café" , "CAFÉ" , "über" ,
144
- "Über" , "ÜBER" ,
145
- ] ;
146
-
147
- generate_data_from_words_simple ( & base_words, num_lines)
148
- }
149
-
150
- fn setup_test_file ( data : & [ u8 ] ) -> PathBuf {
151
- let temp_dir = tempfile:: tempdir ( ) . unwrap ( ) ;
152
- let file_path = create_test_file ( data, temp_dir. path ( ) ) ;
153
- // Keep temp_dir alive by leaking it - the OS will clean it up
154
- std:: mem:: forget ( temp_dir) ;
155
- file_path
156
- }
8
+ use uucore:: benchmark:: { run_util_function, setup_test_file, text_data} ;
157
9
158
10
/// Benchmark sorting ASCII-only data
159
11
#[ divan:: bench( args = [ 100_000 , 500_000 ] ) ]
160
12
fn sort_ascii_only ( bencher : Bencher , num_lines : usize ) {
161
- let data = generate_ascii_data ( num_lines) ;
13
+ let data = text_data :: generate_ascii_data ( num_lines) ;
162
14
let file_path = setup_test_file ( & data) ;
163
15
164
16
bencher. bench ( || {
@@ -169,7 +21,7 @@ fn sort_ascii_only(bencher: Bencher, num_lines: usize) {
169
21
/// Benchmark sorting accented/non-ASCII data
170
22
#[ divan:: bench( args = [ 100_000 , 500_000 ] ) ]
171
23
fn sort_accented_data ( bencher : Bencher , num_lines : usize ) {
172
- let data = generate_accented_data ( num_lines) ;
24
+ let data = text_data :: generate_accented_data ( num_lines) ;
173
25
let file_path = setup_test_file ( & data) ;
174
26
175
27
bencher. bench ( || {
@@ -180,7 +32,7 @@ fn sort_accented_data(bencher: Bencher, num_lines: usize) {
180
32
/// Benchmark sorting mixed ASCII/non-ASCII data
181
33
#[ divan:: bench( args = [ 100_000 , 500_000 ] ) ]
182
34
fn sort_mixed_data ( bencher : Bencher , num_lines : usize ) {
183
- let data = generate_mixed_data ( num_lines) ;
35
+ let data = text_data :: generate_mixed_data ( num_lines) ;
184
36
let file_path = setup_test_file ( & data) ;
185
37
186
38
bencher. bench ( || {
@@ -191,7 +43,7 @@ fn sort_mixed_data(bencher: Bencher, num_lines: usize) {
191
43
/// Benchmark case-sensitive sorting with mixed case data
192
44
#[ divan:: bench( args = [ 100_000 , 500_000 ] ) ]
193
45
fn sort_case_sensitive ( bencher : Bencher , num_lines : usize ) {
194
- let data = generate_case_sensitive_data ( num_lines) ;
46
+ let data = text_data :: generate_case_sensitive_data ( num_lines) ;
195
47
let file_path = setup_test_file ( & data) ;
196
48
197
49
bencher. bench ( || {
@@ -202,7 +54,7 @@ fn sort_case_sensitive(bencher: Bencher, num_lines: usize) {
202
54
/// Benchmark case-insensitive sorting (fold case)
203
55
#[ divan:: bench( args = [ 100_000 , 500_000 ] ) ]
204
56
fn sort_case_insensitive ( bencher : Bencher , num_lines : usize ) {
205
- let data = generate_case_sensitive_data ( num_lines) ;
57
+ let data = text_data :: generate_case_sensitive_data ( num_lines) ;
206
58
let file_path = setup_test_file ( & data) ;
207
59
208
60
bencher. bench ( || {
@@ -216,7 +68,7 @@ fn sort_case_insensitive(bencher: Bencher, num_lines: usize) {
216
68
/// Benchmark dictionary order sorting (only blanks and alphanumeric)
217
69
#[ divan:: bench( args = [ 100_000 , 500_000 ] ) ]
218
70
fn sort_dictionary_order ( bencher : Bencher , num_lines : usize ) {
219
- let data = generate_mixed_data ( num_lines) ;
71
+ let data = text_data :: generate_mixed_data ( num_lines) ;
220
72
let file_path = setup_test_file ( & data) ;
221
73
222
74
bencher. bench ( || {
@@ -251,7 +103,7 @@ fn sort_numeric(bencher: Bencher, num_lines: usize) {
251
103
/// Benchmark reverse sorting with locale-aware data
252
104
#[ divan:: bench( args = [ 100_000 , 500_000 ] ) ]
253
105
fn sort_reverse_locale ( bencher : Bencher , num_lines : usize ) {
254
- let data = generate_accented_data ( num_lines) ;
106
+ let data = text_data :: generate_accented_data ( num_lines) ;
255
107
let file_path = setup_test_file ( & data) ;
256
108
257
109
bencher. bench ( || {
@@ -290,7 +142,7 @@ fn sort_key_field(bencher: Bencher, num_lines: usize) {
290
142
/// Benchmark unique sorting with locale-aware data
291
143
#[ divan:: bench( args = [ 100_000 , 500_000 ] ) ]
292
144
fn sort_unique_locale ( bencher : Bencher , num_lines : usize ) {
293
- let data = generate_accented_data ( num_lines) ;
145
+ let data = text_data :: generate_accented_data ( num_lines) ;
294
146
let file_path = setup_test_file ( & data) ;
295
147
296
148
bencher. bench ( || {
0 commit comments