44// file that was distributed with this source code.
55
66use divan:: { Bencher , black_box} ;
7- use std:: path:: PathBuf ;
87use uu_sort:: uumain;
9- use uucore:: benchmark:: { create_test_file, run_util_function} ;
10-
/// Builds newline-terminated test data by cycling through `words`.
///
/// Line `i` (zero-based) has the form `{word}_{number}` where `word` is
/// `words[i % words.len()]` and `number` is `i % 1000`, zero-padded to three
/// digits. Every line, including the last one, ends with a single `\n`.
///
/// Returns an empty buffer when `num_lines` is zero or when `words` is empty
/// (there is nothing to cycle through, so no line can be produced).
fn generate_data_from_words(words: &[&str], num_lines: usize) -> Vec<u8> {
    let mut data = Vec::new();
    // `cycle()` on an empty slice yields nothing, so an empty word list gives
    // empty output instead of a remainder-by-zero panic.
    for (i, word) in words.iter().cycle().take(num_lines).enumerate() {
        let number = i % 1000;
        data.extend_from_slice(format!("{word}_{number:03}\n").as_bytes());
    }
    data
}
21-
/// Builds newline-terminated test data by cycling through `words`, with no
/// numeric suffix.
///
/// Line `i` (zero-based) is exactly `words[i % words.len()]` followed by a
/// single `\n`.
///
/// Returns an empty buffer when `num_lines` is zero or when `words` is empty
/// (there is nothing to cycle through, so no line can be produced).
fn generate_data_from_words_simple(words: &[&str], num_lines: usize) -> Vec<u8> {
    let mut data = Vec::new();
    // `cycle()` on an empty slice yields nothing, so an empty word list gives
    // empty output instead of a remainder-by-zero panic.
    for word in words.iter().cycle().take(num_lines) {
        data.extend_from_slice(word.as_bytes());
        data.push(b'\n');
    }
    data
}
31-
32- /// Generate test data with ASCII-only text
33- fn generate_ascii_data ( num_lines : usize ) -> Vec < u8 > {
34- let words = [
35- "apple" ,
36- "banana" ,
37- "cherry" ,
38- "date" ,
39- "elderberry" ,
40- "fig" ,
41- "grape" ,
42- "honeydew" ,
43- "kiwi" ,
44- "lemon" ,
45- "mango" ,
46- "nectarine" ,
47- "orange" ,
48- "papaya" ,
49- "quince" ,
50- "raspberry" ,
51- "strawberry" ,
52- "tangerine" ,
53- "ugli" ,
54- "vanilla" ,
55- "watermelon" ,
56- "xigua" ,
57- "yellow" ,
58- "zucchini" ,
59- "avocado" ,
60- ] ;
61-
62- generate_data_from_words ( & words, num_lines)
63- }
64-
65- /// Generate test data with accented characters that require locale-aware sorting
66- fn generate_accented_data ( num_lines : usize ) -> Vec < u8 > {
67- let words = [
68- // French words with accents
69- "café" ,
70- "naïve" ,
71- "résumé" ,
72- "fiancé" ,
73- "crème" ,
74- "déjà" ,
75- "façade" ,
76- "château" ,
77- "élève" ,
78- "côte" ,
79- // German words with umlauts
80- "über" ,
81- "Müller" ,
82- "schön" ,
83- "Köln" ,
84- "Düsseldorf" ,
85- "Österreich" ,
86- "Zürich" ,
87- "Mädchen" ,
88- "Bär" ,
89- "größer" ,
90- // Spanish words with tildes and accents
91- "niño" ,
92- "señor" ,
93- "año" ,
94- "mañana" ,
95- "español" ,
96- "corazón" ,
97- "María" ,
98- "José" ,
99- "más" ,
100- "también" ,
101- ] ;
102-
103- generate_data_from_words ( & words, num_lines)
104- }
105-
106- /// Generate test data with mixed ASCII and non-ASCII characters
107- fn generate_mixed_data ( num_lines : usize ) -> Vec < u8 > {
108- let words = [
109- // Mix of ASCII and accented words
110- "apple" ,
111- "café" ,
112- "banana" ,
113- "naïve" ,
114- "cherry" ,
115- "résumé" ,
116- "date" ,
117- "fiancé" ,
118- "elderberry" ,
119- "crème" ,
120- "über" ,
121- "grape" ,
122- "Müller" ,
123- "honeydew" ,
124- "schön" ,
125- "niño" ,
126- "kiwi" ,
127- "señor" ,
128- "lemon" ,
129- "año" ,
130- "mango" ,
131- "María" ,
132- "orange" ,
133- "José" ,
134- "papaya" ,
135- ] ;
136-
137- generate_data_from_words ( & words, num_lines)
138- }
139-
140- /// Generate test data with uppercase/lowercase variations
141- fn generate_case_sensitive_data ( num_lines : usize ) -> Vec < u8 > {
142- let base_words = [
143- "apple" , "Apple" , "APPLE" , "banana" , "Banana" , "BANANA" , "café" , "Café" , "CAFÉ" , "über" ,
144- "Über" , "ÜBER" ,
145- ] ;
146-
147- generate_data_from_words_simple ( & base_words, num_lines)
148- }
149-
150- fn setup_test_file ( data : & [ u8 ] ) -> PathBuf {
151- let temp_dir = tempfile:: tempdir ( ) . unwrap ( ) ;
152- let file_path = create_test_file ( data, temp_dir. path ( ) ) ;
153- // Keep temp_dir alive by leaking it - the OS will clean it up
154- std:: mem:: forget ( temp_dir) ;
155- file_path
156- }
8+ use uucore:: benchmark:: { run_util_function, setup_test_file, text_data} ;
1579
15810/// Benchmark sorting ASCII-only data
15911#[ divan:: bench( args = [ 100_000 , 500_000 ] ) ]
16012fn sort_ascii_only ( bencher : Bencher , num_lines : usize ) {
161- let data = generate_ascii_data ( num_lines) ;
13+ let data = text_data :: generate_ascii_data ( num_lines) ;
16214 let file_path = setup_test_file ( & data) ;
16315
16416 bencher. bench ( || {
@@ -169,7 +21,7 @@ fn sort_ascii_only(bencher: Bencher, num_lines: usize) {
16921/// Benchmark sorting accented/non-ASCII data
17022#[ divan:: bench( args = [ 100_000 , 500_000 ] ) ]
17123fn sort_accented_data ( bencher : Bencher , num_lines : usize ) {
172- let data = generate_accented_data ( num_lines) ;
24+ let data = text_data :: generate_accented_data ( num_lines) ;
17325 let file_path = setup_test_file ( & data) ;
17426
17527 bencher. bench ( || {
@@ -180,7 +32,7 @@ fn sort_accented_data(bencher: Bencher, num_lines: usize) {
18032/// Benchmark sorting mixed ASCII/non-ASCII data
18133#[ divan:: bench( args = [ 100_000 , 500_000 ] ) ]
18234fn sort_mixed_data ( bencher : Bencher , num_lines : usize ) {
183- let data = generate_mixed_data ( num_lines) ;
35+ let data = text_data :: generate_mixed_data ( num_lines) ;
18436 let file_path = setup_test_file ( & data) ;
18537
18638 bencher. bench ( || {
@@ -191,7 +43,7 @@ fn sort_mixed_data(bencher: Bencher, num_lines: usize) {
19143/// Benchmark case-sensitive sorting with mixed case data
19244#[ divan:: bench( args = [ 100_000 , 500_000 ] ) ]
19345fn sort_case_sensitive ( bencher : Bencher , num_lines : usize ) {
194- let data = generate_case_sensitive_data ( num_lines) ;
46+ let data = text_data :: generate_case_sensitive_data ( num_lines) ;
19547 let file_path = setup_test_file ( & data) ;
19648
19749 bencher. bench ( || {
@@ -202,7 +54,7 @@ fn sort_case_sensitive(bencher: Bencher, num_lines: usize) {
20254/// Benchmark case-insensitive sorting (fold case)
20355#[ divan:: bench( args = [ 100_000 , 500_000 ] ) ]
20456fn sort_case_insensitive ( bencher : Bencher , num_lines : usize ) {
205- let data = generate_case_sensitive_data ( num_lines) ;
57+ let data = text_data :: generate_case_sensitive_data ( num_lines) ;
20658 let file_path = setup_test_file ( & data) ;
20759
20860 bencher. bench ( || {
@@ -216,7 +68,7 @@ fn sort_case_insensitive(bencher: Bencher, num_lines: usize) {
21668/// Benchmark dictionary order sorting (only blanks and alphanumeric)
21769#[ divan:: bench( args = [ 100_000 , 500_000 ] ) ]
21870fn sort_dictionary_order ( bencher : Bencher , num_lines : usize ) {
219- let data = generate_mixed_data ( num_lines) ;
71+ let data = text_data :: generate_mixed_data ( num_lines) ;
22072 let file_path = setup_test_file ( & data) ;
22173
22274 bencher. bench ( || {
@@ -251,7 +103,7 @@ fn sort_numeric(bencher: Bencher, num_lines: usize) {
251103/// Benchmark reverse sorting with locale-aware data
252104#[ divan:: bench( args = [ 100_000 , 500_000 ] ) ]
253105fn sort_reverse_locale ( bencher : Bencher , num_lines : usize ) {
254- let data = generate_accented_data ( num_lines) ;
106+ let data = text_data :: generate_accented_data ( num_lines) ;
255107 let file_path = setup_test_file ( & data) ;
256108
257109 bencher. bench ( || {
@@ -290,7 +142,7 @@ fn sort_key_field(bencher: Bencher, num_lines: usize) {
290142/// Benchmark unique sorting with locale-aware data
291143#[ divan:: bench( args = [ 100_000 , 500_000 ] ) ]
292144fn sort_unique_locale ( bencher : Bencher , num_lines : usize ) {
293- let data = generate_accented_data ( num_lines) ;
145+ let data = text_data :: generate_accented_data ( num_lines) ;
294146 let file_path = setup_test_file ( & data) ;
295147
296148 bencher. bench ( || {
0 commit comments