Skip to content

Commit 0e8f281

Browse files
committed
sort: add benchmark
1 parent 5eb8144 commit 0e8f281

File tree

4 files changed

+546
-0
lines changed

4 files changed

+546
-0
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/uu/sort/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ fluent = { workspace = true }
4040
[target.'cfg(target_os = "linux")'.dependencies]
4141
nix = { workspace = true }
4242

43+
[dev-dependencies]
44+
divan = { workspace = true }
45+
tempfile = { workspace = true }
46+
uucore = { workspace = true, features = ["benchmark", "fs", "parser", "version-cmp", "i18n-collator"] }
47+
4348
[[bin]]
4449
name = "sort"
4550
path = "src/main.rs"

src/uu/sort/benches/sort_bench.rs

Lines changed: 309 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,309 @@
1+
// This file is part of the uutils coreutils package.
2+
//
3+
// For the full copyright and license information, please view the LICENSE
4+
// file that was distributed with this source code.
5+
6+
use divan::{Bencher, black_box};
7+
use uu_sort::uumain;
8+
use uucore::benchmark::{create_test_file, run_util_function};
9+
10+
/// Generate test data with ASCII-only text
///
/// Returns `num_lines` newline-terminated lines of the form `<word>_<NNN>`, where the
/// word cycles through a fixed list and `NNN` is the line index modulo 1000, zero-padded
/// to three digits.
fn generate_ascii_data(num_lines: usize) -> Vec<u8> {
    use std::io::Write;

    let words = [
        "apple",
        "banana",
        "cherry",
        "date",
        "elderberry",
        "fig",
        "grape",
        "honeydew",
        "kiwi",
        "lemon",
        "mango",
        "nectarine",
        "orange",
        "papaya",
        "quince",
        "raspberry",
        "strawberry",
        "tangerine",
        "ugli",
        "vanilla",
        "watermelon",
        "xigua",
        "yellow",
        "zucchini",
        "avocado",
    ];

    // A line is at most `longest word + "_NNN\n"` (5 bytes), so reserving that upper
    // bound up front means the buffer never reallocates while it is being filled.
    let longest = words.iter().map(|w| w.len()).max().unwrap_or(0);
    let mut data: Vec<u8> = Vec::with_capacity(num_lines.saturating_mul(longest + 5));

    for (i, word) in words.iter().cycle().take(num_lines).enumerate() {
        // Format straight into the buffer instead of building a temporary `String`.
        writeln!(data, "{word}_{:03}", i % 1000).expect("writing to a Vec<u8> cannot fail");
    }

    data
}
49+
50+
/// Generate test data with accented characters that require locale-aware sorting
///
/// Returns `num_lines` newline-terminated, UTF-8 encoded lines of the form
/// `<word>_<NNN>`, where the word cycles through a fixed list of French, German and
/// Spanish words and `NNN` is the line index modulo 1000, zero-padded to three digits.
fn generate_accented_data(num_lines: usize) -> Vec<u8> {
    use std::io::Write;

    let words = [
        // French words with accents
        "café",
        "naïve",
        "résumé",
        "fiancé",
        "crème",
        "déjà",
        "façade",
        "château",
        "élève",
        "côte",
        // German words with umlauts
        "über",
        "Müller",
        "schön",
        "Köln",
        "Düsseldorf",
        "Österreich",
        "Zürich",
        "Mädchen",
        "Bär",
        "größer",
        // Spanish words with tildes and accents
        "niño",
        "señor",
        "año",
        "mañana",
        "español",
        "corazón",
        "María",
        "José",
        "más",
        "también",
    ];

    // `len()` counts bytes, so this is a true upper bound even for multi-byte words:
    // a line is at most `longest word + "_NNN\n"` (5 bytes). Reserving it up front
    // means the buffer never reallocates while it is being filled.
    let longest = words.iter().map(|w| w.len()).max().unwrap_or(0);
    let mut data: Vec<u8> = Vec::with_capacity(num_lines.saturating_mul(longest + 5));

    for (i, word) in words.iter().cycle().take(num_lines).enumerate() {
        // Format straight into the buffer instead of building a temporary `String`.
        writeln!(data, "{word}_{:03}", i % 1000).expect("writing to a Vec<u8> cannot fail");
    }

    data
}
97+
98+
/// Generate test data with mixed ASCII and non-ASCII characters
///
/// Returns `num_lines` newline-terminated, UTF-8 encoded lines of the form
/// `<word>_<NNN>`, where the word cycles through a fixed list that interleaves plain
/// ASCII and accented words, and `NNN` is the line index modulo 1000, zero-padded to
/// three digits.
fn generate_mixed_data(num_lines: usize) -> Vec<u8> {
    use std::io::Write;

    let words = [
        // Mix of ASCII and accented words
        "apple",
        "café",
        "banana",
        "naïve",
        "cherry",
        "résumé",
        "date",
        "fiancé",
        "elderberry",
        "crème",
        "über",
        "grape",
        "Müller",
        "honeydew",
        "schön",
        "niño",
        "kiwi",
        "señor",
        "lemon",
        "año",
        "mango",
        "María",
        "orange",
        "José",
        "papaya",
    ];

    // `len()` counts bytes, so this is a true upper bound even for multi-byte words:
    // a line is at most `longest word + "_NNN\n"` (5 bytes). Reserving it up front
    // means the buffer never reallocates while it is being filled.
    let longest = words.iter().map(|w| w.len()).max().unwrap_or(0);
    let mut data: Vec<u8> = Vec::with_capacity(num_lines.saturating_mul(longest + 5));

    for (i, word) in words.iter().cycle().take(num_lines).enumerate() {
        // Format straight into the buffer instead of building a temporary `String`.
        writeln!(data, "{word}_{:03}", i % 1000).expect("writing to a Vec<u8> cannot fail");
    }

    data
}
138+
139+
/// Generate test data with uppercase/lowercase variations
///
/// Returns `num_lines` newline-terminated, UTF-8 encoded lines, cycling through a fixed
/// list that contains each base word in lowercase, capitalized and all-uppercase form.
fn generate_case_sensitive_data(num_lines: usize) -> Vec<u8> {
    use std::io::Write;

    let base_words = [
        "apple", "Apple", "APPLE", "banana", "Banana", "BANANA", "café", "Café", "CAFÉ", "über",
        "Über", "ÜBER",
    ];

    // A line is at most `longest word + "\n"` bytes; reserving that upper bound up
    // front means the buffer never reallocates while it is being filled.
    let longest = base_words.iter().map(|w| w.len()).max().unwrap_or(0);
    let mut data: Vec<u8> = Vec::with_capacity(num_lines.saturating_mul(longest + 1));

    for word in base_words.iter().cycle().take(num_lines) {
        // Format straight into the buffer instead of building a temporary `String`.
        writeln!(data, "{word}").expect("writing to a Vec<u8> cannot fail");
    }

    data
}
154+
155+
/// Create a fresh temporary directory and a benchmark input file for `data` inside it.
///
/// Returns the directory handle together with the path produced by
/// `create_test_file`. The caller should keep the returned `TempDir` bound for as
/// long as the file is needed, since dropping it removes the directory.
///
/// # Panics
///
/// Panics if the temporary directory cannot be created.
fn setup_test_file(data: &[u8]) -> (tempfile::TempDir, std::path::PathBuf) {
    let dir = tempfile::tempdir().unwrap();
    // The file must be created before `dir` is moved into the returned tuple.
    let path = create_test_file(data, dir.path());
    (dir, path)
}
160+
161+
/// Benchmark sorting ASCII-only data
162+
#[divan::bench(args = [100_000, 500_000])]
163+
fn sort_ascii_only(bencher: Bencher, num_lines: usize) {
164+
let data = generate_ascii_data(num_lines);
165+
let (_temp_dir, file_path) = setup_test_file(&data);
166+
167+
bencher.bench(|| {
168+
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
169+
});
170+
}
171+
172+
/// Benchmark sorting accented/non-ASCII data
173+
#[divan::bench(args = [100_000, 500_000])]
174+
fn sort_accented_data(bencher: Bencher, num_lines: usize) {
175+
let data = generate_accented_data(num_lines);
176+
let (_temp_dir, file_path) = setup_test_file(&data);
177+
178+
bencher.bench(|| {
179+
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
180+
});
181+
}
182+
183+
/// Benchmark sorting mixed ASCII/non-ASCII data
184+
#[divan::bench(args = [100_000, 500_000])]
185+
fn sort_mixed_data(bencher: Bencher, num_lines: usize) {
186+
let data = generate_mixed_data(num_lines);
187+
let (_temp_dir, file_path) = setup_test_file(&data);
188+
189+
bencher.bench(|| {
190+
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
191+
});
192+
}
193+
194+
/// Benchmark case-sensitive sorting with mixed case data
195+
#[divan::bench(args = [100_000, 500_000])]
196+
fn sort_case_sensitive(bencher: Bencher, num_lines: usize) {
197+
let data = generate_case_sensitive_data(num_lines);
198+
let (_temp_dir, file_path) = setup_test_file(&data);
199+
200+
bencher.bench(|| {
201+
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
202+
});
203+
}
204+
205+
/// Benchmark case-insensitive sorting (fold case)
206+
#[divan::bench(args = [100_000, 500_000])]
207+
fn sort_case_insensitive(bencher: Bencher, num_lines: usize) {
208+
let data = generate_case_sensitive_data(num_lines);
209+
let (_temp_dir, file_path) = setup_test_file(&data);
210+
211+
bencher.bench(|| {
212+
black_box(run_util_function(
213+
uumain,
214+
&["-f", file_path.to_str().unwrap()],
215+
));
216+
});
217+
}
218+
219+
/// Benchmark dictionary order sorting (only blanks and alphanumeric)
220+
#[divan::bench(args = [100_000, 500_000])]
221+
fn sort_dictionary_order(bencher: Bencher, num_lines: usize) {
222+
let data = generate_mixed_data(num_lines);
223+
let (_temp_dir, file_path) = setup_test_file(&data);
224+
225+
bencher.bench(|| {
226+
black_box(run_util_function(
227+
uumain,
228+
&["-d", file_path.to_str().unwrap()],
229+
));
230+
});
231+
}
232+
233+
/// Benchmark numeric sorting with mixed data
234+
#[divan::bench(args = [100_000, 500_000])]
235+
fn sort_numeric(bencher: Bencher, num_lines: usize) {
236+
let mut data = Vec::new();
237+
238+
// Generate numeric data with some text prefixes
239+
for i in 0..num_lines {
240+
let value = (i * 13) % 10000; // Pseudo-random numeric values
241+
data.extend_from_slice(format!("value_{value}\n").as_bytes());
242+
}
243+
244+
let (_temp_dir, file_path) = setup_test_file(&data);
245+
246+
bencher.bench(|| {
247+
black_box(run_util_function(
248+
uumain,
249+
&["-n", file_path.to_str().unwrap()],
250+
));
251+
});
252+
}
253+
254+
/// Benchmark reverse sorting with locale-aware data
255+
#[divan::bench(args = [100_000, 500_000])]
256+
fn sort_reverse_locale(bencher: Bencher, num_lines: usize) {
257+
let data = generate_accented_data(num_lines);
258+
let (_temp_dir, file_path) = setup_test_file(&data);
259+
260+
bencher.bench(|| {
261+
black_box(run_util_function(
262+
uumain,
263+
&["-r", file_path.to_str().unwrap()],
264+
));
265+
});
266+
}
267+
268+
/// Benchmark sorting with specific key field
269+
#[divan::bench(args = [100_000, 500_000])]
270+
fn sort_key_field(bencher: Bencher, num_lines: usize) {
271+
let mut data = Vec::new();
272+
273+
// Generate data with multiple fields
274+
let words = ["café", "naïve", "apple", "über", "banana"];
275+
for i in 0..num_lines {
276+
let word = words[i % words.len()];
277+
let num1 = i % 100;
278+
let num2 = (i * 7) % 100;
279+
data.extend_from_slice(format!("{num1}\t{word}\t{num2}\n").as_bytes());
280+
}
281+
282+
let (_temp_dir, file_path) = setup_test_file(&data);
283+
284+
bencher.bench(|| {
285+
// Sort by second field
286+
black_box(run_util_function(
287+
uumain,
288+
&["-k", "2", file_path.to_str().unwrap()],
289+
));
290+
});
291+
}
292+
293+
/// Benchmark unique sorting with locale-aware data
294+
#[divan::bench(args = [100_000, 500_000])]
295+
fn sort_unique_locale(bencher: Bencher, num_lines: usize) {
296+
let data = generate_accented_data(num_lines);
297+
let (_temp_dir, file_path) = setup_test_file(&data);
298+
299+
bencher.bench(|| {
300+
black_box(run_util_function(
301+
uumain,
302+
&["-u", file_path.to_str().unwrap()],
303+
));
304+
});
305+
}
306+
307+
fn main() {
308+
divan::main();
309+
}

0 commit comments

Comments
 (0)