Skip to content

Commit 662ea52

Browse files
committed
sort: add benchmark
1 parent 7dbeb8f commit 662ea52

File tree

4 files changed

+552
-0
lines changed

4 files changed

+552
-0
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/uu/sort/Cargo.toml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,21 @@ fluent = { workspace = true }
4040
[target.'cfg(target_os = "linux")'.dependencies]
4141
nix = { workspace = true }
4242

43+
[dev-dependencies]
44+
divan = { workspace = true }
45+
tempfile = { workspace = true }
46+
uucore = { workspace = true, features = [
47+
"benchmark",
48+
"fs",
49+
"parser",
50+
"version-cmp",
51+
"i18n-collator",
52+
] }
53+
4354
[[bin]]
4455
name = "sort"
4556
path = "src/main.rs"
57+
58+
[[bench]]
59+
name = "sort_bench"
60+
harness = false

src/uu/sort/benches/sort_bench.rs

Lines changed: 305 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,305 @@
1+
// This file is part of the uutils coreutils package.
2+
//
3+
// For the full copyright and license information, please view the LICENSE
4+
// file that was distributed with this source code.
5+
6+
use divan::{Bencher, black_box};
7+
use uu_sort::uumain;
8+
use uucore::benchmark::{create_test_file, run_util_function};
9+
10+
/// Builds newline-terminated benchmark input by cycling through `words`.
///
/// Line `i` (zero-based) consists of `words[i % words.len()]`, an underscore
/// and `i % 1000` zero-padded to three digits, e.g. `apple_007`.
///
/// # Panics
///
/// Panics if `words` is empty while `num_lines` is non-zero, because the
/// index is computed modulo `words.len()`.
fn generate_data_from_words(words: &[&str], num_lines: usize) -> Vec<u8> {
    (0..num_lines).fold(Vec::new(), |mut buffer, line_idx| {
        let stem = words[line_idx % words.len()];
        let suffix = line_idx % 1000;
        let line = format!("{stem}_{suffix:03}\n");
        buffer.extend_from_slice(line.as_bytes());
        buffer
    })
}
20+
21+
/// Builds newline-terminated benchmark input by cycling through `words`,
/// emitting each word verbatim (no numeric suffix).
///
/// Line `i` (zero-based) is exactly `words[i % words.len()]`.
///
/// # Panics
///
/// Panics if `words` is empty while `num_lines` is non-zero, because the
/// index is computed modulo `words.len()`.
fn generate_data_from_words_simple(words: &[&str], num_lines: usize) -> Vec<u8> {
    (0..num_lines).fold(Vec::new(), |mut buffer, line_idx| {
        buffer.extend_from_slice(words[line_idx % words.len()].as_bytes());
        buffer.push(b'\n');
        buffer
    })
}
30+
31+
/// Generate test data with ASCII-only text
32+
fn generate_ascii_data(num_lines: usize) -> Vec<u8> {
33+
let words = [
34+
"apple",
35+
"banana",
36+
"cherry",
37+
"date",
38+
"elderberry",
39+
"fig",
40+
"grape",
41+
"honeydew",
42+
"kiwi",
43+
"lemon",
44+
"mango",
45+
"nectarine",
46+
"orange",
47+
"papaya",
48+
"quince",
49+
"raspberry",
50+
"strawberry",
51+
"tangerine",
52+
"ugli",
53+
"vanilla",
54+
"watermelon",
55+
"xigua",
56+
"yellow",
57+
"zucchini",
58+
"avocado",
59+
];
60+
61+
generate_data_from_words(&words, num_lines)
62+
}
63+
64+
/// Generate test data with accented characters that require locale-aware sorting
65+
fn generate_accented_data(num_lines: usize) -> Vec<u8> {
66+
let words = [
67+
// French words with accents
68+
"café",
69+
"naïve",
70+
"résumé",
71+
"fiancé",
72+
"crème",
73+
"déjà",
74+
"façade",
75+
"château",
76+
"élève",
77+
"côte",
78+
// German words with umlauts
79+
"über",
80+
"Müller",
81+
"schön",
82+
"Köln",
83+
"Düsseldorf",
84+
"Österreich",
85+
"Zürich",
86+
"Mädchen",
87+
"Bär",
88+
"größer",
89+
// Spanish words with tildes and accents
90+
"niño",
91+
"señor",
92+
"año",
93+
"mañana",
94+
"español",
95+
"corazón",
96+
"María",
97+
"José",
98+
"más",
99+
"también",
100+
];
101+
102+
generate_data_from_words(&words, num_lines)
103+
}
104+
105+
/// Generate test data with mixed ASCII and non-ASCII characters
106+
fn generate_mixed_data(num_lines: usize) -> Vec<u8> {
107+
let words = [
108+
// Mix of ASCII and accented words
109+
"apple",
110+
"café",
111+
"banana",
112+
"naïve",
113+
"cherry",
114+
"résumé",
115+
"date",
116+
"fiancé",
117+
"elderberry",
118+
"crème",
119+
"über",
120+
"grape",
121+
"Müller",
122+
"honeydew",
123+
"schön",
124+
"niño",
125+
"kiwi",
126+
"señor",
127+
"lemon",
128+
"año",
129+
"mango",
130+
"María",
131+
"orange",
132+
"José",
133+
"papaya",
134+
];
135+
136+
generate_data_from_words(&words, num_lines)
137+
}
138+
139+
/// Generate test data with uppercase/lowercase variations
140+
fn generate_case_sensitive_data(num_lines: usize) -> Vec<u8> {
141+
let base_words = [
142+
"apple", "Apple", "APPLE", "banana", "Banana", "BANANA", "café", "Café", "CAFÉ", "über",
143+
"Über", "ÜBER",
144+
];
145+
146+
generate_data_from_words_simple(&base_words, num_lines)
147+
}
148+
149+
fn setup_test_file(data: &[u8]) -> std::path::PathBuf {
150+
let temp_dir = tempfile::tempdir().unwrap();
151+
let file_path = create_test_file(data, temp_dir.path());
152+
// Keep temp_dir alive by leaking it - the OS will clean it up
153+
std::mem::forget(temp_dir);
154+
file_path
155+
}
156+
157+
/// Benchmark sorting ASCII-only data
158+
#[divan::bench(args = [100_000, 500_000])]
159+
fn sort_ascii_only(bencher: Bencher, num_lines: usize) {
160+
let data = generate_ascii_data(num_lines);
161+
let file_path = setup_test_file(&data);
162+
163+
bencher.bench(|| {
164+
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
165+
});
166+
}
167+
168+
/// Benchmark sorting accented/non-ASCII data
169+
#[divan::bench(args = [100_000, 500_000])]
170+
fn sort_accented_data(bencher: Bencher, num_lines: usize) {
171+
let data = generate_accented_data(num_lines);
172+
let file_path = setup_test_file(&data);
173+
174+
bencher.bench(|| {
175+
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
176+
});
177+
}
178+
179+
/// Benchmark sorting mixed ASCII/non-ASCII data
180+
#[divan::bench(args = [100_000, 500_000])]
181+
fn sort_mixed_data(bencher: Bencher, num_lines: usize) {
182+
let data = generate_mixed_data(num_lines);
183+
let file_path = setup_test_file(&data);
184+
185+
bencher.bench(|| {
186+
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
187+
});
188+
}
189+
190+
/// Benchmark case-sensitive sorting with mixed case data
191+
#[divan::bench(args = [100_000, 500_000])]
192+
fn sort_case_sensitive(bencher: Bencher, num_lines: usize) {
193+
let data = generate_case_sensitive_data(num_lines);
194+
let file_path = setup_test_file(&data);
195+
196+
bencher.bench(|| {
197+
black_box(run_util_function(uumain, &[file_path.to_str().unwrap()]));
198+
});
199+
}
200+
201+
/// Benchmark case-insensitive sorting (fold case)
202+
#[divan::bench(args = [100_000, 500_000])]
203+
fn sort_case_insensitive(bencher: Bencher, num_lines: usize) {
204+
let data = generate_case_sensitive_data(num_lines);
205+
let file_path = setup_test_file(&data);
206+
207+
bencher.bench(|| {
208+
black_box(run_util_function(
209+
uumain,
210+
&["-f", file_path.to_str().unwrap()],
211+
));
212+
});
213+
}
214+
215+
/// Benchmark dictionary order sorting (only blanks and alphanumeric)
216+
#[divan::bench(args = [100_000, 500_000])]
217+
fn sort_dictionary_order(bencher: Bencher, num_lines: usize) {
218+
let data = generate_mixed_data(num_lines);
219+
let file_path = setup_test_file(&data);
220+
221+
bencher.bench(|| {
222+
black_box(run_util_function(
223+
uumain,
224+
&["-d", file_path.to_str().unwrap()],
225+
));
226+
});
227+
}
228+
229+
/// Benchmark numeric sorting with mixed data
230+
#[divan::bench(args = [100_000, 500_000])]
231+
fn sort_numeric(bencher: Bencher, num_lines: usize) {
232+
let mut data = Vec::new();
233+
234+
// Generate numeric data with some text prefixes
235+
for i in 0..num_lines {
236+
let value = (i * 13) % 10000; // Pseudo-random numeric values
237+
data.extend_from_slice(format!("value_{value}\n").as_bytes());
238+
}
239+
240+
let file_path = setup_test_file(&data);
241+
242+
bencher.bench(|| {
243+
black_box(run_util_function(
244+
uumain,
245+
&["-n", file_path.to_str().unwrap()],
246+
));
247+
});
248+
}
249+
250+
/// Benchmark reverse sorting with locale-aware data
251+
#[divan::bench(args = [100_000, 500_000])]
252+
fn sort_reverse_locale(bencher: Bencher, num_lines: usize) {
253+
let data = generate_accented_data(num_lines);
254+
let file_path = setup_test_file(&data);
255+
256+
bencher.bench(|| {
257+
black_box(run_util_function(
258+
uumain,
259+
&["-r", file_path.to_str().unwrap()],
260+
));
261+
});
262+
}
263+
264+
/// Benchmark sorting with specific key field
265+
#[divan::bench(args = [100_000, 500_000])]
266+
fn sort_key_field(bencher: Bencher, num_lines: usize) {
267+
let mut data = Vec::new();
268+
269+
// Generate data with multiple fields
270+
let words = ["café", "naïve", "apple", "über", "banana"];
271+
for i in 0..num_lines {
272+
let word = words[i % words.len()];
273+
let num1 = i % 100;
274+
let num2 = (i * 7) % 100;
275+
data.extend_from_slice(format!("{num1}\t{word}\t{num2}\n").as_bytes());
276+
}
277+
278+
let file_path = setup_test_file(&data);
279+
280+
bencher.bench(|| {
281+
// Sort by second field
282+
black_box(run_util_function(
283+
uumain,
284+
&["-k", "2", file_path.to_str().unwrap()],
285+
));
286+
});
287+
}
288+
289+
/// Benchmark unique sorting with locale-aware data
290+
#[divan::bench(args = [100_000, 500_000])]
291+
fn sort_unique_locale(bencher: Bencher, num_lines: usize) {
292+
let data = generate_accented_data(num_lines);
293+
let file_path = setup_test_file(&data);
294+
295+
bencher.bench(|| {
296+
black_box(run_util_function(
297+
uumain,
298+
&["-u", file_path.to_str().unwrap()],
299+
));
300+
});
301+
}
302+
303+
fn main() {
304+
divan::main();
305+
}

0 commit comments

Comments
 (0)