 // We use this mainly to skip repeated `/`. If there is only one slash, `memrnchr` performs the same
 // as a naive version (e.g. `rposition`). However, it is much faster in pathological cases.

-const LO_U64: u64 = 0x0101010101010101;
-const HI_U64: u64 = 0x8080808080808080;
-
-// use truncation
-const LO_USIZE: usize = LO_U64 as usize;
-const HI_USIZE: usize = HI_U64 as usize;
-
-#[cfg(target_pointer_width = "32")]
-const USIZE_BYTES: usize = 4;
-#[cfg(target_pointer_width = "64")]
-const USIZE_BYTES: usize = 8;
+use std::mem::size_of;

 // Returns the byte offset of the last byte that is NOT equal to the given one.
 #[inline(always)]
@@ -28,12 +18,11 @@ pub fn memrnchr(x: u8, text: &[u8]) -> Option<usize> {
     let ptr = text.as_ptr();

     // search to an aligned boundary
-    let end_align = (ptr as usize + len) & (USIZE_BYTES - 1);
+    let end_align = (ptr as usize + len) & (size_of::<usize>() - 1);
     let mut offset;
     if end_align > 0 {
         offset = if end_align >= len { 0 } else { len - end_align };
-        let pos = text[offset..].iter().rposition(|elt| *elt != x);
-        if let Some(index) = pos {
+        if let Some(index) = memrnchr_naive(x, &text[offset..]) {
             return Some(offset + index);
         }
     } else {
@@ -42,37 +31,25 @@ pub fn memrnchr(x: u8, text: &[u8]) -> Option<usize> {

     // search the body of the text
     let repeated_x = repeat_byte(x);
-
-    while offset >= 2 * USIZE_BYTES {
-        debug_assert_eq!((ptr as usize + offset) % USIZE_BYTES, 0);
+    while offset >= 2 * size_of::<usize>() {
+        debug_assert_eq!((ptr as usize + offset) % size_of::<usize>(), 0);
         unsafe {
-            let u = *(ptr.offset(offset as isize - 2 * USIZE_BYTES as isize) as *const usize);
-            let v = *(ptr.offset(offset as isize - USIZE_BYTES as isize) as *const usize);
-
-            // break if there is a matching byte
-            let zu = contains_zero_byte(u ^ repeated_x);
-            let zv = contains_zero_byte(v ^ repeated_x);
-            if !zu || !zv {
+            let u = *(ptr.offset(offset as isize - 2 * size_of::<usize>() as isize) as *const usize);
+            let v = *(ptr.offset(offset as isize - size_of::<usize>() as isize) as *const usize);
+            if u & repeated_x != usize::max_value() || v & repeated_x != usize::max_value() {
                 break;
             }
         }
-        offset -= 2 * USIZE_BYTES;
+        offset -= 2 * size_of::<usize>();
     }

     // find the byte before the point the body loop stopped
-    text[..offset].iter().rposition(|elt| *elt != x)
+    memrnchr_naive(x, &text[..offset])
 }

-/// Return `true` if `x` contains any zero byte.
-///
-/// From *Matters Computational*, J. Arndt
-///
-/// "The idea is to subtract one from each of the bytes and then look for
-/// bytes where the borrow propagated all the way to the most significant
-/// bit."
-#[inline]
-fn contains_zero_byte(x: usize) -> bool {
-    x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
+#[inline(always)]
+fn memrnchr_naive(x: u8, text: &[u8]) -> Option<usize> {
+    text.iter().rposition(|c| *c != x)
 }

 #[cfg(target_pointer_width = "32")]
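
Not part of the patch: the header comment above motivates `memrnchr` as a way to skip runs of repeated `/`. A minimal caller sketch, assuming the `memrnchr` defined in this file; `trim_trailing` is a hypothetical helper name used only for illustration:

```rust
// Hypothetical caller (not from this file): drop a run of trailing separators.
// `memrnchr(sep, path)` returns the offset of the last byte that is NOT `sep`.
fn trim_trailing(path: &[u8], sep: u8) -> &[u8] {
    match memrnchr(sep, path) {
        // Keep everything up to and including the last non-separator byte.
        Some(idx) => &path[..idx + 1],
        // The slice is empty or consists only of separator bytes.
        None => &path[..0],
    }
}
```

For example, on `b"foo/bar///"` with `sep = b'/'`, `memrnchr` returns `Some(6)` (the `r`), so the sketch above yields `b"foo/bar"`.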
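`repeat_byte` is used by the word-at-a-time loop but is defined outside the hunks shown here (the trailing `#[cfg(target_pointer_width = "32")]` context line suggests width-specific variants follow). A rough sketch of what such a broadcast helper usually does, stated as an assumption rather than the code from this file:

```rust
// Assumed shape only (not from this diff): broadcast one byte into every
// byte of a usize, e.g. 0x2f -> 0x2f2f2f2f2f2f2f2f on a 64-bit target.
fn repeat_byte(b: u8) -> usize {
    // usize::max_value() / 255 == 0x0101...01, so multiplying by `b`
    // writes `b` into every byte lane without overflow.
    (b as usize) * (usize::max_value() / 255)
}
```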