Skip to content

Commit 249cb01

Browse files
Optimize std::transform for vector<bool> (#5769)
Co-authored-by: Stephan T. Lavavej <[email protected]>
1 parent 6a57871 commit 249cb01

File tree

8 files changed

+337
-3
lines changed

8 files changed

+337
-3
lines changed

benchmarks/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,3 +142,4 @@ add_benchmark(vector_bool_copy src/vector_bool_copy.cpp)
142142
add_benchmark(vector_bool_copy_n src/vector_bool_copy_n.cpp)
143143
add_benchmark(vector_bool_count src/vector_bool_count.cpp)
144144
add_benchmark(vector_bool_move src/vector_bool_move.cpp)
145+
add_benchmark(vector_bool_transform src/vector_bool_transform.cpp)

benchmarks/inc/utility.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010
#include <type_traits>
1111
#include <vector>
1212

13-
template <class Contained, template <class> class Alloc = std::allocator>
14-
std::vector<Contained, Alloc<Contained>> random_vector(size_t n) {
13+
template <class Contained, template <class> class Alloc = std::allocator, class... Seed>
14+
std::vector<Contained, Alloc<Contained>> random_vector(size_t n, Seed... seed) {
1515
std::vector<Contained, Alloc<Contained>> res(n);
16-
std::mt19937_64 prng;
16+
std::mt19937_64 prng{seed...};
1717

1818
if constexpr (std::is_same_v<Contained, bool>) {
1919
std::generate(res.begin(), res.end(), [&prng] { return static_cast<bool>(prng() & 1); });
benchmarks/src/vector_bool_transform.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3+
4+
#include <benchmark/benchmark.h>
5+
//
6+
#include <algorithm>
7+
#include <cstddef>
8+
#include <functional>
9+
#include <random>
10+
#include <vector>
11+
12+
#include "skewed_allocator.hpp"
13+
#include "utility.hpp"
14+
15+
using namespace std;
16+
17+
// Benchmark for the one-input overload of transform() on vector<bool>.
// Pred is the functor type under test; a default-constructed Pred{} is passed to transform().
// Both ranges start at begin(); presumably that is what "aligned" refers to (bit offset 0) - TODO confirm.
template <class Pred>
18+
void transform_one_input_aligned(benchmark::State& state) {
19+
// Element count comes from the benchmark's first range argument.
const auto size = static_cast<size_t>(state.range(0));
20+
// Random input bits; no seed is passed, so random_vector constructs its PRNG with no arguments.
auto source = random_vector<bool, not_highly_aligned_allocator>(size);
21+
// Destination has the same length as the source and starts out all-false.
vector<bool> dest(size, false);
22+
23+
for (auto _ : state) {
24+
// DoNotOptimize on the input and on the output keeps the transform from being optimized away.
benchmark::DoNotOptimize(source);
25+
transform(source.begin(), source.end(), dest.begin(), Pred{});
26+
benchmark::DoNotOptimize(dest);
27+
}
28+
}
29+
30+
// Benchmark for the two-input overload of transform() on vector<bool>.
// Pred is the binary functor type under test; a default-constructed Pred{} is passed to transform().
// All three ranges start at begin(); presumably that is what "aligned" refers to (bit offset 0) - TODO confirm.
template <class Pred>
31+
void transform_two_inputs_aligned(benchmark::State& state) {
32+
// Element count comes from the benchmark's first range argument.
const auto size = static_cast<size_t>(state.range(0));
33+
// First input: no seed is passed, so random_vector constructs its PRNG with no arguments.
auto source1 = random_vector<bool, not_highly_aligned_allocator>(size);
34+
// Second input: explicitly seeded with 1729u, presumably so it does not repeat source1's bit pattern.
auto source2 = random_vector<bool, not_highly_aligned_allocator>(size, 1729u);
35+
// Destination has the same length as the inputs and starts out all-false.
vector<bool> dest(size, false);
36+
37+
for (auto _ : state) {
38+
// DoNotOptimize on both inputs and on the output keeps the transform from being optimized away.
benchmark::DoNotOptimize(source1);
39+
benchmark::DoNotOptimize(source2);
40+
transform(source1.begin(), source1.end(), source2.begin(), dest.begin(), Pred{});
41+
benchmark::DoNotOptimize(dest);
42+
}
43+
}
44+
45+
// Shared size configuration applied to every benchmark registered in this file via ->Apply(common_args).
// bm is used through operator->, so it is expected to be a pointer-like handle to the benchmark being registered.
void common_args(auto bm) {
46+
// Sizes run from 64 to 64 << 10 (65536) in multiplicative steps of 64
// (presumably 64, 4096 and 65536 - confirm against Google Benchmark's Range semantics).
bm->RangeMultiplier(64)->Range(64, 64 << 10);
47+
}
48+
49+
// Benchmark registrations: logical_and<> and logical_or<> exercise the two-input transform,
// logical_not<> exercises the one-input transform. All of them share the sizes set by common_args.
BENCHMARK(transform_two_inputs_aligned<logical_and<>>)->Apply(common_args);
50+
BENCHMARK(transform_two_inputs_aligned<logical_or<>>)->Apply(common_args);
51+
BENCHMARK(transform_one_input_aligned<logical_not<>>)->Apply(common_args);
52+
53+
// Google Benchmark macro that supplies the program's main().
BENCHMARK_MAIN();

stl/inc/algorithm

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4103,13 +4103,29 @@ _CONSTEXPR20 void iter_swap(_FwdIt1 _Left, _FwdIt2 _Right) { // swap *_Left and
41034103
swap(*_Left, *_Right); // intentional ADL
41044104
}
41054105

4106+
// Forward declarations of the word-at-a-time vector<bool> transform helpers.
// Their definitions appear in <vector> (stl/inc/vector); they are declared here so that
// the transform() overloads below can call them from inside `if constexpr` branches.

// One-input form: applies _Mapped_func to [_First, _Last) and writes the result starting at _Dest.
template <class _VbIt, class _OutIt, class _Mapped_fn>
4107+
_CONSTEXPR20 _OutIt _Transform_vbool_aligned(_VbIt _First, _VbIt _Last, _OutIt _Dest, _Mapped_fn _Mapped_func);
4108+
4109+
// Two-input form: combines [_First1, _Last1) with the range starting at _First2 and writes to _Dest.
template <class _VbIt1, class _VbIt2, class _OutIt, class _Mapped_fn>
4110+
_CONSTEXPR20 _OutIt _Transform_vbool_aligned(
4111+
_VbIt1 _First1, _VbIt1 _Last1, _VbIt2 _First2, _OutIt _Dest, _Mapped_fn _Mapped_func);
4112+
41064113
_EXPORT_STD template <class _InIt, class _OutIt, class _Fn>
41074114
_CONSTEXPR20 _OutIt transform(const _InIt _First, const _InIt _Last, _OutIt _Dest, _Fn _Func) {
41084115
// transform [_First, _Last) with _Func
41094116
_STD _Adl_verify_range(_First, _Last);
41104117
auto _UFirst = _STD _Get_unwrapped(_First);
41114118
const auto _ULast = _STD _Get_unwrapped(_Last);
41124119
auto _UDest = _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_InIt>(_UFirst, _ULast));
4120+
4121+
if constexpr (_Is_vb_iterator<_InIt> && _Is_vb_iterator<_OutIt, true> && !is_same_v<_Map_vb_functor_t<_Fn>, void>) {
4122+
if (_UFirst._Myoff == 0 && _UDest._Myoff == 0) {
4123+
_UDest = _Transform_vbool_aligned(_UFirst, _ULast, _UDest, _Map_vb_functor_t<_Fn>{});
4124+
_STD _Seek_wrapped(_Dest, _UDest);
4125+
return _Dest;
4126+
}
4127+
}
4128+
41134129
for (; _UFirst != _ULast; ++_UFirst, (void) ++_UDest) {
41144130
*_UDest = _Func(*_UFirst);
41154131
}
@@ -4133,6 +4149,16 @@ _CONSTEXPR20 _OutIt transform(
41334149
const auto _Count = _STD _Idl_distance<_InIt1>(_UFirst1, _ULast1);
41344150
auto _UFirst2 = _STD _Get_unwrapped_n(_First2, _Count);
41354151
auto _UDest = _STD _Get_unwrapped_n(_Dest, _Count);
4152+
4153+
if constexpr (_Is_vb_iterator<_InIt1> && _Is_vb_iterator<_InIt2> && _Is_vb_iterator<_OutIt, true>
4154+
&& !is_same_v<_Map_vb_functor_t<_Fn>, void>) {
4155+
if (_UFirst1._Myoff == 0 && _UFirst2._Myoff == 0 && _UDest._Myoff == 0) {
4156+
_UDest = _Transform_vbool_aligned(_UFirst1, _ULast1, _UFirst2, _UDest, _Map_vb_functor_t<_Fn>{});
4157+
_STD _Seek_wrapped(_Dest, _UDest);
4158+
return _Dest;
4159+
}
4160+
}
4161+
41364162
for (; _UFirst1 != _ULast1; ++_UFirst1, (void) ++_UFirst2, ++_UDest) {
41374163
*_UDest = _Func(*_UFirst1, *_UFirst2);
41384164
}

stl/inc/functional

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,62 @@ struct bit_not<void> {
258258
using is_transparent = int;
259259
};
260260

261+
// Internal transparent function object computing bitwise XNOR: ~(_Left ^ _Right).
// It is the mapping target for equal_to in the _Map_vb_functor specializations below.
struct _Bit_xnor {
262+
// Perfectly forwards both operands; the return type is whatever ~(_Left ^ _Right) yields,
// and the trailing decltype makes the call operator SFINAE-friendly for unsupported operand types.
template <class _Ty1, class _Ty2>
263+
_NODISCARD constexpr auto operator()(_Ty1&& _Left, _Ty2&& _Right) const //
264+
-> decltype(~(_STD forward<_Ty1>(_Left) ^ _STD forward<_Ty2>(_Right))) {
265+
return ~(_STD forward<_Ty1>(_Left) ^ _STD forward<_Ty2>(_Right));
266+
}
267+
268+
// Marks the functor as transparent, matching the bit_not<void> specialization above.
using is_transparent = int;
269+
};
270+
271+
// True when a standard functor's template argument _Ty is void or an integral type.
// The _Map_vb_functor specializations below map a functor to a bitwise equivalent only when this holds.
template <class _Ty>
272+
constexpr bool _Is_vbool_functor_arg = is_same_v<_Ty, void> || is_integral_v<_Ty>;
273+
274+
// _Map_vb_functor specializations: each maps a standard functor to a transparent bitwise functor
// that can be applied to whole storage words of a vector<bool>. When _Ty is neither void nor
// integral (see _Is_vbool_functor_arg), `type` is void, which transform() treats as "no mapping"
// and falls back to its element-by-element loop.

// logical_and on bools is bitwise AND on packed bits.
template <class _Ty>
275+
struct _Map_vb_functor<logical_and<_Ty>> {
276+
using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_and<>, void>;
277+
};
278+
279+
// bit_and maps to the transparent bit_and<>.
template <class _Ty>
280+
struct _Map_vb_functor<bit_and<_Ty>> {
281+
using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_and<>, void>;
282+
};
283+
284+
// logical_or on bools is bitwise OR on packed bits.
template <class _Ty>
285+
struct _Map_vb_functor<logical_or<_Ty>> {
286+
using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_or<>, void>;
287+
};
288+
289+
// bit_or maps to the transparent bit_or<>.
template <class _Ty>
290+
struct _Map_vb_functor<bit_or<_Ty>> {
291+
using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_or<>, void>;
292+
};
293+
294+
// not_equal_to on bools is bitwise XOR on packed bits.
template <class _Ty>
295+
struct _Map_vb_functor<not_equal_to<_Ty>> {
296+
using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_xor<>, void>;
297+
};
298+
299+
// bit_xor maps to the transparent bit_xor<>.
template <class _Ty>
300+
struct _Map_vb_functor<bit_xor<_Ty>> {
301+
using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_xor<>, void>;
302+
};
303+
304+
// equal_to on bools is bitwise XNOR on packed bits (see _Bit_xnor above).
template <class _Ty>
305+
struct _Map_vb_functor<equal_to<_Ty>> {
306+
using type = conditional_t<_Is_vbool_functor_arg<_Ty>, _Bit_xnor, void>;
307+
};
308+
309+
// logical_not on bools is bitwise NOT on packed bits.
template <class _Ty>
310+
struct _Map_vb_functor<logical_not<_Ty>> {
311+
using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_not<>, void>;
312+
};
313+
314+
// bit_not isn't mapped to itself because it emits MSVC warning C4804 "'~': unsafe use of type 'bool' in operation"
315+
// and Clang -Wbool-operation "bitwise negation of a boolean expression; did you mean logical negation?".
316+
261317
#if _HAS_DEPRECATED_NEGATORS
262318
_STL_DISABLE_DEPRECATED_WARNING
263319
_EXPORT_STD template <class _Fn>

stl/inc/vector

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4004,6 +4004,49 @@ _CONSTEXPR20 _OutIt _Copy_vbool(_VbIt _First, _VbIt _Last, _OutIt _Dest) {
40044004
return _DestEnd;
40054005
}
40064006

4007+
// One-input vector<bool> transform that works on whole storage words (_Vbase) instead of single bits.
// _First/_Last delimit the source range, _Dest is the start of the destination, and _Mapped_func is
// the bitwise functor applied to each source word. Returns the destination iterator advanced past
// the last written bit.
// The code never reads _First._Myoff or the incoming _Dest._Myoff; the transform() caller in
// <algorithm> only takes this path when both offsets are 0.
template <class _VbIt, class _OutIt, class _Mapped_fn>
4008+
_CONSTEXPR20 _OutIt _Transform_vbool_aligned(
4009+
const _VbIt _First, const _VbIt _Last, _OutIt _Dest, const _Mapped_fn _Mapped_func) {
4010+
auto _First_ptr = _First._Myptr;
4011+
const auto _Last_ptr = _Last._Myptr;
4012+
// const_cast yields a writable word pointer regardless of how _Dest stores _Myptr.
auto _Dest_ptr = const_cast<_Vbase*>(_Dest._Myptr);
4013+
4014+
// Full words: every bit of each source word up to (not including) _Last's word is in range.
for (; _First_ptr != _Last_ptr; ++_First_ptr, ++_Dest_ptr) {
4015+
*_Dest_ptr = _Mapped_func(*_First_ptr);
4016+
}
4017+
4018+
// Partial trailing word: only the low _Last._Myoff bits belong to the range.
if (_Last._Myoff != 0) {
4019+
// _Mask has the low _Last._Myoff bits set.
const _Vbase _Mask = (_Vbase{1} << _Last._Myoff) - 1;
4020+
// Keep destination bits outside the mask; replace the masked bits with the mapped result.
*_Dest_ptr = (*_Dest_ptr & ~_Mask) | (_Mapped_func(*_First_ptr) & _Mask);
4021+
_Dest._Myoff = _Last._Myoff;
4022+
}
4023+
4024+
// Point the returned iterator at the word reached by the loop (with _Myoff set above if partial).
_Dest._Myptr = _Dest_ptr;
4025+
return _Dest;
4026+
}
4027+
4028+
// Two-input vector<bool> transform that works on whole storage words (_Vbase) instead of single bits.
// [_First1, _Last1) is the first source range, _First2 the start of the second source, _Dest the start
// of the destination, and _Mapped_func the bitwise functor applied to each pair of source words.
// Returns the destination iterator advanced past the last written bit.
// The code never reads _First1._Myoff, _First2._Myoff or the incoming _Dest._Myoff; the transform()
// caller in <algorithm> only takes this path when all three offsets are 0.
template <class _VbIt1, class _VbIt2, class _OutIt, class _Mapped_fn>
4029+
_CONSTEXPR20 _OutIt _Transform_vbool_aligned(
4030+
const _VbIt1 _First1, const _VbIt1 _Last1, const _VbIt2 _First2, _OutIt _Dest, const _Mapped_fn _Mapped_func) {
4031+
auto _First1_ptr = _First1._Myptr;
4032+
auto _First2_ptr = _First2._Myptr;
4033+
const auto _Last1_ptr = _Last1._Myptr;
4034+
// const_cast yields a writable word pointer regardless of how _Dest stores _Myptr.
auto _Dest_ptr = const_cast<_Vbase*>(_Dest._Myptr);
4035+
4036+
// Full words: every bit of each source word up to (not including) _Last1's word is in range.
for (; _First1_ptr != _Last1_ptr; ++_First1_ptr, ++_First2_ptr, ++_Dest_ptr) {
4037+
*_Dest_ptr = _Mapped_func(*_First1_ptr, *_First2_ptr);
4038+
}
4039+
4040+
// Partial trailing word: only the low _Last1._Myoff bits belong to the range.
if (_Last1._Myoff != 0) {
4041+
// _Mask has the low _Last1._Myoff bits set.
const _Vbase _Mask = (_Vbase{1} << _Last1._Myoff) - 1;
4042+
// Keep destination bits outside the mask; replace the masked bits with the mapped result.
*_Dest_ptr = (*_Dest_ptr & ~_Mask) | (_Mapped_func(*_First1_ptr, *_First2_ptr) & _Mask);
4043+
_Dest._Myoff = _Last1._Myoff;
4044+
}
4045+
4046+
// Point the returned iterator at the word reached by the loop (with _Myoff set above if partial).
_Dest._Myptr = _Dest_ptr;
4047+
return _Dest;
4048+
}
4049+
40074050
#undef _ASAN_VECTOR_MODIFY
40084051
#undef _ASAN_VECTOR_REMOVE
40094052
#undef _ASAN_VECTOR_CREATE

stl/inc/xutility

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4868,6 +4868,14 @@ _OutCtgIt _Copy_memmove_n(_CtgIt _First, const size_t _Object_count, _OutCtgIt _
48684868
return _STD _Copy_memmove_tail(_First_ch, _STD move(_Dest), _Byte_count, _Object_count);
48694869
}
48704870

4871+
// Primary template: by default a functor _Fn has no word-wise vector<bool> equivalent, signalled by
// `type` being void. Specializations for specific standard functors are provided in <functional>.
template <class _Fn>
4872+
struct _Map_vb_functor {
4873+
using type = void;
4874+
};
4875+
4876+
// Convenience alias for _Map_vb_functor<_Fn>::type; void means "no mapping available".
template <class _Fn>
4877+
using _Map_vb_functor_t = typename _Map_vb_functor<_Fn>::type;
4878+
48714879
template <class _It, bool _RequiresMutable = false>
48724880
constexpr bool _Is_vb_iterator = false;
48734881

0 commit comments

Comments
 (0)