diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 35c33ddc063..9aa8a8c0450 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -142,3 +142,4 @@ add_benchmark(vector_bool_copy src/vector_bool_copy.cpp) add_benchmark(vector_bool_copy_n src/vector_bool_copy_n.cpp) add_benchmark(vector_bool_count src/vector_bool_count.cpp) add_benchmark(vector_bool_move src/vector_bool_move.cpp) +add_benchmark(vector_bool_transform src/vector_bool_transform.cpp) diff --git a/benchmarks/inc/utility.hpp b/benchmarks/inc/utility.hpp index a8f9eda29a5..54db900fbe0 100644 --- a/benchmarks/inc/utility.hpp +++ b/benchmarks/inc/utility.hpp @@ -10,10 +10,10 @@ #include #include -template class Alloc = std::allocator> -std::vector> random_vector(size_t n) { +template class Alloc = std::allocator, class... Seed> +std::vector> random_vector(size_t n, Seed... seed) { std::vector> res(n); - std::mt19937_64 prng; + std::mt19937_64 prng{seed...}; if constexpr (std::is_same_v) { std::generate(res.begin(), res.end(), [&prng] { return static_cast(prng() & 1); }); diff --git a/benchmarks/src/vector_bool_transform.cpp b/benchmarks/src/vector_bool_transform.cpp new file mode 100644 index 00000000000..4f54882c18c --- /dev/null +++ b/benchmarks/src/vector_bool_transform.cpp @@ -0,0 +1,53 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +// +#include +#include +#include +#include +#include + +#include "skewed_allocator.hpp" +#include "utility.hpp" + +using namespace std; + +template +void transform_one_input_aligned(benchmark::State& state) { + const auto size = static_cast(state.range(0)); + auto source = random_vector(size); + vector dest(size, false); + + for (auto _ : state) { + benchmark::DoNotOptimize(source); + transform(source.begin(), source.end(), dest.begin(), Pred{}); + benchmark::DoNotOptimize(dest); + } +} + +template +void transform_two_inputs_aligned(benchmark::State& state) { + const auto size = static_cast(state.range(0)); + auto source1 = random_vector(size); + auto source2 = random_vector(size, 1729u); + vector dest(size, false); + + for (auto _ : state) { + benchmark::DoNotOptimize(source1); + benchmark::DoNotOptimize(source2); + transform(source1.begin(), source1.end(), source2.begin(), dest.begin(), Pred{}); + benchmark::DoNotOptimize(dest); + } +} + +void common_args(auto bm) { + bm->RangeMultiplier(64)->Range(64, 64 << 10); +} + +BENCHMARK(transform_two_inputs_aligned>)->Apply(common_args); +BENCHMARK(transform_two_inputs_aligned>)->Apply(common_args); +BENCHMARK(transform_one_input_aligned>)->Apply(common_args); + +BENCHMARK_MAIN(); diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 1ceda78079b..d5832db3e16 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -4063,6 +4063,13 @@ _CONSTEXPR20 void iter_swap(_FwdIt1 _Left, _FwdIt2 _Right) { // swap *_Left and swap(*_Left, *_Right); // intentional ADL } +template +_CONSTEXPR20 _OutIt _Transform_vbool_aligned(_VbIt _First, _VbIt _Last, _OutIt _Dest, _Mapped_fn _Mapped_func); + +template +_CONSTEXPR20 _OutIt _Transform_vbool_aligned( + _VbIt1 _First1, _VbIt1 _Last1, _VbIt2 _First2, _OutIt _Dest, _Mapped_fn _Mapped_func); + _EXPORT_STD template _CONSTEXPR20 _OutIt transform(const _InIt _First, const _InIt _Last, _OutIt _Dest, _Fn _Func) { // transform [_First, _Last) with _Func @@ -4070,6 +4077,15 @@ _CONSTEXPR20 _OutIt transform(const _InIt _First, const _InIt _Last, _OutIt _Des auto _UFirst = _STD _Get_unwrapped(_First); const auto _ULast = _STD _Get_unwrapped(_Last); auto _UDest = _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_InIt>(_UFirst, _ULast)); + + if constexpr (_Is_vb_iterator<_InIt> && _Is_vb_iterator<_OutIt, true> && !is_same_v<_Map_vb_functor_t<_Fn>, void>) { + if (_UFirst._Myoff == 0 && _UDest._Myoff == 0) { + _UDest = _Transform_vbool_aligned(_UFirst, _ULast, _UDest, _Map_vb_functor_t<_Fn>{}); + _STD _Seek_wrapped(_Dest, _UDest); + return _Dest; + } + } + for (; _UFirst != _ULast; ++_UFirst, (void) ++_UDest) { *_UDest = _Func(*_UFirst); } @@ -4093,6 +4109,16 @@ _CONSTEXPR20 _OutIt transform( const auto _Count = _STD _Idl_distance<_InIt1>(_UFirst1, _ULast1); auto _UFirst2 = _STD _Get_unwrapped_n(_First2, _Count); auto _UDest = _STD _Get_unwrapped_n(_Dest, _Count); + + if constexpr (_Is_vb_iterator<_InIt1> && _Is_vb_iterator<_InIt2> && _Is_vb_iterator<_OutIt, true> + && !is_same_v<_Map_vb_functor_t<_Fn>, void>) { + if (_UFirst1._Myoff == 0 && _UFirst2._Myoff == 0 && _UDest._Myoff == 0) { + _UDest = _Transform_vbool_aligned(_UFirst1, _ULast1, _UFirst2, _UDest, _Map_vb_functor_t<_Fn>{}); + _STD _Seek_wrapped(_Dest, _UDest); + return _Dest; + } + } + for (; _UFirst1 != _ULast1; ++_UFirst1, (void) ++_UFirst2, ++_UDest) { *_UDest = _Func(*_UFirst1, *_UFirst2); } diff --git a/stl/inc/functional b/stl/inc/functional index 04fa643e51f..0ec049912b0 100644 --- a/stl/inc/functional +++ b/stl/inc/functional @@ -258,6 +258,62 @@ struct bit_not { using is_transparent = int; }; +struct _Bit_xnor { + template + _NODISCARD constexpr auto operator()(_Ty1&& _Left, _Ty2&& _Right) const // + -> decltype(~(_STD forward<_Ty1>(_Left) ^ _STD forward<_Ty2>(_Right))) { + return ~(_STD forward<_Ty1>(_Left) ^ _STD forward<_Ty2>(_Right)); + } + + using is_transparent = int; +}; + +template +constexpr bool _Is_vbool_functor_arg = is_same_v<_Ty, void> || is_integral_v<_Ty>; + +template +struct _Map_vb_functor> { + using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_and<>, void>; +}; + +template +struct _Map_vb_functor> { + using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_and<>, void>; +}; + +template +struct _Map_vb_functor> { + using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_or<>, void>; +}; + +template +struct _Map_vb_functor> { + using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_or<>, void>; +}; + +template +struct _Map_vb_functor> { + using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_xor<>, void>; +}; + +template +struct _Map_vb_functor> { + using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_xor<>, void>; +}; + +template +struct _Map_vb_functor> { + using type = conditional_t<_Is_vbool_functor_arg<_Ty>, _Bit_xnor, void>; +}; + +template +struct _Map_vb_functor> { + using type = conditional_t<_Is_vbool_functor_arg<_Ty>, bit_not<>, void>; +}; + +// bit_not isn't mapped to itself because it emits MSVC warning C4804 "'~': unsafe use of type 'bool' in operation" +// and Clang -Wbool-operation "bitwise negation of a boolean expression; did you mean logical negation?". + #if _HAS_DEPRECATED_NEGATORS _STL_DISABLE_DEPRECATED_WARNING _EXPORT_STD template diff --git a/stl/inc/vector b/stl/inc/vector index 6d4811a8d5e..dafac7c5933 100644 --- a/stl/inc/vector +++ b/stl/inc/vector @@ -4004,6 +4004,49 @@ _CONSTEXPR20 _OutIt _Copy_vbool(_VbIt _First, _VbIt _Last, _OutIt _Dest) { return _DestEnd; } +template +_CONSTEXPR20 _OutIt _Transform_vbool_aligned( + const _VbIt _First, const _VbIt _Last, _OutIt _Dest, const _Mapped_fn _Mapped_func) { + auto _First_ptr = _First._Myptr; + const auto _Last_ptr = _Last._Myptr; + auto _Dest_ptr = const_cast<_Vbase*>(_Dest._Myptr); + + for (; _First_ptr != _Last_ptr; ++_First_ptr, ++_Dest_ptr) { + *_Dest_ptr = _Mapped_func(*_First_ptr); + } + + if (_Last._Myoff != 0) { + const _Vbase _Mask = (_Vbase{1} << _Last._Myoff) - 1; + *_Dest_ptr = (*_Dest_ptr & ~_Mask) | (_Mapped_func(*_First_ptr) & _Mask); + _Dest._Myoff = _Last._Myoff; + } + + _Dest._Myptr = _Dest_ptr; + return _Dest; +} + +template +_CONSTEXPR20 _OutIt _Transform_vbool_aligned( + const _VbIt1 _First1, const _VbIt1 _Last1, const _VbIt2 _First2, _OutIt _Dest, const _Mapped_fn _Mapped_func) { + auto _First1_ptr = _First1._Myptr; + auto _First2_ptr = _First2._Myptr; + const auto _Last1_ptr = _Last1._Myptr; + auto _Dest_ptr = const_cast<_Vbase*>(_Dest._Myptr); + + for (; _First1_ptr != _Last1_ptr; ++_First1_ptr, ++_First2_ptr, ++_Dest_ptr) { + *_Dest_ptr = _Mapped_func(*_First1_ptr, *_First2_ptr); + } + + if (_Last1._Myoff != 0) { + const _Vbase _Mask = (_Vbase{1} << _Last1._Myoff) - 1; + *_Dest_ptr = (*_Dest_ptr & ~_Mask) | (_Mapped_func(*_First1_ptr, *_First2_ptr) & _Mask); + _Dest._Myoff = _Last1._Myoff; + } + + _Dest._Myptr = _Dest_ptr; + return _Dest; +} + #undef _ASAN_VECTOR_MODIFY #undef _ASAN_VECTOR_REMOVE #undef _ASAN_VECTOR_CREATE diff --git a/stl/inc/xutility b/stl/inc/xutility index 43d6fc306d9..d48a510f169 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -4882,6 +4882,14 @@ _OutCtgIt _Copy_memmove_n(_CtgIt _First, const size_t _Object_count, _OutCtgIt _ return _STD _Copy_memmove_tail(_First_ch, _STD move(_Dest), _Byte_count, _Object_count); } +template +struct _Map_vb_functor { + using type = void; +}; + +template +using _Map_vb_functor_t = typename _Map_vb_functor<_Fn>::type; + template constexpr bool _Is_vb_iterator = false; diff --git a/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp b/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp index c75796a0714..41e73cff35a 100644 --- a/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp +++ b/tests/std/tests/GH_000625_vector_bool_optimization/test.cpp @@ -45,6 +45,151 @@ constexpr bool source_raw[] = { // true, false, true, false, true, true, true, false, // true, false, true, false, true, true, true, false}; +CONSTEXPR20 void test_transform_helper(const size_t length) { + // Only no offset case + + // This test data is not random, but irregular enough to ensure confidence in the tests + constexpr bool source2_raw[] = {// + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true, // + true, true, false, false, false, false, true, true}; + +#if _HAS_CXX17 + static_assert(size(source_raw) == size(source2_raw)); +#endif // _HAS_CXX17 + + bool and_expected_raw[size(source_raw)]; + bool or_expected_raw[size(source_raw)]; + bool xor_expected_raw[size(source_raw)]; + bool xnor_expected_raw[size(source_raw)]; + bool not_expected_raw[size(source_raw)]; + + transform(begin(source_raw), end(source_raw), begin(source2_raw), begin(and_expected_raw), logical_and<>{}); + transform(begin(source_raw), end(source_raw), begin(source2_raw), begin(or_expected_raw), logical_or<>{}); + transform(begin(source_raw), end(source_raw), begin(source2_raw), begin(xor_expected_raw), not_equal_to<>{}); + transform(begin(source_raw), end(source_raw), begin(source2_raw), begin(xnor_expected_raw), equal_to<>{}); + transform(begin(source_raw), end(source_raw), begin(not_expected_raw), logical_not<>{}); + + const vector source1(source_raw, source_raw + length); + const vector source2(source2_raw, source2_raw + length); + + vector and_expected(and_expected_raw, and_expected_raw + length); + vector or_expected(or_expected_raw, or_expected_raw + length); + vector xor_expected(xor_expected_raw, xor_expected_raw + length); + vector xnor_expected(xnor_expected_raw, xnor_expected_raw + length); + vector not_expected(not_expected_raw, not_expected_raw + length); + + and_expected.resize(length + 3, false); + or_expected.resize(length + 3, false); + xor_expected.resize(length + 3, false); + xnor_expected.resize(length + 3, false); + not_expected.resize(length + 3, false); + + vector and_actual(length + 3); + vector or_actual(length + 3); + vector xor_actual(length + 3); + vector xnor_actual(length + 3); + vector not_actual(length + 3); + + // Also test combinations of vector::iterator and vector::const_iterator for the inputs. + const auto first1 = source1.begin(); + const auto cfirst1 = source1.cbegin(); + const auto first2 = source2.begin(); + const auto cfirst2 = source2.cbegin(); + const auto last1 = first1 + length; + const auto clast1 = cfirst1 + length; + + { + auto and_ret = transform(first1, last1, first2, and_actual.begin(), logical_and<>{}); + assert(and_actual == and_expected); + assert(and_ret == and_actual.begin() + length); + + and_actual.assign(and_actual.size(), false); + + and_ret = transform(first1, last1, first2, and_actual.begin(), bit_and<>{}); + assert(and_actual == and_expected); + assert(and_ret == and_actual.begin() + length); + } + + { + auto or_ret = transform(first1, last1, cfirst2, or_actual.begin(), logical_or<>{}); + assert(or_actual == or_expected); + assert(or_ret == or_actual.begin() + length); + + or_actual.assign(or_actual.size(), false); + + or_ret = transform(first1, last1, cfirst2, or_actual.begin(), bit_or<>{}); + assert(or_actual == or_expected); + assert(or_ret == or_actual.begin() + length); + } + + { + auto xor_ret = transform(cfirst1, clast1, first2, xor_actual.begin(), not_equal_to<>{}); + assert(xor_actual == xor_expected); + assert(xor_ret == xor_actual.begin() + length); + + xor_actual.assign(xor_actual.size(), false); + + xor_ret = transform(cfirst1, clast1, first2, xor_actual.begin(), bit_xor<>{}); + assert(xor_actual == xor_expected); + assert(xor_ret == xor_actual.begin() + length); + } + + { + const auto xnor_ret = transform(cfirst1, clast1, cfirst2, xnor_actual.begin(), equal_to<>{}); + assert(xnor_actual == xnor_expected); + assert(xnor_ret == xnor_actual.begin() + length); + + // bit_xnor doesn't exist in the Standard + } + + { + auto not_ret = transform(first1, last1, not_actual.begin(), logical_not<>{}); + assert(not_actual == not_expected); + assert(not_ret == not_actual.begin() + length); + + not_actual.assign(not_actual.size(), false); + + // bit_not emits MSVC and Clang warnings, so it isn't optimized. + // Continue using logical_not to test vector::const_iterator: + not_ret = transform(cfirst1, clast1, not_actual.begin(), logical_not<>{}); + assert(not_actual == not_expected); + assert(not_ret == not_actual.begin() + length); + } +} + +CONSTEXPR20 bool test_transform() { + // Empty range + test_transform_helper(0); + + // One block, ends within block + test_transform_helper(15); + + // One block, ends at block boundary + test_transform_helper(blockSize); + + // Multiple blocks, within block + test_transform_helper(3 * blockSize + 5); + + // Multiple blocks, ends at block boundary + test_transform_helper(4 * blockSize); + return true; +} + CONSTEXPR20 void test_fill_helper(const size_t length) { // No offset { @@ -1385,6 +1530,7 @@ static_assert(test_gh_5345<120, 31>()); static_assert(test_fill()); static_assert(test_find()); static_assert(test_count()); +static_assert(test_transform()); #if defined(__clang__) || defined(__EDG__) // TRANSITION, VSO-2574489 static_assert(test_copy_part_1()); @@ -1396,6 +1542,7 @@ int main() { test_fill(); test_find(); test_count(); + test_transform(); test_copy_part_1(); test_copy_part_2();