Skip to content

Commit 3d563cd

Browse files
committed
Fix popcount64 linking issue and improve compatibility
- Fix undefined reference to `__popcountdi2` by adding a `__POPCNT__` check
- Use Brian Kernighan's algorithm for better fallback performance
- Improve C compatibility by using `NULL` instead of `nullptr`
- Use `stdint.h` instead of `cstdint` for better C compatibility
- Prioritize MSVC `__popcnt64` over the GCC builtin for better reliability

This resolves linking errors in environments where compiler builtins are not properly linked, particularly affecting test compilation.
1 parent 5588f3b commit 3d563cd

File tree

1 file changed

+17
-15
lines changed

1 file changed

+17
-15
lines changed

src/cpu.cpp

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
#if !NCNN_SIMPLESTL
1818
#include <algorithm>
19-
#include <cstdint>
19+
#include <stdint.h>
2020
#include <utility>
2121
#include <vector>
2222
#endif
@@ -1767,7 +1767,7 @@ static void initialize_cpu_thread_affinity_mask(ncnn::CpuSet& mask_all, ncnn::Cp
17671767
if (glpie != NULL)
17681768
{
17691769
DWORD bufferSize = 0;
1770-
glpie(RelationProcessorCore, nullptr, &bufferSize);
1770+
glpie(RelationProcessorCore, NULL, &bufferSize);
17711771
std::vector<BYTE> buffer(bufferSize);
17721772
if (!GetLogicalProcessorInformationEx(RelationProcessorCore,
17731773
(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(buffer.data()), &bufferSize))
@@ -2418,15 +2418,15 @@ namespace ncnn {
24182418

24192419
// New unified CpuSet implementation supporting >64 CPUs
24202420
CpuSet::CpuSet()
2421-
: fast_mask(0), extended_mask(nullptr), extended_capacity(0), use_extended(false)
2421+
: fast_mask(0), extended_mask(NULL), extended_capacity(0), use_extended(false)
24222422
#if defined _WIN32
24232423
,
24242424
legacy_mask_cache(0),
24252425
legacy_mask_valid(false)
24262426
#endif
24272427
#if defined __ANDROID__ || defined __linux__
24282428
,
2429-
cpu_set_cache(nullptr),
2429+
cpu_set_cache(NULL),
24302430
cpu_set_valid(false)
24312431
#endif
24322432
#if __APPLE__
@@ -2438,15 +2438,15 @@ CpuSet::CpuSet()
24382438
}
24392439

24402440
CpuSet::CpuSet(const CpuSet& other)
2441-
: fast_mask(0), extended_mask(nullptr), extended_capacity(0), use_extended(false)
2441+
: fast_mask(0), extended_mask(NULL), extended_capacity(0), use_extended(false)
24422442
#if defined _WIN32
24432443
,
24442444
legacy_mask_cache(0),
24452445
legacy_mask_valid(false)
24462446
#endif
24472447
#if defined __ANDROID__ || defined __linux__
24482448
,
2449-
cpu_set_cache(nullptr),
2449+
cpu_set_cache(NULL),
24502450
cpu_set_valid(false)
24512451
#endif
24522452
#if __APPLE__
@@ -2487,7 +2487,7 @@ void CpuSet::copy_from(const CpuSet& other)
24872487
if (extended_mask)
24882488
{
24892489
free(extended_mask);
2490-
extended_mask = nullptr;
2490+
extended_mask = NULL;
24912491
}
24922492
extended_capacity = 0;
24932493

@@ -2515,7 +2515,7 @@ void CpuSet::copy_from(const CpuSet& other)
25152515
if (cpu_set_cache)
25162516
{
25172517
CPU_FREE(cpu_set_cache);
2518-
cpu_set_cache = nullptr;
2518+
cpu_set_cache = NULL;
25192519
}
25202520
#endif
25212521
#if __APPLE__
@@ -2666,18 +2666,20 @@ bool CpuSet::is_enabled(int cpu) const
26662666
// Helper function to count bits in a 64-bit integer
26672667
static int popcount64(uint64_t x)
26682668
{
2669-
#if defined(__GNUC__) || defined(__clang__)
2670-
return __builtin_popcountll(x);
2671-
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
2669+
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
26722670
// __popcnt64 is only available on x86/x64, not on ARM
26732671
return (int)__popcnt64(x);
2672+
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__POPCNT__) && !defined(__FREESTANDING__) && !NCNN_SIMPLESTL
2673+
// Only use builtin if POPCNT instruction is available
2674+
return __builtin_popcountll(x);
26742675
#else
2675-
// Fallback implementation for ARM and other architectures
2676+
// Fallback implementation for compatibility
2677+
// Use Brian Kernighan's algorithm for better performance
26762678
int count = 0;
26772679
while (x)
26782680
{
2679-
count += x & 1;
2680-
x >>= 1;
2681+
x &= x - 1; // Clear the lowest set bit
2682+
count++;
26812683
}
26822684
return count;
26832685
#endif
@@ -2835,7 +2837,7 @@ const cpu_set_t* CpuSet::get_cpu_set() const
28352837
{
28362838
cpu_set_cache = CPU_ALLOC(CPU_SETSIZE);
28372839
if (!cpu_set_cache)
2838-
return nullptr;
2840+
return NULL;
28392841
}
28402842

28412843
CPU_ZERO_S(CPU_ALLOC_SIZE(CPU_SETSIZE), cpu_set_cache);

0 commit comments

Comments
 (0)