From 37c8f4875ac235d30ae75346fea5565f83238d5b Mon Sep 17 00:00:00 2001 From: Xin <3457165878@qq.com> Date: Sun, 13 Jul 2025 22:44:37 +0800 Subject: [PATCH 1/5] Add 64-cor-pluse CPU support for Windows system --- src/cpu.cpp | 192 +++++++++++++++++++++++++++++++++++++++++++++------- src/cpu.h | 16 +++-- 2 files changed, 179 insertions(+), 29 deletions(-) diff --git a/src/cpu.cpp b/src/cpu.cpp index 4ba23ebea1f1..c189b180934d 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -53,6 +53,7 @@ #endif #if defined __ANDROID__ || defined __OHOS__ || __linux__ +#include #if defined __ANDROID__ #if __ANDROID_API__ >= 18 #include // getauxval() @@ -878,9 +879,43 @@ static int get_cpucount() else count = 1; #elif defined _WIN32 - SYSTEM_INFO system_info; - GetSystemInfo(&system_info); - count = system_info.dwNumberOfProcessors; + typedef BOOL(WINAPI *LPFN_GLPIEX)(LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD); + LPFN_GLPIEX glpiex = (LPFN_GLPIEX)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformationEx"); + if (glpiex != NULL) { + DWORD length = 0; + glpiex(RelationAll, NULL, &length); + + if (length > 0) { + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer = + (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(length); + + if (buffer && glpiex(RelationAll, buffer, &length)) { + count = 0; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX ptr = buffer; + DWORD offset = 0; + + while (offset < length) { + if (ptr->Relationship == RelationProcessorCore) { + for (WORD i = 0; i < ptr->Processor.GroupCount; i++) { + count += __popcnt64(ptr->Processor.GroupMask[i].Mask); + } + } + offset += ptr->Size; + ptr = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)((char*)ptr + ptr->Size); + } + } + + if (buffer) { + free(buffer); + } + } + } + //If cpu's count <= 64, use the previouse version. + if (count == 0) { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + count = system_info.dwNumberOfProcessors; + } #elif defined __ANDROID__ || defined __linux__ // get cpu count from /proc/cpuinfo FILE* fp = fopen("/proc/cpuinfo", "rb"); @@ -1355,6 +1390,57 @@ static ncnn::CpuSet get_smt_cpu_mask() { ncnn::CpuSet smt_cpu_mask; + typedef BOOL(WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); + LPFN_GLPI glpiex = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformationEx"); + if (glpiex != NULL) //CPU core > 64 + { + DWORD length = 0; + glpiex(RelationProcessorCore, NULL, &length); + + if (length > 0) + { + std::vector buffer(length); + if (glpiex(RelationProcessorCore, (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)buffer.data(), &length)) + { + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX current = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)buffer.data(); + + while ((char*)current < buffer.data() + length) + { + if (current->Relationship == RelationProcessorCore) + { + int total_logical_count = 0; + for (WORD group = 0; group < current->Processor.GroupCount; group++) + { + total_logical_count += __popcnt64(current->Processor.GroupMask[group].Mask); + } + + if (total_logical_count > 1) + { + for (WORD group = 0; group < current->Processor.GroupCount; group++) + { + KAFFINITY mask = current->Processor.GroupMask[group].Mask; + for (int cpu = 0; cpu < 64 && mask; cpu++) + { + if (mask & (1ULL << cpu)) + { + int global_cpu = group * 64 + cpu; + smt_cpu_mask.enable(global_cpu); + mask &= ~(1ULL << cpu); + } + } + } + } + } + + current = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)((char*)current + current->Size); + } + + return smt_cpu_mask; + } + } + } + + // Under 64, use the old API typedef BOOL(WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); LPFN_GLPI glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation"); if (glpi == NULL) @@ -1375,12 +1461,16 @@ static ncnn::CpuSet get_smt_cpu_mask() { if (ptr->Relationship == RelationProcessorCore) { - ncnn::CpuSet smt_set; - smt_set.mask = ptr->ProcessorMask; - if (smt_set.num_enabled() > 1) + int logical_count = __popcnt64(ptr->ProcessorMask); + if (logical_count > 1) { - // this core is smt - smt_cpu_mask.mask |= smt_set.mask; + ULONG_PTR mask = ptr->ProcessorMask; + for (int cpu = 0; cpu < 64 && mask; cpu++) { + if (mask & (1ULL << cpu)) { + smt_cpu_mask.enable(cpu); + mask &= ~(1ULL << cpu); + } + } } } @@ -1389,7 +1479,6 @@ static ncnn::CpuSet get_smt_cpu_mask() } free(buffer); - return smt_cpu_mask; } @@ -1435,13 +1524,25 @@ static std::vector get_max_freq_mhz() static int set_sched_affinity(const ncnn::CpuSet& thread_affinity_mask) { - DWORD_PTR prev_mask = SetThreadAffinityMask(GetCurrentThread(), thread_affinity_mask.mask); - if (prev_mask == 0) + for (int group = 0; group < thread_affinity_mask.active_groups; group++) { - NCNN_LOGE("SetThreadAffinityMask failed %d", GetLastError()); - return -1; + if (thread_affinity_mask.masks[group] != 0) + { + GROUP_AFFINITY groupAffinity; + groupAffinity.Mask = thread_affinity_mask.masks[group]; + groupAffinity.Group = (WORD)group; + groupAffinity.Reserved[0] = 0; + groupAffinity.Reserved[1] = 0; + groupAffinity.Reserved[2] = 0; + + if (!SetThreadGroupAffinity(GetCurrentThread(), &groupAffinity, NULL)) + { + NCNN_LOGE("SetThreadGroupAffinity failed %d", GetLastError()); + return -1; + } + break; + } } - return 0; } #endif // defined _WIN32 @@ -1610,6 +1711,7 @@ static int set_sched_affinity(const ncnn::CpuSet& thread_affinity_mask) } #endif // __APPLE__ + static void initialize_cpu_thread_affinity_mask(ncnn::CpuSet& mask_all, ncnn::CpuSet& mask_little, ncnn::CpuSet& mask_big) { mask_all.disable_all(); @@ -2152,7 +2254,7 @@ static void initialize_global_cpu_info() g_cpucount = get_cpucount(); g_physical_cpucount = get_physical_cpucount(); - g_powersave = 0; + g_powersave = 0; initialize_cpu_thread_affinity_mask(g_cpu_affinity_mask_all, g_cpu_affinity_mask_little, g_cpu_affinity_mask_big); #if (defined _WIN32 && (__aarch64__ || __arm__)) || ((defined __ANDROID__ || defined __linux__) && __riscv) @@ -2277,34 +2379,74 @@ CpuSet::CpuSet() void CpuSet::enable(int cpu) { - mask |= ((ULONG_PTR)1 << cpu); + if (cpu < 0 || cpu >= max_cpus) return; + + int group = cpu / 64; + int bit = cpu % 64; + + if (group < MAX_CPU_GROUPS) { + masks[group] |= (1ULL << bit); + } } void CpuSet::disable(int cpu) { - mask &= ~((ULONG_PTR)1 << cpu); + if (cpu < 0 || cpu >= max_cpus) return; + + int group = cpu / 64; + int bit = cpu % 64; + + if (group < MAX_CPU_GROUPS) { + masks[group] &= ~(1ULL << bit); + } } void CpuSet::disable_all() { - mask = 0; + for (int i = 0; i < MAX_CPU_GROUPS; i++) { + masks[i] = 0; + } } bool CpuSet::is_enabled(int cpu) const { - return mask & ((ULONG_PTR)1 << cpu); + if (cpu < 0 || cpu >= max_cpus) return false; + + int group = cpu / 64; + int bit = cpu % 64; + + if (group < MAX_CPU_GROUPS) { + return (masks[group] & (1ULL << bit)) != 0; + } + return false; } int CpuSet::num_enabled() const { - int num_enabled = 0; - for (int i = 0; i < (int)sizeof(mask) * 8; i++) - { - if (is_enabled(i)) - num_enabled++; + int count = 0; + for (int i = 0; i < MAX_CPU_GROUPS; i++) { + count += __builtin_popcountll(masks[i]); } + return count; +} - return num_enabled; +ULONG_PTR CpuSet::get_group_mask(int group) const +{ + if (group < 0 || group >= MAX_CPU_GROUPS) { + return 0; + } + return masks[group]; +} + +int CpuSet::get_active_group_count() const +{ + int count = 0; + for (int i = 0; i < MAX_CPU_GROUPS; i++) { + if (masks[i] != 0) { + count++; + } + } + return count; } #elif defined __ANDROID__ || defined __linux__ CpuSet::CpuSet() diff --git a/src/cpu.h b/src/cpu.h index 6a3fcea293ed..652c75997082 100644 --- a/src/cpu.h +++ b/src/cpu.h @@ -38,10 +38,19 @@ class NCNN_EXPORT CpuSet void disable_all(); bool is_enabled(int cpu) const; int num_enabled() const; + +#if defined _WIN32 + int get_max_cpus() const { return max_cpus; } + ULONG_PTR get_group_mask(int group) const; + int get_active_group_count() const; +#endif public: #if defined _WIN32 - ULONG_PTR mask; + static const int MAX_CPU_GROUPS = 20; + ULONG_PTR masks[MAX_CPU_GROUPS]; + int max_cpus; + int active_groups; #endif #if defined __ANDROID__ || defined __linux__ cpu_set_t cpu_set; @@ -129,7 +138,6 @@ NCNN_EXPORT int cpu_support_riscv_xtheadvector(); // vlenb = riscv vector length in bytes NCNN_EXPORT int cpu_riscv_vlenb(); -// cpu info NCNN_EXPORT int get_cpu_count(); NCNN_EXPORT int get_little_cpu_count(); NCNN_EXPORT int get_big_cpu_count(); @@ -138,7 +146,7 @@ NCNN_EXPORT int get_physical_cpu_count(); NCNN_EXPORT int get_physical_little_cpu_count(); NCNN_EXPORT int get_physical_big_cpu_count(); -// cpu l2 varies from 64k to 1M, but l3 can be zero +// cpu l2 varies from 64k to 1M, but l3 can be zero NCNN_EXPORT int get_cpu_level2_cache_size(); NCNN_EXPORT int get_cpu_level3_cache_size(); @@ -153,7 +161,7 @@ NCNN_EXPORT int get_cpu_level3_cache_size(); NCNN_EXPORT int get_cpu_powersave(); NCNN_EXPORT int set_cpu_powersave(int powersave); -// convenient wrapper +// convenient wrapper NCNN_EXPORT const CpuSet& get_cpu_thread_affinity_mask(int powersave); // set explicit thread affinity From 9c418cf6dcb7f7d131fece7de1003393f3fb461f Mon Sep 17 00:00:00 2001 From: Xin <3457165878@qq.com> Date: Mon, 14 Jul 2025 21:04:00 +0800 Subject: [PATCH 2/5] fixed some problems --- src/cpu.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/cpu.cpp b/src/cpu.cpp index c189b180934d..00bdc2375d90 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -53,7 +53,6 @@ #endif #if defined __ANDROID__ || defined __OHOS__ || __linux__ -#include #if defined __ANDROID__ #if __ANDROID_API__ >= 18 #include // getauxval() @@ -897,7 +896,7 @@ static int get_cpucount() while (offset < length) { if (ptr->Relationship == RelationProcessorCore) { for (WORD i = 0; i < ptr->Processor.GroupCount; i++) { - count += __popcnt64(ptr->Processor.GroupMask[i].Mask); + count += _popcnt64(ptr->Processor.GroupMask[i].Mask); } } offset += ptr->Size; @@ -1390,8 +1389,8 @@ static ncnn::CpuSet get_smt_cpu_mask() { ncnn::CpuSet smt_cpu_mask; - typedef BOOL(WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); - LPFN_GLPI glpiex = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformationEx"); + typedef BOOL(WINAPI * LPFN_GLPIEX)(LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD); + LPFN_GLPIEX glpiex = (LPFN_GLPIEX)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformationEx"); if (glpiex != NULL) //CPU core > 64 { DWORD length = 0; @@ -1411,7 +1410,7 @@ static ncnn::CpuSet get_smt_cpu_mask() int total_logical_count = 0; for (WORD group = 0; group < current->Processor.GroupCount; group++) { - total_logical_count += __popcnt64(current->Processor.GroupMask[group].Mask); + total_logical_count += _popcnt64(current->Processor.GroupMask[group].Mask); } if (total_logical_count > 1) @@ -1461,7 +1460,7 @@ static ncnn::CpuSet get_smt_cpu_mask() { if (ptr->Relationship == RelationProcessorCore) { - int logical_count = __popcnt64(ptr->ProcessorMask); + int logical_count = _popcnt64(ptr->ProcessorMask); if (logical_count > 1) { ULONG_PTR mask = ptr->ProcessorMask; From ef90945f3de53a1845f19405539e9c4800dd79c5 Mon Sep 17 00:00:00 2001 From: Xin <3457165878@qq.com> Date: Mon, 14 Jul 2025 21:38:07 +0800 Subject: [PATCH 3/5] Add a small test with some specific cores (by defined not for real) --- build-android.cmd | 49 ------- build.sh | 131 ----------------- tests/CMakeLists.txt | 4 + tests/test_ncnn_cpu_cores.cpp | 265 ++++++++++++++++++++++++++++++++++ 4 files changed, 269 insertions(+), 180 deletions(-) delete mode 100644 build-android.cmd delete mode 100755 build.sh create mode 100644 tests/test_ncnn_cpu_cores.cpp diff --git a/build-android.cmd b/build-android.cmd deleted file mode 100644 index fd1a83700f47..000000000000 --- a/build-android.cmd +++ /dev/null @@ -1,49 +0,0 @@ -:: Set android ndk root -@ECHO OFF -@SETLOCAL -@SET ANDROID_NDK= - -:: Set ninja.exe -:: @SET NINJA_EXE= - -:: android armv7 -mkdir build-android-armv7-vulkan -pushd build-android-armv7-vulkan -cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON .. -cmake --build . --parallel %NUMBER_OF_PROCESSORS% -cmake --build . --target install -popd - -:: android aarch64 -mkdir build-android-aarch64-vulkan -pushd build-android-aarch64-vulkan -cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-21 -DNCNN_VULKAN=ON .. -cmake --build . --parallel %NUMBER_OF_PROCESSORS% -cmake --build . --target install -popd - -:: android x86 -mkdir build-android-x86 -pushd build-android-x86 -cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="x86" -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON .. -cmake --build . --parallel %NUMBER_OF_PROCESSORS% -cmake --build . --target install -popd - -:: android x86_64 -mkdir build-android-x86_64 -pushd build-android-x86_64 -cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="x86_64" -DANDROID_PLATFORM=android-21 -DNCNN_VULKAN=ON .. -cmake --build . --parallel %NUMBER_OF_PROCESSORS% -cmake --build . --target install -popd - -:: android riscv64 -mkdir build-android-riscv64 -pushd build-android-riscv64 -cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="riscv64" -DANDROID_PLATFORM=android-35 -DNCNN_VULKAN=ON .. -cmake --build . --parallel %NUMBER_OF_PROCESSORS% -cmake --build . --target install -popd - -@ENDLOCAL diff --git a/build.sh b/build.sh deleted file mode 100755 index 4b03cec34b35..000000000000 --- a/build.sh +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/env bash - -##### android armv7 without neon -mkdir -p build-android-armv7-without-neon -pushd build-android-armv7-without-neon -cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=OFF -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON .. -make -j4 -make install -popd - -##### android armv7 -mkdir -p build-android-armv7 -pushd build-android-armv7 -cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON .. -make -j4 -make install -popd - -##### android aarch64 -mkdir -p build-android-aarch64 -pushd build-android-aarch64 -cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-21 -DNCNN_VULKAN=ON .. -make -j4 -make install -popd - -##### android x86 -mkdir -p build-android-x86 -pushd build-android-x86 -cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86" -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON .. -make -j4 -make install -popd - -##### android x86_64 -mkdir -p build-android-x86_64 -pushd build-android-x86_64 -cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86_64" -DANDROID_PLATFORM=android-21 -DNCNN_VULKAN=ON .. -make -j4 -make install -popd - -##### android riscv64 -mkdir -p build-android-riscv64 -pushd build-android-riscv64 -cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="riscv64" -DANDROID_PLATFORM=android-35 -DNCNN_VULKAN=ON .. -make -j4 -make install -popd - -##### linux of hisiv300 (forgot the chip name) toolchain with neon and openmp -mkdir -p build-hisiv300-linux -pushd build-hisiv300-linux -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/hisiv300.toolchain.cmake .. -make -j4 -make install -popd - -##### linux of hisiv500 (Hi3516CV200 and Hi3519V101) toolchain with neon and openmp -mkdir -p build-hisiv500-linux -pushd build-hisiv500-linux -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/hisiv500.toolchain.cmake .. -make -j4 -make install -popd - -##### linux of hisiv600 (Hi3559V100) toolchain with neon and no openmp (due to only one cpu, close openmp) -mkdir -p build-hisiv600-linux -pushd build-hisiv600-linux -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/hisiv600.toolchain.cmake .. -make -j4 -make install -popd - -##### linux of himix100 (Hi3559a) toolchain with neon and openmp -mkdir -p build-himix100-linux -pushd build-himix100-linux -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/himix100.toolchain.cmake .. -make -j4 -make install -popd - -##### linux of arm-linux-gnueabi toolchain -mkdir -p build-arm-linux-gnueabi -pushd build-arm-linux-gnueabi -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake .. -make -j4 -make install -popd - -##### linux of arm-linux-gnueabihf toolchain -mkdir -p build-arm-linux-gnueabihf -pushd build-arm-linux-gnueabihf -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake .. -make -j4 -make install -popd - -##### linux of v831 toolchain with neon and openmp -mkdir -p build-v831-linux -pushd build-v831-linux -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/v831.toolchain.cmake .. -make -j4 -make install -popd - -##### linux for aarch64-linux-gnu toolchain -mkdir -p build-aarch64-linux-gnu -pushd build-aarch64-linux-gnu -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake .. -make -j4 -make install -popd - -##### linux host system with gcc/g++ -mkdir -p build-host-gcc-linux -pushd build-host-gcc-linux -cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc.toolchain.cmake .. -make -j4 -make install -popd - -##### MacOS -mkdir -p build-mac -pushd build-mac -cmake -DNCNN_OPENMP=OFF \ - -DNCNN_BENCHMARK=ON \ - .. -make -j8 -make install -popd diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9d5b6517e643..916269cbab65 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -165,3 +165,7 @@ ncnn_add_layer_test(Tile) ncnn_add_layer_test(UnaryOp) ncnn_add_layer_test(Unfold) ncnn_add_layer_test(Yolov3DetectionOutput) + +if(WIN32) + ncnn_add_test(ncnn_cpu_cores) +endif() diff --git a/tests/test_ncnn_cpu_cores.cpp b/tests/test_ncnn_cpu_cores.cpp new file mode 100644 index 000000000000..cecfcdd1d8f8 --- /dev/null +++ b/tests/test_ncnn_cpu_cores.cpp @@ -0,0 +1,265 @@ +#include +#include +#include +#include +#include +#include "cpu.h" + +static void print_separator(const char* title) { + printf("\n=== %s ===\n", title); +} + +static int test_basic_cpu_info() { + print_separator("Basic CPU Information Test"); + + int cpu_count = ncnn::get_cpu_count(); + int big_cpu_count = ncnn::get_big_cpu_count(); + int little_cpu_count = ncnn::get_little_cpu_count(); + int physical_cpu_count = ncnn::get_physical_cpu_count(); + + printf("CPU Count: %d\n", cpu_count); + printf("Big CPU Count: %d\n", big_cpu_count); + printf("Little CPU Count: %d\n", little_cpu_count); + printf("Physical CPU Count: %d\n", physical_cpu_count); + + if (cpu_count <= 0) { + printf("ERROR: Invalid CPU count\n"); + return -1; + } + + return 0; +} + +static int test_windows_api_comparison() { + print_separator("Windows API Comparison Test"); + + // Get ncnn detected CPU count + int ncnn_cpu_count = ncnn::get_cpu_count(); + + // Get Windows API CPU count + SYSTEM_INFO sysinfo; + GetSystemInfo(&sysinfo); + DWORD win_cpu_count = sysinfo.dwNumberOfProcessors; + + printf("NCNN detected CPUs: %d\n", ncnn_cpu_count); + printf("Windows GetSystemInfo CPUs: %d\n", win_cpu_count); + + // Test GetLogicalProcessorInformationEx for >64 core support + DWORD buffer_size = 0; + GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &buffer_size); + + if (buffer_size > 0) { + std::vector buffer(buffer_size); + if (GetLogicalProcessorInformationEx(RelationProcessorCore, + (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)buffer.data(), &buffer_size)) { + + int core_count = 0; + int group_count = 0; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX current = + (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)buffer.data(); + + while ((char*)current < buffer.data() + buffer_size) { + if (current->Relationship == RelationProcessorCore) { + core_count++; + group_count = max(group_count, (int)current->Processor.GroupCount); + } + current = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) + ((char*)current + current->Size); + } + + printf("GetLogicalProcessorInformationEx cores: %d\n", core_count); + printf("Processor groups detected: %d\n", group_count); + + if (core_count > 64) { + printf("SUCCESS: Detected >64 core system\n"); + } + } + } + + return 0; +} + +static int test_cpuset_basic_operations() { + print_separator("CpuSet Basic Operations Test"); + + ncnn::CpuSet cpuset; + + // Test initial state + int initial_enabled = cpuset.num_enabled(); + printf("Initial enabled CPUs: %d\n", initial_enabled); + + // Test enabling specific CPUs + int cpu_count = ncnn::get_cpu_count(); + for (int i = 0; i < min(cpu_count, 8); i++) { + cpuset.enable(i); + if (!cpuset.is_enabled(i)) { + printf("ERROR: Failed to enable CPU %d\n", i); + return -1; + } + } + + printf("Enabled first 8 CPUs, total enabled: %d\n", cpuset.num_enabled()); + + // Test disabling + cpuset.disable(0); + if (cpuset.is_enabled(0)) { + printf("ERROR: Failed to disable CPU 0\n"); + return -1; + } + + printf("Disabled CPU 0, total enabled: %d\n", cpuset.num_enabled()); + + // Test disable_all + cpuset.disable_all(); + if (cpuset.num_enabled() != 0) { + printf("ERROR: disable_all failed\n"); + return -1; + } + + printf("After disable_all, enabled CPUs: %d\n", cpuset.num_enabled()); + + return 0; +} + +static int test_cpuset_large_core_numbers() { + print_separator("CpuSet Large Core Numbers Test"); + + ncnn::CpuSet cpuset; + int cpu_count = ncnn::get_cpu_count(); + + // Test enabling all available CPUs + for (int i = 0; i < cpu_count; i++) { + cpuset.enable(i); + } + + int enabled_count = cpuset.num_enabled(); + printf("Enabled all %d CPUs, actual enabled: %d\n", cpu_count, enabled_count); + + if (enabled_count != cpu_count) { + printf("WARNING: Mismatch between expected and actual enabled CPUs\n"); + } + + // Test boundary conditions + if (cpu_count > 64) { + printf("Testing >64 core boundary...\n"); + + cpuset.disable_all(); + + // Enable CPUs around the 64-core boundary + for (int i = 60; i < min(cpu_count, 68); i++) { + cpuset.enable(i); + if (!cpuset.is_enabled(i)) { + printf("ERROR: Failed to enable CPU %d (around 64-core boundary)\n", i); + return -1; + } + } + + printf("Successfully enabled CPUs around 64-core boundary\n"); + } + + return 0; +} + +#ifdef _WIN32 +static int test_windows_specific_features() { + print_separator("Windows Specific Features Test"); + + ncnn::CpuSet cpuset; + + // Test Windows-specific methods + int max_cpus = cpuset.get_max_cpus(); + int active_groups = cpuset.get_active_group_count(); + + printf("Max CPUs: %d\n", max_cpus); + printf("Active processor groups: %d\n", active_groups); + + // Test group masks + for (int group = 0; group < active_groups && group < 4; group++) { + ULONG_PTR mask = cpuset.get_group_mask(group); + printf("Group %d mask: 0x%llx\n", group, (unsigned long long)mask); + } + + // Test enabling CPUs in different groups + if (active_groups > 1) { + printf("Testing multi-group CPU enabling...\n"); + + cpuset.disable_all(); + + // Enable some CPUs in group 0 + for (int i = 0; i < min(4, max_cpus); i++) { + cpuset.enable(i); + } + + // Enable some CPUs in group 1 (if exists) + if (max_cpus > 64) { + for (int i = 64; i < min(68, max_cpus); i++) { + cpuset.enable(i); + } + } + + printf("Multi-group test completed, enabled CPUs: %d\n", cpuset.num_enabled()); + } + + return 0; +} +#endif + +static int test_thread_affinity() { + print_separator("Thread Affinity Test"); + + // Test getting thread affinity masks + const ncnn::CpuSet& mask_all = ncnn::get_cpu_thread_affinity_mask(0); + const ncnn::CpuSet& mask_little = ncnn::get_cpu_thread_affinity_mask(1); + const ncnn::CpuSet& mask_big = ncnn::get_cpu_thread_affinity_mask(2); + + printf("All cores mask enabled CPUs: %d\n", mask_all.num_enabled()); + printf("Little cores mask enabled CPUs: %d\n", mask_little.num_enabled()); + printf("Big cores mask enabled CPUs: %d\n", mask_big.num_enabled()); + + // Test setting thread affinity + ncnn::CpuSet custom_mask; + int cpu_count = ncnn::get_cpu_count(); + + // Enable every other CPU + for (int i = 0; i < cpu_count; i += 2) { + custom_mask.enable(i); + } + + printf("Setting custom affinity with %d CPUs...\n", custom_mask.num_enabled()); + int result = ncnn::set_cpu_thread_affinity(custom_mask); + + if (result == 0) { + printf("Thread affinity set successfully\n"); + } else { + printf("Thread affinity setting failed with code: %d\n", result); + } + + return 0; +} + +int main() { + printf("NCNN CPU Core Support Test for Windows 64+ Cores\n"); + printf("================================================\n"); + + int result = 0; + + result |= test_basic_cpu_info(); + result |= test_windows_api_comparison(); + result |= test_cpuset_basic_operations(); + result |= test_cpuset_large_core_numbers(); + +#ifdef _WIN32 + result |= test_windows_specific_features(); +#endif + + result |= test_thread_affinity(); + + print_separator("Test Summary"); + if (result == 0) { + printf("All tests PASSED\n"); + } else { + printf("Some tests FAILED (return code: %d)\n", result); + } + + return result; +} \ No newline at end of file From 7a369addd67b0c2f7755f003f7a6c5617300867f Mon Sep 17 00:00:00 2001 From: Hansersdog <145438930+Hansersdog@users.noreply.github.com> Date: Mon, 14 Jul 2025 13:44:12 +0000 Subject: [PATCH 4/5] apply code-format changes --- src/cpu.cpp | 98 ++++++++------- src/cpu.h | 11 +- tests/test_ncnn_cpu_cores.cpp | 218 +++++++++++++++++++--------------- 3 files changed, 189 insertions(+), 138 deletions(-) diff --git a/src/cpu.cpp b/src/cpu.cpp index 00bdc2375d90..67f09f685c25 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -878,24 +878,29 @@ static int get_cpucount() else count = 1; #elif defined _WIN32 - typedef BOOL(WINAPI *LPFN_GLPIEX)(LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD); + typedef BOOL(WINAPI * LPFN_GLPIEX)(LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD); LPFN_GLPIEX glpiex = (LPFN_GLPIEX)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformationEx"); - if (glpiex != NULL) { + if (glpiex != NULL) + { DWORD length = 0; glpiex(RelationAll, NULL, &length); - - if (length > 0) { - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer = - (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(length); - - if (buffer && glpiex(RelationAll, buffer, &length)) { + + if (length > 0) + { + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(length); + + if (buffer && glpiex(RelationAll, buffer, &length)) + { count = 0; PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX ptr = buffer; DWORD offset = 0; - - while (offset < length) { - if (ptr->Relationship == RelationProcessorCore) { - for (WORD i = 0; i < ptr->Processor.GroupCount; i++) { + + while (offset < length) + { + if (ptr->Relationship == RelationProcessorCore) + { + for (WORD i = 0; i < ptr->Processor.GroupCount; i++) + { count += _popcnt64(ptr->Processor.GroupMask[i].Mask); } } @@ -903,14 +908,16 @@ static int get_cpucount() ptr = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)((char*)ptr + ptr->Size); } } - - if (buffer) { + + if (buffer) + { free(buffer); } } } //If cpu's count <= 64, use the previouse version. - if (count == 0) { + if (count == 0) + { SYSTEM_INFO system_info; GetSystemInfo(&system_info); count = system_info.dwNumberOfProcessors; @@ -1395,14 +1402,14 @@ static ncnn::CpuSet get_smt_cpu_mask() { DWORD length = 0; glpiex(RelationProcessorCore, NULL, &length); - + if (length > 0) { std::vector buffer(length); if (glpiex(RelationProcessorCore, (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)buffer.data(), &length)) { PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX current = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)buffer.data(); - + while ((char*)current < buffer.data() + length) { if (current->Relationship == RelationProcessorCore) @@ -1412,7 +1419,7 @@ static ncnn::CpuSet get_smt_cpu_mask() { total_logical_count += _popcnt64(current->Processor.GroupMask[group].Mask); } - + if (total_logical_count > 1) { for (WORD group = 0; group < current->Processor.GroupCount; group++) @@ -1430,15 +1437,15 @@ static ncnn::CpuSet get_smt_cpu_mask() } } } - + current = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)((char*)current + current->Size); } - + return smt_cpu_mask; } } } - + // Under 64, use the old API typedef BOOL(WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); LPFN_GLPI glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation"); @@ -1464,8 +1471,10 @@ static ncnn::CpuSet get_smt_cpu_mask() if (logical_count > 1) { ULONG_PTR mask = ptr->ProcessorMask; - for (int cpu = 0; cpu < 64 && mask; cpu++) { - if (mask & (1ULL << cpu)) { + for (int cpu = 0; cpu < 64 && mask; cpu++) + { + if (mask & (1ULL << cpu)) + { smt_cpu_mask.enable(cpu); mask &= ~(1ULL << cpu); } @@ -1533,13 +1542,13 @@ static int set_sched_affinity(const ncnn::CpuSet& thread_affinity_mask) groupAffinity.Reserved[0] = 0; groupAffinity.Reserved[1] = 0; groupAffinity.Reserved[2] = 0; - + if (!SetThreadGroupAffinity(GetCurrentThread(), &groupAffinity, NULL)) { NCNN_LOGE("SetThreadGroupAffinity failed %d", GetLastError()); return -1; } - break; + break; } } return 0; @@ -1710,7 +1719,6 @@ static int set_sched_affinity(const ncnn::CpuSet& thread_affinity_mask) } #endif // __APPLE__ - static void initialize_cpu_thread_affinity_mask(ncnn::CpuSet& mask_all, ncnn::CpuSet& mask_little, ncnn::CpuSet& mask_big) { mask_all.disable_all(); @@ -2253,7 +2261,7 @@ static void initialize_global_cpu_info() g_cpucount = get_cpucount(); g_physical_cpucount = get_physical_cpucount(); - g_powersave = 0; + g_powersave = 0; initialize_cpu_thread_affinity_mask(g_cpu_affinity_mask_all, g_cpu_affinity_mask_little, g_cpu_affinity_mask_big); #if (defined _WIN32 && (__aarch64__ || __arm__)) || ((defined __ANDROID__ || defined __linux__) && __riscv) @@ -2379,11 +2387,12 @@ CpuSet::CpuSet() void CpuSet::enable(int cpu) { if (cpu < 0 || cpu >= max_cpus) return; - + int group = cpu / 64; int bit = cpu % 64; - - if (group < MAX_CPU_GROUPS) { + + if (group < MAX_CPU_GROUPS) + { masks[group] |= (1ULL << bit); } } @@ -2391,18 +2400,20 @@ void CpuSet::enable(int cpu) void CpuSet::disable(int cpu) { if (cpu < 0 || cpu >= max_cpus) return; - + int group = cpu / 64; int bit = cpu % 64; - - if (group < MAX_CPU_GROUPS) { + + if (group < MAX_CPU_GROUPS) + { masks[group] &= ~(1ULL << bit); } } void CpuSet::disable_all() { - for (int i = 0; i < MAX_CPU_GROUPS; i++) { + for (int i = 0; i < MAX_CPU_GROUPS; i++) + { masks[i] = 0; } } @@ -2410,11 +2421,12 @@ void CpuSet::disable_all() bool CpuSet::is_enabled(int cpu) const { if (cpu < 0 || cpu >= max_cpus) return false; - + int group = cpu / 64; int bit = cpu % 64; - - if (group < MAX_CPU_GROUPS) { + + if (group < MAX_CPU_GROUPS) + { return (masks[group] & (1ULL << bit)) != 0; } return false; @@ -2423,7 +2435,8 @@ bool CpuSet::is_enabled(int cpu) const int CpuSet::num_enabled() const { int count = 0; - for (int i = 0; i < MAX_CPU_GROUPS; i++) { + for (int i = 0; i < MAX_CPU_GROUPS; i++) + { count += __builtin_popcountll(masks[i]); } return count; @@ -2431,7 +2444,8 @@ int CpuSet::num_enabled() const ULONG_PTR CpuSet::get_group_mask(int group) const { - if (group < 0 || group >= MAX_CPU_GROUPS) { + if (group < 0 || group >= MAX_CPU_GROUPS) + { return 0; } return masks[group]; @@ -2440,8 +2454,10 @@ ULONG_PTR CpuSet::get_group_mask(int group) const int CpuSet::get_active_group_count() const { int count = 0; - for (int i = 0; i < MAX_CPU_GROUPS; i++) { - if (masks[i] != 0) { + for (int i = 0; i < MAX_CPU_GROUPS; i++) + { + if (masks[i] != 0) + { count++; } } diff --git a/src/cpu.h b/src/cpu.h index 652c75997082..d24320727c5b 100644 --- a/src/cpu.h +++ b/src/cpu.h @@ -38,9 +38,12 @@ class NCNN_EXPORT CpuSet void disable_all(); bool is_enabled(int cpu) const; int num_enabled() const; - + #if defined _WIN32 - int get_max_cpus() const { return max_cpus; } + int get_max_cpus() const + { + return max_cpus; + } ULONG_PTR get_group_mask(int group) const; int get_active_group_count() const; #endif @@ -146,7 +149,7 @@ NCNN_EXPORT int get_physical_cpu_count(); NCNN_EXPORT int get_physical_little_cpu_count(); NCNN_EXPORT int get_physical_big_cpu_count(); -// cpu l2 varies from 64k to 1M, but l3 can be zero +// cpu l2 varies from 64k to 1M, but l3 can be zero NCNN_EXPORT int get_cpu_level2_cache_size(); NCNN_EXPORT int get_cpu_level3_cache_size(); @@ -161,7 +164,7 @@ NCNN_EXPORT int get_cpu_level3_cache_size(); NCNN_EXPORT int get_cpu_powersave(); NCNN_EXPORT int set_cpu_powersave(int powersave); -// convenient wrapper +// convenient wrapper NCNN_EXPORT const CpuSet& get_cpu_thread_affinity_mask(int powersave); // set explicit thread affinity diff --git a/tests/test_ncnn_cpu_cores.cpp b/tests/test_ncnn_cpu_cores.cpp index cecfcdd1d8f8..6577dffb5416 100644 --- a/tests/test_ncnn_cpu_cores.cpp +++ b/tests/test_ncnn_cpu_cores.cpp @@ -5,261 +5,293 @@ #include #include "cpu.h" -static void print_separator(const char* title) { +static void print_separator(const char* title) +{ printf("\n=== %s ===\n", title); } -static int test_basic_cpu_info() { +static int test_basic_cpu_info() +{ print_separator("Basic CPU Information Test"); - + int cpu_count = ncnn::get_cpu_count(); int big_cpu_count = ncnn::get_big_cpu_count(); int little_cpu_count = ncnn::get_little_cpu_count(); int physical_cpu_count = ncnn::get_physical_cpu_count(); - + printf("CPU Count: %d\n", cpu_count); printf("Big CPU Count: %d\n", big_cpu_count); printf("Little CPU Count: %d\n", little_cpu_count); printf("Physical CPU Count: %d\n", physical_cpu_count); - - if (cpu_count <= 0) { + + if (cpu_count <= 0) + { printf("ERROR: Invalid CPU count\n"); return -1; } - + return 0; } -static int test_windows_api_comparison() { +static int test_windows_api_comparison() +{ print_separator("Windows API Comparison Test"); - + // Get ncnn detected CPU count int ncnn_cpu_count = ncnn::get_cpu_count(); - + // Get Windows API CPU count SYSTEM_INFO sysinfo; GetSystemInfo(&sysinfo); DWORD win_cpu_count = sysinfo.dwNumberOfProcessors; - + printf("NCNN detected CPUs: %d\n", ncnn_cpu_count); printf("Windows GetSystemInfo CPUs: %d\n", win_cpu_count); - + // Test GetLogicalProcessorInformationEx for >64 core support DWORD buffer_size = 0; GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &buffer_size); - - if (buffer_size > 0) { + + if (buffer_size > 0) + { std::vector buffer(buffer_size); - if (GetLogicalProcessorInformationEx(RelationProcessorCore, - (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)buffer.data(), &buffer_size)) { - + if (GetLogicalProcessorInformationEx(RelationProcessorCore, + (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)buffer.data(), &buffer_size)) + { int core_count = 0; int group_count = 0; - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX current = - (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)buffer.data(); - - while ((char*)current < buffer.data() + buffer_size) { - if (current->Relationship == RelationProcessorCore) { + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX current = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)buffer.data(); + + while ((char*)current < buffer.data() + buffer_size) + { + if (current->Relationship == RelationProcessorCore) + { core_count++; group_count = max(group_count, (int)current->Processor.GroupCount); } - current = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) - ((char*)current + current->Size); + current = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)((char*)current + current->Size); } - + printf("GetLogicalProcessorInformationEx cores: %d\n", core_count); printf("Processor groups detected: %d\n", group_count); - - if (core_count > 64) { + + if (core_count > 64) + { printf("SUCCESS: Detected >64 core system\n"); } } } - + return 0; } -static int test_cpuset_basic_operations() { +static int test_cpuset_basic_operations() +{ print_separator("CpuSet Basic Operations Test"); - + ncnn::CpuSet cpuset; - + // Test initial state int initial_enabled = cpuset.num_enabled(); printf("Initial enabled CPUs: %d\n", initial_enabled); - + // Test enabling specific CPUs int cpu_count = ncnn::get_cpu_count(); - for (int i = 0; i < min(cpu_count, 8); i++) { + for (int i = 0; i < min(cpu_count, 8); i++) + { cpuset.enable(i); - if (!cpuset.is_enabled(i)) { + if (!cpuset.is_enabled(i)) + { printf("ERROR: Failed to enable CPU %d\n", i); return -1; } } - + printf("Enabled first 8 CPUs, total enabled: %d\n", cpuset.num_enabled()); - + // Test disabling cpuset.disable(0); - if (cpuset.is_enabled(0)) { + if (cpuset.is_enabled(0)) + { printf("ERROR: Failed to disable CPU 0\n"); return -1; } - + printf("Disabled CPU 0, total enabled: %d\n", cpuset.num_enabled()); - + // Test disable_all cpuset.disable_all(); - if (cpuset.num_enabled() != 0) { + if (cpuset.num_enabled() != 0) + { printf("ERROR: disable_all failed\n"); return -1; } - + printf("After disable_all, enabled CPUs: %d\n", cpuset.num_enabled()); - + return 0; } -static int test_cpuset_large_core_numbers() { +static int test_cpuset_large_core_numbers() +{ print_separator("CpuSet Large Core Numbers Test"); - + ncnn::CpuSet cpuset; int cpu_count = ncnn::get_cpu_count(); - + // Test enabling all available CPUs - for (int i = 0; i < cpu_count; i++) { + for (int i = 0; i < cpu_count; i++) + { cpuset.enable(i); } - + int enabled_count = cpuset.num_enabled(); printf("Enabled all %d CPUs, actual enabled: %d\n", cpu_count, enabled_count); - - if (enabled_count != cpu_count) { + + if (enabled_count != cpu_count) + { printf("WARNING: Mismatch between expected and actual enabled CPUs\n"); } - + // Test boundary conditions - if (cpu_count > 64) { + if (cpu_count > 64) + { printf("Testing >64 core boundary...\n"); - + cpuset.disable_all(); - + // Enable CPUs around the 64-core boundary - for (int i = 60; i < min(cpu_count, 68); i++) { + for (int i = 60; i < min(cpu_count, 68); i++) + { cpuset.enable(i); - if (!cpuset.is_enabled(i)) { + if (!cpuset.is_enabled(i)) + { printf("ERROR: Failed to enable CPU %d (around 64-core boundary)\n", i); return -1; } } - + printf("Successfully enabled CPUs around 64-core boundary\n"); } - + return 0; } #ifdef _WIN32 -static int test_windows_specific_features() { +static int test_windows_specific_features() +{ print_separator("Windows Specific Features Test"); - + ncnn::CpuSet cpuset; - + // Test Windows-specific methods int max_cpus = cpuset.get_max_cpus(); int active_groups = cpuset.get_active_group_count(); - + printf("Max CPUs: %d\n", max_cpus); printf("Active processor groups: %d\n", active_groups); - + // Test group masks - for (int group = 0; group < active_groups && group < 4; group++) { + for (int group = 0; group < active_groups && group < 4; group++) + { ULONG_PTR mask = cpuset.get_group_mask(group); printf("Group %d mask: 0x%llx\n", group, (unsigned long long)mask); } - + // Test enabling CPUs in different groups - if (active_groups > 1) { + if (active_groups > 1) + { printf("Testing multi-group CPU enabling...\n"); - + cpuset.disable_all(); - + // Enable some CPUs in group 0 - for (int i = 0; i < min(4, max_cpus); i++) { + for (int i = 0; i < min(4, max_cpus); i++) + { cpuset.enable(i); } - + // Enable some CPUs in group 1 (if exists) - if (max_cpus > 64) { - for (int i = 64; i < min(68, max_cpus); i++) { + if (max_cpus > 64) + { + for (int i = 64; i < min(68, max_cpus); i++) + { cpuset.enable(i); } } - + printf("Multi-group test completed, enabled CPUs: %d\n", cpuset.num_enabled()); } - + return 0; } #endif -static int test_thread_affinity() { +static int test_thread_affinity() +{ print_separator("Thread Affinity Test"); - + // Test getting thread affinity masks const ncnn::CpuSet& mask_all = ncnn::get_cpu_thread_affinity_mask(0); const ncnn::CpuSet& mask_little = ncnn::get_cpu_thread_affinity_mask(1); const ncnn::CpuSet& mask_big = ncnn::get_cpu_thread_affinity_mask(2); - + printf("All cores mask enabled CPUs: %d\n", mask_all.num_enabled()); printf("Little cores mask enabled CPUs: %d\n", mask_little.num_enabled()); printf("Big cores mask enabled CPUs: %d\n", mask_big.num_enabled()); - + // Test setting thread affinity ncnn::CpuSet custom_mask; int cpu_count = ncnn::get_cpu_count(); - + // Enable every other CPU - for (int i = 0; i < cpu_count; i += 2) { + for (int i = 0; i < cpu_count; i += 2) + { custom_mask.enable(i); } - + printf("Setting custom affinity with %d CPUs...\n", custom_mask.num_enabled()); int result = ncnn::set_cpu_thread_affinity(custom_mask); - - if (result == 0) { + + if (result == 0) + { printf("Thread affinity set successfully\n"); - } else { + } + else + { printf("Thread affinity setting failed with code: %d\n", result); } - + return 0; } -int main() { +int main() +{ printf("NCNN CPU Core Support Test for Windows 64+ Cores\n"); printf("================================================\n"); - + int result = 0; - + result |= test_basic_cpu_info(); result |= test_windows_api_comparison(); result |= test_cpuset_basic_operations(); result |= test_cpuset_large_core_numbers(); - + #ifdef _WIN32 result |= test_windows_specific_features(); #endif - + result |= test_thread_affinity(); - + print_separator("Test Summary"); - if (result == 0) { + if (result == 0) + { printf("All tests PASSED\n"); - } else { + } + else + { printf("Some tests FAILED (return code: %d)\n", result); } - + return result; } \ No newline at end of file From 1edcc31dd58b18fd610ab1385100d2495a49df2c Mon Sep 17 00:00:00 2001 From: Xin <3457165878@qq.com> Date: Thu, 24 Jul 2025 18:05:00 +0800 Subject: [PATCH 5/5] Recover build scripts --- build-android.cmd | 49 +++++++++++++++++ build.sh | 131 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 180 insertions(+) create mode 100644 build-android.cmd create mode 100755 build.sh diff --git a/build-android.cmd b/build-android.cmd new file mode 100644 index 000000000000..fd1a83700f47 --- /dev/null +++ b/build-android.cmd @@ -0,0 +1,49 @@ +:: Set android ndk root +@ECHO OFF +@SETLOCAL +@SET ANDROID_NDK= + +:: Set ninja.exe +:: @SET NINJA_EXE= + +:: android armv7 +mkdir build-android-armv7-vulkan +pushd build-android-armv7-vulkan +cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON .. +cmake --build . --parallel %NUMBER_OF_PROCESSORS% +cmake --build . --target install +popd + +:: android aarch64 +mkdir build-android-aarch64-vulkan +pushd build-android-aarch64-vulkan +cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-21 -DNCNN_VULKAN=ON .. +cmake --build . --parallel %NUMBER_OF_PROCESSORS% +cmake --build . --target install +popd + +:: android x86 +mkdir build-android-x86 +pushd build-android-x86 +cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="x86" -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON .. +cmake --build . --parallel %NUMBER_OF_PROCESSORS% +cmake --build . --target install +popd + +:: android x86_64 +mkdir build-android-x86_64 +pushd build-android-x86_64 +cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="x86_64" -DANDROID_PLATFORM=android-21 -DNCNN_VULKAN=ON .. +cmake --build . --parallel %NUMBER_OF_PROCESSORS% +cmake --build . --target install +popd + +:: android riscv64 +mkdir build-android-riscv64 +pushd build-android-riscv64 +cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="riscv64" -DANDROID_PLATFORM=android-35 -DNCNN_VULKAN=ON .. +cmake --build . --parallel %NUMBER_OF_PROCESSORS% +cmake --build . --target install +popd + +@ENDLOCAL diff --git a/build.sh b/build.sh new file mode 100755 index 000000000000..4b03cec34b35 --- /dev/null +++ b/build.sh @@ -0,0 +1,131 @@ +#!/usr/bin/env bash + +##### android armv7 without neon +mkdir -p build-android-armv7-without-neon +pushd build-android-armv7-without-neon +cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=OFF -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON .. +make -j4 +make install +popd + +##### android armv7 +mkdir -p build-android-armv7 +pushd build-android-armv7 +cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON .. +make -j4 +make install +popd + +##### android aarch64 +mkdir -p build-android-aarch64 +pushd build-android-aarch64 +cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-21 -DNCNN_VULKAN=ON .. +make -j4 +make install +popd + +##### android x86 +mkdir -p build-android-x86 +pushd build-android-x86 +cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86" -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON .. +make -j4 +make install +popd + +##### android x86_64 +mkdir -p build-android-x86_64 +pushd build-android-x86_64 +cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86_64" -DANDROID_PLATFORM=android-21 -DNCNN_VULKAN=ON .. +make -j4 +make install +popd + +##### android riscv64 +mkdir -p build-android-riscv64 +pushd build-android-riscv64 +cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="riscv64" -DANDROID_PLATFORM=android-35 -DNCNN_VULKAN=ON .. +make -j4 +make install +popd + +##### linux of hisiv300 (forgot the chip name) toolchain with neon and openmp +mkdir -p build-hisiv300-linux +pushd build-hisiv300-linux +cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/hisiv300.toolchain.cmake .. +make -j4 +make install +popd + +##### linux of hisiv500 (Hi3516CV200 and Hi3519V101) toolchain with neon and openmp +mkdir -p build-hisiv500-linux +pushd build-hisiv500-linux +cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/hisiv500.toolchain.cmake .. +make -j4 +make install +popd + +##### linux of hisiv600 (Hi3559V100) toolchain with neon and no openmp (due to only one cpu, close openmp) +mkdir -p build-hisiv600-linux +pushd build-hisiv600-linux +cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/hisiv600.toolchain.cmake .. +make -j4 +make install +popd + +##### linux of himix100 (Hi3559a) toolchain with neon and openmp +mkdir -p build-himix100-linux +pushd build-himix100-linux +cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/himix100.toolchain.cmake .. +make -j4 +make install +popd + +##### linux of arm-linux-gnueabi toolchain +mkdir -p build-arm-linux-gnueabi +pushd build-arm-linux-gnueabi +cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake .. +make -j4 +make install +popd + +##### linux of arm-linux-gnueabihf toolchain +mkdir -p build-arm-linux-gnueabihf +pushd build-arm-linux-gnueabihf +cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake .. +make -j4 +make install +popd + +##### linux of v831 toolchain with neon and openmp +mkdir -p build-v831-linux +pushd build-v831-linux +cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/v831.toolchain.cmake .. +make -j4 +make install +popd + +##### linux for aarch64-linux-gnu toolchain +mkdir -p build-aarch64-linux-gnu +pushd build-aarch64-linux-gnu +cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake .. +make -j4 +make install +popd + +##### linux host system with gcc/g++ +mkdir -p build-host-gcc-linux +pushd build-host-gcc-linux +cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc.toolchain.cmake .. +make -j4 +make install +popd + +##### MacOS +mkdir -p build-mac +pushd build-mac +cmake -DNCNN_OPENMP=OFF \ + -DNCNN_BENCHMARK=ON \ + .. +make -j8 +make install +popd