From 43cd0f79078f3df3db2ba0e936f806dd95abc229 Mon Sep 17 00:00:00 2001 From: Sepcnt <30561671+sepcnt@users.noreply.github.com> Date: Sun, 19 May 2024 16:54:19 +0000 Subject: [PATCH 1/5] Add support for MSVC and GitHub Actions --- .github/workflows/build.yml | 36 ++++++++++++++++++++++++++++++ build.rs | 9 +++++++- src/hvm.c | 44 +++++++++++++++++++++++++++---------- src/hvm.cu | 41 ++++++++++++++++++++++++---------- 4 files changed, 107 insertions(+), 23 deletions(-) create mode 100644 .github/workflows/build.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..b1778b18 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,36 @@ +name: Build and Lint on Multiple Platforms + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macOS-latest] + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@nightly + - uses: Swatinem/rust-cache@v2 + with: + # To only cache runs from `master`: + save-if: ${{ github.ref == 'refs/heads/master' }} + - uses: Jimver/cuda-toolkit@v0.2.15 + id: cuda-toolkit + with: + cuda: '12.4.1' + if: matrix.os != 'macOS-latest' + + - name: Clippy Check + run: cargo clippy --all-features -- -D warnings + continue-on-error: true + + - name: Build + run: cargo build --all-features diff --git a/build.rs b/build.rs index 5499211e..90dfe86b 100644 --- a/build.rs +++ b/build.rs @@ -6,9 +6,16 @@ fn main() { println!("cargo:rerun-if-changed=src/hvm.c"); println!("cargo:rerun-if-changed=src/hvm.cu"); - match cc::Build::new() + let mut build = cc::Build::new(); + + if cfg!(target_env = "msvc") { + build.flag("/experimental:c11atomics"); + } + + match build .file("src/hvm.c") .opt_level(3) + .std("c11") .warnings(false) .define("TPC_L2", &*tpcl2.to_string()) .try_compile("hvm-c") { diff --git a/src/hvm.c b/src/hvm.c index 44790811..c857359f 100644 --- a/src/hvm.c +++ b/src/hvm.c @@ -1,6 +1,11 @@ #include #include +#include +#ifdef _WIN32 +#include +#else #include +#endif #include #include #include @@ -113,9 +118,9 @@ typedef u32 Numb; // Numb ::= 29-bit (rounded up to u32) #define G_RBAG_LEN (TPC * RLEN) typedef struct Net { - APair node_buf[G_NODE_LEN]; // global node buffer - APort vars_buf[G_VARS_LEN]; // global vars buffer - APair rbag_buf[G_RBAG_LEN]; // global rbag buffer + APair* node_buf; // global node buffer, size = G_NODE_LEN + APort* vars_buf; // global vars buffer, size = G_VARS_LEN + APair* rbag_buf; // global rbag buffer, size = G_RBAG_LEN a64 itrs; // interaction count a32 idle; // idle thread counter } Net; @@ -268,9 +273,11 @@ static inline void swap(Port *a, Port *b) { Port x = *a; *a = *b; *b = x; } +#ifndef _WIN32 u32 min(u32 a, u32 b) { return (a < b) ? a : b; } +#endif // A simple spin-wait barrier using atomic operations a64 a_reached = 0; // number of threads that reached the current barrier @@ -284,7 +291,7 @@ void sync_threads() { } else { u32 tries = 0; while (atomic_load_explicit(&a_barrier, memory_order_acquire) == barrier_old) { - sched_yield(); + thrd_yield(); } } } @@ -301,10 +308,20 @@ u32 global_sum(u32 x) { } // TODO: write a time64() function that returns the time as fast as possible as a u64 +// The time should be in nanoseconds, but not related to UTC time static inline u64 time64() { +#ifndef _WIN32 struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return (u64)ts.tv_sec * 1000000000ULL + (u64)ts.tv_nsec; +#else + // @developedby: We dont care about system time, this is just a timer. + LARGE_INTEGER freq; + LARGE_INTEGER counter; + QueryPerformanceFrequency(&freq); + QueryPerformanceCounter(&counter); + return (u64)((counter.QuadPart * 1000000000ULL) / freq.QuadPart); +#endif } // Ports / Pairs / Rules @@ -645,6 +662,11 @@ static inline void net_init(Net* net) { // is that needed? atomic_store(&net->itrs, 0); atomic_store(&net->idle, 0); + + // allocates global buffers + net->node_buf = malloc(G_NODE_LEN * sizeof(APair)); + net->vars_buf = malloc(G_VARS_LEN * sizeof(APort)); + net->rbag_buf = malloc(G_RBAG_LEN * sizeof(APair)); } // Allocator @@ -1133,7 +1155,7 @@ void evaluator(Net* net, TM* tm, Book* book) { //if (stolen) { //push_redex(net, tm, trg); //} else { - //// do nothing: will sched_yield + //// do nothing: will thrd_yield //} //// If we see a non-stealable redex, try the next one //} else { @@ -1153,7 +1175,7 @@ void evaluator(Net* net, TM* tm, Book* book) { } // Chill... - sched_yield(); + thrd_yield(); // Halt if all threads are idle if (tick % 256 == 0) { if (atomic_load_explicit(&net->idle, memory_order_relaxed) == TPC) { @@ -1179,7 +1201,7 @@ typedef struct { Book* book; } ThreadArg; -void* thread_func(void* arg) { +thrd_start_t thread_func(void* arg) { ThreadArg* data = (ThreadArg*)arg; evaluator(data->net, data->tm, data->book); return NULL; @@ -1203,14 +1225,14 @@ void normalize(Net* net, Book* book) { } // Spawns the evaluation threads - pthread_t threads[TPC]; + thrd_t threads[TPC]; for (u32 t = 0; t < TPC; ++t) { - pthread_create(&threads[t], NULL, thread_func, &thread_arg[t]); + thrd_create(&threads[t], thread_func, &thread_arg[t]); } // Wait for the threads to finish for (u32 t = 0; t < TPC; ++t) { - pthread_join(threads[t], NULL); + thrd_join(threads[t], NULL); } } @@ -1532,7 +1554,7 @@ Port io_sleep(Net* net, Book* book, u32 argc, Port* argv) { struct timespec ts; ts.tv_sec = dur_ns / 1000000000; ts.tv_nsec = dur_ns % 1000000000; - nanosleep(&ts, NULL); + thrd_sleep(&ts, NULL); // Return an eraser return new_port(ERA, 0); diff --git a/src/hvm.cu b/src/hvm.cu index 4fb3aa1d..0e39e718 100644 --- a/src/hvm.cu +++ b/src/hvm.cu @@ -346,13 +346,13 @@ const u32 G_RBAG_LEN = TPB * BPG * RLEN * 3; // max 4m redexes struct GNet { u32 rbag_use_A; // total rbag redex count (buffer A) u32 rbag_use_B; // total rbag redex count (buffer B) - Pair rbag_buf_A[G_RBAG_LEN]; // global redex bag (buffer A) - Pair rbag_buf_B[G_RBAG_LEN]; // global redex bag (buffer B) - Pair node_buf[G_NODE_LEN]; // global node buffer - Port vars_buf[G_VARS_LEN]; // global vars buffer - u32 node_put[TPB*BPG]; - u32 vars_put[TPB*BPG]; - u32 rbag_pos[TPB*BPG]; + Pair* rbag_buf_A; // global redex bag (buffer A), size = G_RBAG_LEN + Pair* rbag_buf_B; // global redex bag (buffer B), size = G_RBAG_LEN + Pair* node_buf; // global node buffer, size = G_NODE_LEN + Port* vars_buf; // global vars buffer, size = G_VARS_LEN + u32* node_put; // size = TPB*BPG + u32* vars_put; // size = TPB*BPG + u32* rbag_pos; // size = TPB*BPG u8 mode; // evaluation mode (curr) u64 itrs; // interaction count u64 iadd; // interaction count adder @@ -1895,10 +1895,29 @@ __global__ void evaluator(GNet* gnet) { // ------------------- GNet* gnet_create() { - GNet *gnet; - cudaMalloc((void**)&gnet, sizeof(GNet)); - cudaMemset(gnet, 0, sizeof(GNet)); - return gnet; + GNet gnet; + memset(&gnet, 0, sizeof(GNet)); + + #define ALLOCATE_HOST_POINTER(__host_pointer, __size) \ + do { \ + cudaMalloc((void**)&(__host_pointer), __size); \ + cudaMemset(__host_pointer, 0, __size); \ + } while(0) + + ALLOCATE_HOST_POINTER(gnet.rbag_buf_A, G_RBAG_LEN * sizeof(Pair)); + ALLOCATE_HOST_POINTER(gnet.rbag_buf_B, G_RBAG_LEN * sizeof(Pair)); + ALLOCATE_HOST_POINTER(gnet.node_buf, G_NODE_LEN * sizeof(Pair)); + ALLOCATE_HOST_POINTER(gnet.vars_buf, G_VARS_LEN * sizeof(Port)); + ALLOCATE_HOST_POINTER(gnet.node_put, BPG * TPB * sizeof(u32)); + ALLOCATE_HOST_POINTER(gnet.vars_put, BPG * TPB * sizeof(u32)); + ALLOCATE_HOST_POINTER(gnet.rbag_pos, BPG * TPB * sizeof(u32)); + + #undef ALLOCATE_HOST_POINTER + + GNet* gnet_d; + cudaMalloc(&gnet_d, sizeof(GNet)); + cudaMemcpy(gnet_d, &gnet, sizeof(GNet), cudaMemcpyHostToDevice); + return gnet_d; } u32 gnet_get_rlen(GNet* gnet, u32 turn) { From 0d51264d2a0a09bb7bd53d161b6809dc666afd03 Mon Sep 17 00:00:00 2001 From: Sepcnt <30561671+sepcnt@users.noreply.github.com> Date: Sun, 19 May 2024 17:17:21 +0000 Subject: [PATCH 2/5] Polyfill for thread.h and fix disk issue of Linux CI --- .github/workflows/build.yml | 2 ++ src/hvm.c | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b1778b18..d40d1f25 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -26,6 +26,8 @@ jobs: id: cuda-toolkit with: cuda: '12.4.1' + method: 'network' + sub-packages: '["nvcc"]' if: matrix.os != 'macOS-latest' - name: Clippy Check diff --git a/src/hvm.c b/src/hvm.c index c857359f..897fbcb4 100644 --- a/src/hvm.c +++ b/src/hvm.c @@ -1,10 +1,16 @@ #include #include -#include #ifdef _WIN32 #include +#include #else #include +typedef pthread_t thrd_t; +typedef void* thrd_start_t; +#define thrd_create(a, b, c) pthread_create(a, NULL, b, c) +#define thrd_join(a, b) pthread_join(a, b) +#define thrd_yield() sched_yield() +#define thrd_sleep(a, b) nanosleep(a, b) #endif #include #include @@ -662,7 +668,7 @@ static inline void net_init(Net* net) { // is that needed? atomic_store(&net->itrs, 0); atomic_store(&net->idle, 0); - + // allocates global buffers net->node_buf = malloc(G_NODE_LEN * sizeof(APair)); net->vars_buf = malloc(G_VARS_LEN * sizeof(APort)); From 9a99d38501156930afc295f84313be288e5b7ae6 Mon Sep 17 00:00:00 2001 From: Sepcnt <30561671+sepcnt@users.noreply.github.com> Date: Sun, 19 May 2024 17:33:24 +0000 Subject: [PATCH 3/5] Fix CI bugs --- .github/workflows/build.yml | 2 +- src/hvm.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d40d1f25..de854938 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -18,6 +18,7 @@ jobs: steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@nightly + - run: rustup component add clippy - uses: Swatinem/rust-cache@v2 with: # To only cache runs from `master`: @@ -27,7 +28,6 @@ jobs: with: cuda: '12.4.1' method: 'network' - sub-packages: '["nvcc"]' if: matrix.os != 'macOS-latest' - name: Clippy Check diff --git a/src/hvm.c b/src/hvm.c index 897fbcb4..e5dbea01 100644 --- a/src/hvm.c +++ b/src/hvm.c @@ -5,6 +5,7 @@ #include #else #include +#include typedef pthread_t thrd_t; typedef void* thrd_start_t; #define thrd_create(a, b, c) pthread_create(a, NULL, b, c) From 8f15382a0192f0e6465bc7d9238e5e82d23c3091 Mon Sep 17 00:00:00 2001 From: Sepcnt <30561671+sepcnt@users.noreply.github.com> Date: Mon, 20 May 2024 02:51:54 +0000 Subject: [PATCH 4/5] Fix GCC std flag --- .github/workflows/build.yml | 5 +++-- build.rs | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index de854938..6a34106a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,11 +28,12 @@ jobs: with: cuda: '12.4.1' method: 'network' + sub-packages: '[ "nvcc", "cudart", "nvrtc" ]' if: matrix.os != 'macOS-latest' - name: Clippy Check - run: cargo clippy --all-features -- -D warnings + run: cargo clippy -- -D warnings continue-on-error: true - name: Build - run: cargo build --all-features + run: cargo build diff --git a/build.rs b/build.rs index 90dfe86b..9d98b569 100644 --- a/build.rs +++ b/build.rs @@ -10,12 +10,12 @@ fn main() { if cfg!(target_env = "msvc") { build.flag("/experimental:c11atomics"); + build.std("c11"); } match build .file("src/hvm.c") .opt_level(3) - .std("c11") .warnings(false) .define("TPC_L2", &*tpcl2.to_string()) .try_compile("hvm-c") { From 99dcdd0375f204f9a760de493909e3042dbe45b7 Mon Sep 17 00:00:00 2001 From: Sepcnt <30561671+sepcnt@users.noreply.github.com> Date: Tue, 21 May 2024 01:42:16 +0000 Subject: [PATCH 5/5] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e483a994..ebca1e2b 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ check [Bend](https://github.com/HigherOrderCO/Bend) instead. Usage ----- -> DISCLAIMER: Windows is currently not supported, please use [WSL](https://learn.microsoft.com/en-us/windows/wsl/install) for now as a workaround. +> For native Windows build, you will need at least Visual Studio 2022 version 17.5 for [C11 Atomics](https://devblogs.microsoft.com/cppblog/c11-atomics-in-visual-studio-2022-version-17-5-preview-2/) support. First install the dependencies: * If you want to use the C runtime, install a C-11 compatible compiler like GCC or Clang.