diff --git a/src/ast.rs b/src/ast.rs index 56db241c..862b530c 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,6 +1,6 @@ use TSPL::{new_parser, Parser}; use highlight_error::highlight_error; -use crate::hvm; +use crate::{hvm, interop}; use std::fmt::{Debug, Display}; use std::collections::{BTreeMap, BTreeSet}; @@ -364,7 +364,7 @@ impl Book { // -------- impl Tree { - pub fn readback(net: &hvm::GNet, port: hvm::Port, fids: &BTreeMap) -> Option { + pub fn readback(net: &mut N, port: hvm::Port, fids: &BTreeMap) -> Option { //println!("reading {}", port.show()); match port.get_tag() { hvm::VAR => { @@ -416,7 +416,7 @@ impl Tree { } impl Net { - pub fn readback(net: &hvm::GNet, book: &hvm::Book) -> Option { + pub fn readback(net: &mut N, book: &hvm::Book) -> Option { let mut fids = BTreeMap::new(); for (fid, def) in book.defs.iter().enumerate() { fids.insert(fid as hvm::Val, def.name.clone()); diff --git a/src/hvm.c b/src/hvm.c index 786de2d2..f4abd852 100644 --- a/src/hvm.c +++ b/src/hvm.c @@ -1755,10 +1755,24 @@ void pretty_print_port(Net* net, Book* book, Port port) { void do_run_io(Net* net, Book* book, Port port); #endif -// Main +// Output Net +// Used by other languages calling `hvm_c` // ---- +typedef struct OutputNet { + void *original; + APair *node_buf; + APort *vars_buf; + a64 itrs; +} OutputNet; + +void free_output_net_c(OutputNet* net) { + free((Net*)net->original); + free(net); +} -void hvm_c(u32* book_buffer) { +// Main +// ---- +OutputNet* hvm_c(u32* book_buffer, bool return_output) { // Creates static TMs alloc_static_tms(); @@ -1781,33 +1795,50 @@ void hvm_c(u32* book_buffer) { #ifdef IO do_run_io(net, book, ROOT); + // IO actions into `stdout` and `stderr` may appear + // after Rust `print`s if we don't flush + fflush(stdout); + fflush(stderr); #else normalize(net, book); #endif - // Prints the result - printf("Result: "); - pretty_print_port(net, book, enter(net, ROOT)); - printf("\n"); - // Stops the timer double duration = (time64() - start) / 1000000000.0; // seconds - // Prints interactions and time - u64 itrs = atomic_load(&net->itrs); - printf("- ITRS: %" PRIu64 "\n", itrs); - printf("- TIME: %.2fs\n", duration); - printf("- MIPS: %.2f\n", (double)itrs / duration / 1000000.0); + if (!return_output) { + // Prints the result + printf("Result: "); + pretty_print_port(net, book, enter(net, ROOT)); + printf("\n"); + + // Prints interactions and time + u64 itrs = atomic_load(&net->itrs); + printf("- ITRS: %" PRIu64 "\n", itrs); + printf("- TIME: %.2fs\n", duration); + printf("- MIPS: %.2f\n", (double)itrs / duration / 1000000.0); + } // Frees everything free_static_tms(); - free(net); free(book); + + if (return_output) { + OutputNet *output = malloc(sizeof(OutputNet)); + output->original = (void*)net; + output->node_buf = &net->node_buf[0]; + output->vars_buf = &net->vars_buf[0]; + output->itrs = atomic_load(&net->itrs); + return output; + } + + free(net); + return NULL; } #ifdef WITH_MAIN int main() { - hvm_c((u32*)BOOK_BUF); + hvm_c((u32*)BOOK_BUF, NULL); return 0; } #endif diff --git a/src/hvm.cu b/src/hvm.cu index 2b740ef8..2f87b409 100644 --- a/src/hvm.cu +++ b/src/hvm.cu @@ -2279,6 +2279,37 @@ __global__ void print_result(GNet* gnet) { //COMPILED_BOOK_BUF// +// Output Net +// Used by other languages calling `hvm_cu` +// ---- +struct OutputNet { + void *original; + Pair *node_buf; + Port *vars_buf; + u64 itrs; +}; + +OutputNet* create_output_net(GNet* gnet) { + OutputNet* output = (OutputNet*)malloc(sizeof(OutputNet)); + + // Allocate host memory for the net + GNet* h_gnet = (GNet*)malloc(sizeof(GNet)); + + // Copy the net from device to host + cudaMemcpy(h_gnet, gnet, sizeof(GNet), cudaMemcpyDeviceToHost); + + output->original = (void*)h_gnet; + output->node_buf = h_gnet->node_buf; + output->vars_buf = h_gnet->vars_buf; + output->itrs = h_gnet->itrs; + return output; +} + +extern "C" void free_output_net_cuda(OutputNet* net) { + free((GNet*)net->original); + free(net); +} + // Main // ---- @@ -2286,7 +2317,7 @@ __global__ void print_result(GNet* gnet) { void do_run_io(GNet* gnet, Book* book, Port port); #endif -extern "C" void hvm_cu(u32* book_buffer) { +extern "C" OutputNet* hvm_cu(u32* book_buffer, bool return_output) { // Start the timer clock_t start = clock(); @@ -2308,6 +2339,10 @@ extern "C" void hvm_cu(u32* book_buffer) { #ifdef IO do_run_io(gnet, book, ROOT); + // IO actions into `stdout` and `stderr` may appear + // after Rust `print`s if we don't flush + fflush(stdout); + fflush(stderr); #else gnet_normalize(gnet); #endif @@ -2319,7 +2354,10 @@ extern "C" void hvm_cu(u32* book_buffer) { double duration = ((double)(end - start)) / CLOCKS_PER_SEC; // Prints the result - print_result<<<1,1>>>(gnet); + // If `output` is set, the Rust implementation will print the net + if (!return_output) { + print_result<<<1,1>>>(gnet); + } // Reports errors cudaError_t err = cudaGetLastError(); @@ -2355,15 +2393,21 @@ extern "C" void hvm_cu(u32* book_buffer) { //cudaMemcpy(&itrs, &gnet->itrs, sizeof(u64), cudaMemcpyDeviceToHost); // Prints interactions, time and MIPS + // If `output` is set, the Rust implementation will print the net + if (return_output) { + return create_output_net(gnet); + } + printf("- ITRS: %llu\n", gnet_get_itrs(gnet)); printf("- LEAK: %llu\n", gnet_get_leak(gnet)); printf("- TIME: %.2fs\n", duration); printf("- MIPS: %.2f\n", (double)gnet_get_itrs(gnet) / duration / 1000000.0); + return NULL; } #ifdef WITH_MAIN int main() { - hvm_cu((u32*)BOOK_BUF); + hvm_cu((u32*)BOOK_BUF, false); return 0; } #endif diff --git a/src/hvm.rs b/src/hvm.rs index d7007aad..941cf8d7 100644 --- a/src/hvm.rs +++ b/src/hvm.rs @@ -12,15 +12,19 @@ pub type Val = u32; // Val ::= 29-bit (rounded up to u32) pub type Rule = u8; // Rule ::= 8-bit (fits a u8) // Port +#[repr(C)] #[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Hash)] pub struct Port(pub Val); // Pair +#[repr(C)] pub struct Pair(pub u64); // Atomics pub type AVal = AtomicU32; +#[repr(C)] pub struct APort(pub AVal); +#[repr(C)] pub struct APair(pub AtomicU64); // Number diff --git a/src/interop.rs b/src/interop.rs new file mode 100644 index 00000000..f6611b1e --- /dev/null +++ b/src/interop.rs @@ -0,0 +1,218 @@ +use std::alloc; +use std::sync::atomic::{AtomicU32, AtomicU64, Ordering}; + +use crate::hvm::*; + +#[cfg(feature = "c")] +extern "C" { + pub fn hvm_c(book_buffer: *const u32, return_output: u8) -> *mut OutputNetC; + pub fn free_output_net_c(net: *mut OutputNetC); +} + +#[cfg(feature = "cuda")] +extern "C" { + pub fn hvm_cu(book_buffer: *const u32, return_output: bool) -> *mut OutputNetCuda; + pub fn free_output_net_cuda(net: *mut OutputNetCuda); +} + +// Abstract Global Net +// Allows any global net to be read back +// ------------- + +pub trait NetReadback { + fn run(book: &Book) -> Self; + fn node_load(&self, loc: usize) -> Pair; + fn vars_exchange(&mut self, var: usize, val: Port) -> Port; + fn itrs(&self) -> u64; + + fn vars_take(&mut self, var: usize) -> Port { + self.vars_exchange(var, Port(0)) + } + + fn enter(&mut self, mut var: Port) -> Port { + // While `B` is VAR: extend it (as an optimization) + while var.get_tag() == VAR { + // Takes the current `B` substitution as `B'` + let val = self.vars_exchange(var.get_val() as usize, NONE); + // If there was no `B'`, stop, as there is no extension + if val == NONE || val == Port(0) { + break; + } + // Otherwise, delete `B` (we own both) and continue as `A ~> B'` + self.vars_take(var.get_val() as usize); + var = val; + } + return var; + } +} + +impl<'a> NetReadback for GNet<'a> { + fn run(book: &Book) -> Self { + // Initializes the global net + let net = GNet::new(1 << 29, 1 << 29); + + // Initializes threads + let mut tm = TMem::new(0, 1); + + // Creates an initial redex that calls main + let main_id = book.defs.iter().position(|def| def.name == "main").unwrap(); + tm.rbag.push_redex(Pair::new(Port::new(REF, main_id as u32), ROOT)); + net.vars_create(ROOT.get_val() as usize, NONE); + + // Evaluates + tm.evaluator(&net, &book); + + net + } + fn node_load(&self, loc: usize) -> Pair { self.node_load(loc) } + fn vars_exchange(&mut self, var: usize, val: Port) -> Port { GNet::vars_exchange(self, var, val) } + fn itrs(&self) -> u64 { self.itrs.load(Ordering::Relaxed) } +} + +// Global Net equivalent to the C implementation. +// NOTE: If the C struct `Net` changes, this has to change as well. +// TODO: use `bindgen` crate (https://github.com/rust-lang/rust-bindgen) to generate C structs +// ------------- + +#[cfg(feature = "c")] +#[repr(C)] +pub struct NetC { + raw: *mut OutputNetC +} + +#[cfg(feature = "c")] +#[repr(C)] +pub struct OutputNetC { + pub original: *mut std::ffi::c_void, + pub node_buf: *mut APair, // global node buffer + pub vars_buf: *mut APort, // global vars buffer + pub itrs: AtomicU64, // interaction count +} + +#[cfg(feature = "c")] +impl NetC { + pub fn net<'a>(&'a self) -> &'a OutputNetC { + unsafe { &*self.raw } + } + + pub fn net_mut<'a>(&'a mut self) -> &'a mut OutputNetC { + unsafe { &mut *self.raw } + } +} + +#[cfg(feature = "c")] +impl Drop for NetC { + fn drop(&mut self) { + // Deallocate network's memory + unsafe { free_output_net_c(self.raw); } + } +} + +#[cfg(feature = "c")] +impl NetReadback for NetC { + fn run(book: &Book) -> Self { + // Serialize book + let mut data : Vec = Vec::new(); + book.to_buffer(&mut data); + //println!("{:?}", data); + let book_buffer = data.as_mut_ptr() as *mut u32; + + // Run net + let raw = unsafe { hvm_c(data.as_mut_ptr() as *mut u32, 1) }; + + NetC { raw } + } + + fn node_load(&self, loc:usize) -> Pair { + unsafe { + Pair((*self.net().node_buf.add(loc)).0.load(Ordering::Relaxed)) + } + } + + fn vars_exchange(&mut self, var: usize, val: Port) -> Port { + unsafe { + Port((*self.net().vars_buf.add(var)).0.swap(val.0, Ordering::Relaxed) as u32) + } + } + + fn itrs(&self) -> u64 { + self.net().itrs.load(Ordering::Relaxed) + } +} + +// Global Net equivalent to the CUDA implementation. +// NOTE: If the CUDA struct `Net` changes, this has to change as well. +// ------------- + +// TODO +// Problem: CUDA's `GNet` is allocated using `cudaMalloc` +// Solution: Write a CUDA kernel to compact GPU memory and then `memcpy` it to RAM + +#[cfg(feature = "cuda")] +#[repr(C)] +pub struct NetCuda { + raw: *mut OutputNetCuda +} + +#[cfg(feature = "cuda")] +#[repr(C)] +pub struct OutputNetCuda { + pub original: *mut std::ffi::c_void, + pub node_buf: *mut Pair, // global node buffer + pub vars_buf: *mut Port, // global vars buffer + pub itrs: u64, // interaction count +} + +#[cfg(feature = "cuda")] +impl NetCuda { + pub fn net(&self) -> &OutputNetCuda { + unsafe { &*self.raw } + } + + pub fn net_mut(&mut self) -> &mut OutputNetCuda { + unsafe { &mut *self.raw } + } +} + +#[cfg(feature = "cuda")] +impl Drop for NetCuda { + fn drop(&mut self) { + // Deallocate network's memory + unsafe { free_output_net_cuda(self.raw); } + } +} + +#[cfg(feature = "cuda")] +impl NetReadback for NetCuda { + fn run(book: &Book) -> Self { + // Serialize book + let mut data : Vec = Vec::new(); + book.to_buffer(&mut data); + //println!("{:?}", data); + let book_buffer = data.as_mut_ptr() as *mut u32; + + // Run net + let raw = unsafe { hvm_cu(data.as_mut_ptr() as *mut u32, true) }; + + NetCuda { raw } + } + + fn node_load(&self, loc:usize) -> Pair { + unsafe { + Pair((*self.net().node_buf.add(loc)).0) + } + } + + fn vars_exchange(&mut self, var: usize, val: Port) -> Port { + unsafe { + let net = self.net_mut(); + let old = *net.vars_buf.add(var); + *net.vars_buf.add(var) = val; + old + } + } + + fn itrs(&self) -> u64 { + self.net().itrs + } +} diff --git a/src/lib.rs b/src/lib.rs index 07bd255a..c4f02d71 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,3 +5,4 @@ pub mod ast; pub mod cmp; pub mod hvm; +pub mod interop; diff --git a/src/main.rs b/src/main.rs index dee219ab..78549858 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,22 +3,14 @@ #![allow(unused_variables)] use clap::{Arg, ArgAction, Command}; -use ::hvm::{ast, cmp, hvm}; +use ::hvm::{ast, cmp, hvm, interop, interop::NetReadback}; use std::fs; +use std::alloc; use std::io::Write; use std::path::PathBuf; +use std::time::Instant; use std::process::Command as SysCommand; -#[cfg(feature = "c")] -extern "C" { - fn hvm_c(book_buffer: *const u32); -} - -#[cfg(feature = "cuda")] -extern "C" { - fn hvm_cu(book_buffer: *const u32); -} - fn main() { let matches = Command::new("hvm") .about("HVM2: Higher-order Virtual Machine 2 (32-bit Version)") @@ -69,18 +61,14 @@ fn main() { let file = sub_matches.get_one::("file").expect("required"); let code = fs::read_to_string(file).expect("Unable to read file"); let book = ast::Book::parse(&code).unwrap_or_else(|er| panic!("{}",er)).build(); - run(&book); + run::(&book); } Some(("run-c", sub_matches)) => { let file = sub_matches.get_one::("file").expect("required"); let code = fs::read_to_string(file).expect("Unable to read file"); let book = ast::Book::parse(&code).unwrap_or_else(|er| panic!("{}",er)).build(); - let mut data : Vec = Vec::new(); - book.to_buffer(&mut data); #[cfg(feature = "c")] - unsafe { - hvm_c(data.as_mut_ptr() as *mut u32); - } + run::(&book); #[cfg(not(feature = "c"))] println!("C runtime not available!\n"); } @@ -91,9 +79,7 @@ fn main() { let mut data : Vec = Vec::new(); book.to_buffer(&mut data); #[cfg(feature = "cuda")] - unsafe { - hvm_cu(data.as_mut_ptr() as *mut u32); - } + run::(&book); #[cfg(not(feature = "cuda"))] println!("CUDA runtime not available!\n If you've installed CUDA and nvcc after HVM, please reinstall HVM."); } @@ -157,39 +143,28 @@ fn main() { } } -pub fn run(book: &hvm::Book) { - // Initializes the global net - let net = hvm::GNet::new(1 << 29, 1 << 29); - - // Initializes threads - let mut tm = hvm::TMem::new(0, 1); - - // Creates an initial redex that calls main - let main_id = book.defs.iter().position(|def| def.name == "main").unwrap(); - tm.rbag.push_redex(hvm::Pair::new(hvm::Port::new(hvm::REF, main_id as u32), hvm::ROOT)); - net.vars_create(hvm::ROOT.get_val() as usize, hvm::NONE); - - // Starts the timer - let start = std::time::Instant::now(); +pub fn run(book: &hvm::Book) { + // Start timer + let timer = Instant::now(); - // Evaluates - tm.evaluator(&net, &book); + // Normalize net + let mut net = N::run(book); // Stops the timer - let duration = start.elapsed(); + let duration = timer.elapsed(); //println!("{}", net.show()); // Prints the result - if let Some(tree) = ast::Net::readback(&net, book) { + if let Some(tree) = ast::Net::readback(&mut net, book) { println!("Result: {}", tree.show()); } else { println!("Readback failed. Printing GNet memdump...\n"); - println!("{}", net.show()); + // println!("{}", net.show()); } // Prints interactions and time - let itrs = net.itrs.load(std::sync::atomic::Ordering::Relaxed); + let itrs = net.itrs(); println!("- ITRS: {}", itrs); println!("- TIME: {:.2}s", duration.as_secs_f64()); println!("- MIPS: {:.2}", itrs as f64 / duration.as_secs_f64() / 1_000_000.0);