diff --git a/README.md b/README.md index 66e1939..6e91ca0 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,13 @@ regex crate for defined expressions. The different engines have different requirements which are not described here. Please see the related project documentations. +On Ubuntu 20.04 these were necessary installs to get the build done from a stock AWS box +```bash +$ apt install build-essential cmake rustc cargo automake autoconf autopoint autogen \ + libtool libprotobuf-dev libprotobuf-c-dev protobuf-compiler ninja-build \ + ragel libpcap pcaputils pkg-config libboost-dev flex bison +``` + In the case all depencies are fulfilled, just configure and build the cmake based project: ```bash @@ -98,8 +105,29 @@ python3 ../genspreadsheet.py results.csv It will save an Excel spreadsheet with the name `regex-results-YYYYMMDD-HHMMSS.xlsx` in the current directory. +## Compiling with clang + libc++ + +Unfortunately it is not possible to run both standard C++ from GCC/stdlibc++ and clang+libc++ at the +same time, it is just the way that cmake selects a single compiler. + +To run with clang+libc++ use the following recipe: +```bash +mkdir build && cd build +cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_EXE_LINKER_FLAGS="-lc++abi -lc++" \ + -DCMAKE_CXX_COMPILER=/usr/local/bin/clang++ \ + -DCMAKE_C_COMPILER=/usr/local/bin/clang \ + -DCMAKE_CXX_FLAGS_INIT="-std=c++20 -stdlib=libc++ -march=native -mtune=native" \ + -G Ninja .. +``` + ## Results These results were obtained in an AMD Threadripper 3960X (Zen2) at 3.8 GHz running Ubuntu 20.04.5 LTS. -![Updated Performance Results](results_20221012.png "Performance Results") +![Updated Performance Results](results_threadripper.png "Performance Results") + +IceLake Xeon Platinum 8375C @ 2.90GHz (AWS C6i instance) - no mitigations + +![IceLake Server](results_icelake.png "Results Ice Lake") \ No newline at end of file diff --git a/genspreadsheet.py b/genspreadsheet.py index 5feffbc..f13e9e4 100644 --- a/genspreadsheet.py +++ b/genspreadsheet.py @@ -8,13 +8,8 @@ print("Usage: genspreadsheet.py \n") sys.exit(0) -regexre = re.compile('Regex:\s*(.*)') -resultre = re.compile('\[\s*(\S+)\]\s*time:\s*([\d\.]+).*matches:\s*(\d+)') - infilename = sys.argv[1] -current_regex = None results = {} -stats = None scanners = set() with open( infilename, "r" ) as filein: headers = filein.readline().split(';') @@ -37,14 +32,15 @@ workbook = xlsxwriter.Workbook(outfilename) worksheet = workbook.add_worksheet() worksheet.hide_gridlines(2) -worksheet.set_column(0,0,30) +worksheet.set_column(0,0,35) worksheet.set_column(1,len(scanners),10) worksheet.set_row(0,20) # Add a bold format to use to highlight cells. -bold = workbook.add_format({'bold': True}) -boldrot = workbook.add_format({'bold': True}) -boldrot.set_rotation(0) +headerfmt = workbook.add_format({'bold': True}) +headerfmt.set_bg_color('gray') +headerfmt.set_font_color('white') +headerfmt.set_rotation(0) highfmt = workbook.add_format({'bold': True}) highfmt.set_bg_color( 'orange' ) highfmt.set_font_color( 'white' ) @@ -56,19 +52,19 @@ warnfmt.set_font_color( 'black' ) warnfmt.set_align('center') -# Write some data headers. -scanners = list(scanners) +# Write headers. +scanners = sorted(list(scanners)) row = 0 for col,scanner in enumerate(scanners): - worksheet.write( row, col+1, scanner, boldrot ) -worksheet.write( row, 0, "Regex", bold) + worksheet.write( row, col+1, scanner, headerfmt ) +worksheet.write( row, 0, "Regex", headerfmt ) for regex,stats in results.items(): values = sorted([ ms for ms in stats.values() ]) lowcut = values[1] highcut = values[-2] row += 1 - worksheet.write( row, 0, regex, bold ) + worksheet.write( row, 0, regex, headerfmt ) for col,scanner in enumerate(scanners): if scanner not in stats: worksheet.write( row, col+1, "n/a", warnfmt ) diff --git a/results_icelake.png b/results_icelake.png new file mode 100644 index 0000000..a7b5d88 Binary files /dev/null and b/results_icelake.png differ diff --git a/results_threadripper.png b/results_threadripper.png new file mode 100644 index 0000000..c3a58a0 Binary files /dev/null and b/results_threadripper.png differ diff --git a/vendor/CMakeLists.txt b/vendor/CMakeLists.txt index 5da456d..f929e2d 100644 --- a/vendor/CMakeLists.txt +++ b/vendor/CMakeLists.txt @@ -71,13 +71,58 @@ function(AddExternalProject NAME LIB_NAME URL TAG) endif() endfunction() + +# building a minimal boost with just boost::regex +ExternalProject_Add( + libboost + GIT_REPOSITORY "https://github.com/boostorg/boost.git" + GIT_TAG master + GIT_SUBMODULES + tools/build + tools/boost_install + libs/regex + libs/config + libs/headers + libs/throw_exception + libs/exception + libs/assert + GIT_SHALLOW ON + GIT_SUBMODULES_RECURSE OFF + PREFIX ${CMAKE_CURRENT_SOURCE_DIR} + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/boost + TMP_DIR ${PROJECT_BINARY_DIR}/boost-tmp + STAMP_DIR ${PROJECT_BINARY_DIR}/boost-stamp + BINARY_DIR ${PROJECT_BINARY_DIR}/boost-build + DOWNLOAD_DIR ${PROJECT_BINARY_DIR}/boost-down + CONFIGURE_COMMAND + cd ${CMAKE_CURRENT_SOURCE_DIR}/boost && + ./bootstrap.sh + BUILD_COMMAND + cd ${CMAKE_CURRENT_SOURCE_DIR}/boost && + ./b2 headers && + ./b2 install -q -a + --prefix=${CMAKE_CURRENT_SOURCE_DIR}/local + --build-type=minimal + --layout=system + --disable-icu + --with-regex + variant=release link=static runtime-link=static + threading=single address-model=64 architecture=x86 + INSTALL_COMMAND "" +) + +set(INCLUDE_BOOST "local" CACHE STRING "Use boost::regex library form local built, system or disable usage.") +set_property(CACHE INCLUDE_BOOST PROPERTY STRINGS "local" "system" "disabled") +message("-- Include boost: ${INCLUDE_BOOST}") + # hyperscan AddExternalProject( "hyperscan" "hs" "https://github.com/01org/hyperscan.git" "master" - -DCMAKE_BUILD_TYPE=Release -DFAT_RUNTIME=OFF -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local + -DCMAKE_BUILD_TYPE=Release -DFAT_RUNTIME=OFF + -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local ) # oniguruma @@ -86,7 +131,8 @@ AddExternalProject( "onig" "https://github.com/kkos/oniguruma.git" "master" - -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local + -DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DINSTALL_DOCUMENTATION=OFF + -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local ) # re2 @@ -95,7 +141,8 @@ AddExternalProject( "re2" "https://github.com/google/re2.git" "main" - -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local ) # tre @@ -104,7 +151,10 @@ AddExternalProject( "tre" "https://github.com/laurikari/tre.git" "master" - cd ${CMAKE_CURRENT_SOURCE_DIR}/tre/ && ./utils/autogen.sh && cd ${PROJECT_BINARY_DIR}/tre-build && ${CMAKE_CURRENT_SOURCE_DIR}/tre/configure --prefix=${CMAKE_CURRENT_SOURCE_DIR}/local + cd ${CMAKE_CURRENT_SOURCE_DIR}/tre/ && + ./utils/autogen.sh && cd ${PROJECT_BINARY_DIR}/tre-build && + ${CMAKE_CURRENT_SOURCE_DIR}/tre/configure + --prefix=${CMAKE_CURRENT_SOURCE_DIR}/local ) # pcre2 @@ -113,7 +163,9 @@ AddExternalProject( "pcre2-8" "https://github.com/PhilipHazel/pcre2.git" "master" - -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local -DPCRE2_SUPPORT_JIT=ON + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local + -DPCRE2_SUPPORT_JIT=ON ) # c++ standard @@ -127,8 +179,9 @@ AddExternalProject( "ctre" "ctre" "https://github.com/hanickadot/compile-time-regular-expressions.git" - "master" - -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local + "main" + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/local ) # yara @@ -137,11 +190,13 @@ AddExternalProject( "yara" "https://github.com/VirusTotal/yara.git" "master" - cd ${CMAKE_CURRENT_SOURCE_DIR}/yara/ && ./bootstrap.sh && cd ${PROJECT_BINARY_DIR}/yara-build && ${CMAKE_CURRENT_SOURCE_DIR}/yara/configure --prefix=${CMAKE_CURRENT_SOURCE_DIR}/local + cd ${CMAKE_CURRENT_SOURCE_DIR}/yara/ && + ./bootstrap.sh && + cd ${PROJECT_BINARY_DIR}/yara-build && + ${CMAKE_CURRENT_SOURCE_DIR}/yara/configure + --prefix=${CMAKE_CURRENT_SOURCE_DIR}/local ) -# boost - I'm not going to build boost here -set(INCLUDE_BOOST "system" CACHE STRING "Use boost::regex library form local built, system or disable usage.") -set_property(CACHE INCLUDE_BOOST PROPERTY STRINGS "local" "system" "disabled") -message("-- Include boost: ${INCLUDE_BOOST}") + +