27 changes: 26 additions & 1 deletion .github/workflows/evaluation.yml
@@ -52,11 +52,36 @@ jobs:
continue-on-error: true
run: |
lake -R build

- name: Ensure InstCombine goals are up-to-date
run: |
bash SSA/Projects/InstCombine/scripts/test-extract-goals.sh

- name: Run Width Independent Test
continue-on-error: true
run: |
(cd artifacts/oopsla25-width-indep/; ./test_experiments.sh)

- uses: actions/github-script@v6
continue-on-error: true
if: github.event_name == 'pull_request'
with:
script: |
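// Collect the generated cactus-plot data files and post them as a single PR comment.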
const fs = require('fs')
let out = ""
out += '### InstCombine Test Run Results\n\n'
const instCombineData = fs.readFileSync('bv-evaluation/automata-automata-circuit-cactus-plot-data.tex', 'utf8')
out += '```latex\n' + instCombineData + '\n```\n\n'
out += '### MBA Test Run Results\n\n'
const mbaData = fs.readFileSync('SSA/Experimental/Bits/Fast/Dataset2/dataset2-cactus-plot-data.tex', 'utf8')
out += '```latex\n' + mbaData + '\n```\n'
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: out
})

- name: Run LLVM
continue-on-error: true
run: |
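The github-script step above only embeds .tex files that earlier steps in this workflow generate. For local inspection, a minimal sketch (assuming the evaluation and MBA scripts have already been run from the repository root, so both files exist):

```bash
# These are the two paths the workflow comment step reads.
cat bv-evaluation/automata-automata-circuit-cactus-plot-data.tex
cat SSA/Experimental/Bits/Fast/Dataset2/dataset2-cactus-plot-data.tex
```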
2 changes: 1 addition & 1 deletion artifacts/oopsla25-width-indep/run_experiments.sh
@@ -5,7 +5,7 @@ pushd $(git rev-parse --show-toplevel)
echo $PWD

pushd bv-evaluation
-./compare-automata-automata-circuit-jsonl.py --db compare.jsonl --prodrun -j5
+./compare-automata-automata-circuit-jsonl.py --db compare.jsonl --nfiles 3 -j5
./plot-automata-automata-circuit-jsonl.py compare.jsonl
popd

4 changes: 2 additions & 2 deletions artifacts/oopsla25-width-indep/test_experiments.sh
@@ -5,12 +5,12 @@ pushd $(git rev-parse --show-toplevel)
echo $PWD

pushd bv-evaluation
-./compare-automata-automata-circuit-jsonl.py --db compare.jsonl -j4
+./compare-automata-automata-circuit-jsonl.py --db compare.jsonl -j4 --nfiles 3 --timeout 50
./plot-automata-automata-circuit-jsonl.py compare.jsonl
popd

pushd SSA/Experimental/Bits/Fast/Dataset2
-./runner.py --db mba.sqlite3 -j4 --mba 100 --bv_decide 100 --bv_automata_classic 100 --kinduction_verified 100 --timeout 120
+./runner.py --db mba.sqlite3 -j4 --mba 3 --bv_decide 3 --bv_automata_classic 3 --kinduction_verified 3 --timeout 10
./plotter.py mba.sqlite3

popd
4 changes: 4 additions & 0 deletions bv-evaluation/.gitignore
@@ -1,6 +1,10 @@
*.xz
*.jpeg
*.pdf
*.jsonl
*.db
*.sqlite
*.log
*.csv
*.sqlite3
.venv/
24 changes: 10 additions & 14 deletions bv-evaluation/compare-automata-automata-circuit-jsonl.py
@@ -26,14 +26,11 @@ def sed():
return "gsed"
return "sed"

-def run_file(db : str, file: str, file_num : int, dryrun : bool):
+def run_file(db : str, file: str, file_num : int, timeout : int):
file_path = BENCHMARK_DIR + file
fileTitle = file.split('.')[0]
logging.info(f"{fileTitle}: writing 'bv_bench_automata' tactic into file #{file_num}.")
-if dryrun:
-subprocess.Popen(f'{sed()} -i -E \'s,simp_alive_benchmark,bv_bench_dryrun,g\' ' + file_path, cwd=ROOT_DIR, shell=True).wait()
-else:
-subprocess.Popen(f'{sed()} -i -E \'s,simp_alive_benchmark,bv_bench_automata,g\' ' + file_path, cwd=ROOT_DIR, shell=True).wait()
+subprocess.Popen(f'{sed()} -i -E \'s,simp_alive_benchmark,bv_bench_automata,g\' ' + file_path, cwd=ROOT_DIR, shell=True).wait()
logging.info(f"{fileTitle}: {STATUS_PROCESSING}")

cmd = 'lake lean ' + file_path
@@ -43,7 +40,7 @@ def run_file(db : str, file: str, file_num : int, dryrun : bool):
p = subprocess.Popen(cmd, cwd=ROOT_DIR, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, universal_newlines=True)
logging.info(f"{fileTitle}({file_num}): running...")
try:
-out, err = p.communicate(timeout=TIMEOUT)
+out, err = p.communicate(timeout=timeout)
except subprocess.TimeoutExpired as e:
logging.info(f"{file_path} - time out of {TIMEOUT} seconds reached")
p.kill()
@@ -71,7 +68,7 @@ def run_file(db : str, file: str, file_num : int, dryrun : bool):
f.write(json.dumps(record) + "\n")
return True

-def process(db : str, jobs: int, prod_run : bool, dryrun : bool):
+def process(db : str, jobs: int, nfiles : int, timeout : int):
tactic_auto_path = f'{ROOT_DIR}/SSA/Projects/InstCombine/TacticAuto.lean'
# os.makedirs(os.path.dirname(db), exist_ok=True)

@@ -96,17 +93,16 @@ def process(db : str, jobs: int, prod_run : bool, dryrun : bool):
for ix, file in enumerate(raw_files):
if "_proof" in file:
files.append(file)
-N_TEST_RUN_FILES = 5
-if len(files) == N_TEST_RUN_FILES and not prod_run:
-break # quit if we are not doing a production run after 5 files.
+if len(files) > nfiles:
+break

total = len(files)
logging.info(f"total #files to process: {total}")
num_completed = 0
future2file = {}
with concurrent.futures.ThreadPoolExecutor(max_workers=jobs) as executor:
for ix, file in enumerate(files):
-future = executor.submit(run_file, db, file, ix + 1, dryrun)
+future = executor.submit(run_file, db, file, ix + 1, timeout)
future2file[future] = file

total = len(future2file)
@@ -142,10 +138,10 @@ def setup_logging(db_name : str):
default_db = f'automata-circuit-{current_time}.jsonl'
parser.add_argument('--db', default=default_db, help='path to jsonl database')
parser.add_argument('-j', '--jobs', type=int, default=1)
-parser.add_argument('--dryrun', action='store_true', help="dry run the evaluation")
-parser.add_argument('--prodrun', action='store_true', help="run production run of evaluation")
+parser.add_argument('--timeout', type=int, default=TIMEOUT, help="timeout in seconds for each file run (default: 4h)")
+parser.add_argument('--nfiles', type=int, default=5, help="number of files to run (default: 5)")
args = parser.parse_args()
setup_logging(args.db)
logging.info(args)
-process(args.db, args.jobs, args.prodrun, args.dryrun)
+process(args.db, jobs=args.jobs, nfiles=args.nfiles, timeout=args.timeout)
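Taken together, the new flags replace the old --dryrun/--prodrun switches: the run size and per-file timeout are now passed explicitly. A minimal usage sketch (flag names come from the argparse changes above; the larger values in the second invocation are illustrative assumptions, not values taken from this PR):

```bash
# CI-sized smoke test: at most 3 benchmark files, 50 s per file (mirrors test_experiments.sh).
./compare-automata-automata-circuit-jsonl.py --db compare.jsonl -j4 --nfiles 3 --timeout 50

# A larger run simply raises the caps instead of passing a separate --prodrun flag.
./compare-automata-automata-circuit-jsonl.py --db compare.jsonl -j5 --nfiles 500 --timeout 14400
```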
