diff --git a/btcrecover.py b/btcrecover.py
index 3076a10e..8bebf5d0 100755
--- a/btcrecover.py
+++ b/btcrecover.py
@@ -28,7 +28,389 @@
 import compatibility_check
 
 from btcrecover import btcrpass, success_alert
-import sys, multiprocessing
+import itertools
+import os
+import re
+import shlex
+import subprocess
+import sys
+import multiprocessing
+
+
+_SCAN_OPTION_MODES = {  # option -> how many following argv tokens belong to it
+    "--performance-scan": "flag",  # "flag": the option takes no value
+    "--performance-scan-threads": "greedy",  # "greedy": every token up to the next "--" token
+    "--performance-scan-global-ws": "greedy",
+    "--performance-scan-local-ws": "greedy",
+    "--performance-scan-opencl-ws": "greedy",
+    "--threads": "single",  # "single": at most one value token follows
+    "--global-ws": "greedy",
+    "--local-ws": "greedy",
+    "--opencl-workgroup-size": "greedy",
+    "--performance-runtime": "single",
+}
+
+
+def _strip_scan_related_args(argv):
+    """Return argv without the options that the performance scan sets itself.
+
+    Both "--opt value ..." and "--opt=value" spellings are removed."""
+    filtered = []
+    i = 0
+    length = len(argv)
+    while i < length:
+        token = argv[i]
+        name, has_equals, _ = token.partition("=")
+        mode = _SCAN_OPTION_MODES.get(name)
+        i += 1
+        if not mode:
+            filtered.append(token)  # not a scan-managed option: keep it
+        elif has_equals or mode == "flag":
+            continue  # nothing more to drop: no value, or the value is inline
+        elif mode == "single":
+            i = min(i + 1, length)  # drop the one value, if present
+        elif mode == "greedy":
+            # drop every following value up to the next long option
+            while i < length and not argv[i].startswith("--"):
+                i += 1
+    return filtered
+
+
+def _format_local_ws(value):
+    return str(value) if value is not None else "auto"  # None is shown as "auto"
+
+
+def _format_opencl_workgroup(value):
+    return str(value) if value is not None else "auto"  # None is shown as "auto"
+
+
+def _derive_performance_scan_sets():
+    """Work out the per-benchmark runtime and the candidate settings to scan.
+
+    All inputs come from the already-parsed ``btcrpass.args``.
+
+    Returns a ``(runtime_limit, scan_config)`` tuple. ``runtime_limit`` is the
+    requested --performance-runtime, or 10.0 when that is zero/unset.
+    ``scan_config`` is a dict whose "mode" entry is one of:
+
+    * "conflict" - both --enable-gpu and --enable-opencl are active;
+    * "gpu"      - also holds "threads", "global_ws" and "local_ws" lists
+                   (a ``None`` local size is displayed as "auto");
+    * "opencl"   - also holds "threads" and "workgroups" lists (the list
+                   starts with ``None``, displayed as "auto");
+    * None       - neither accelerator option is active.
+    """
+    args = btcrpass.args
+    requested_runtime = args.performance_runtime
+    runtime_limit = requested_runtime or 10.0
+    if requested_runtime <= 0:
+        print(
+            "No --performance-runtime specified; defaulting to 10 seconds per benchmark.",
+            file=sys.stderr,
+        )
+
+    # Thread counts: the explicit list when given, otherwise half, same and
+    # double the base thread count (the doubled value is capped at 64).
+    if args.performance_scan_threads:
+        thread_options = sorted(set(args.performance_scan_threads))
+    else:
+        base_threads = args.threads or multiprocessing.cpu_count()
+        thread_options = sorted(
+            {max(1, base_threads // 2), max(1, base_threads), min(64, base_threads * 2)}
+        )
+
+    gpu_enabled = args.enable_gpu
+    opencl_enabled = getattr(args, "enable_opencl", False)
+
+    if gpu_enabled and opencl_enabled:
+        return runtime_limit, {"mode": "conflict"}
+
+    if gpu_enabled:
+        global_base = args.global_ws[0] if args.global_ws else 4096
+        if args.performance_scan_global_ws:
+            global_options = sorted(set(args.performance_scan_global_ws))
+        else:
+            half = global_base // 2
+            around = {
+                global_base,
+                max(32, half) if half else global_base,
+                global_base * 2,
+            }
+            global_options = sorted(size for size in around if size and size > 0)
+
+        local_base = args.local_ws[0] if args.local_ws else None
+        if args.performance_scan_local_ws:
+            local_options = args.performance_scan_local_ws
+        else:
+            if local_base:
+                sized = [max(1, local_base // 2), local_base, local_base * 2]
+            else:
+                sized = [32, 64, 128]
+            # None always comes first; duplicates and non-positive sizes are
+            # dropped while the remaining order is kept.
+            local_options = list(
+                dict.fromkeys(
+                    size for size in [None] + sized if size is None or size > 0
+                )
+            )
+
+        return runtime_limit, {
+            "mode": "gpu",
+            "threads": thread_options,
+            "global_ws": global_options,
+            "local_ws": local_options,
+        }
+
+    if opencl_enabled:
+        if args.performance_scan_opencl_ws:
+            sized_groups = sorted(
+                {size for size in args.performance_scan_opencl_ws if size > 0}
+            )
+        else:
+            if args.opencl_workgroup_size:
+                group_base = args.opencl_workgroup_size[0]
+            else:
+                wallet = getattr(btcrpass, "loaded_wallet", None)
+                group_base = (
+                    None if wallet is None
+                    else getattr(wallet, "opencl_device_worksize", None)
+                )
+            if group_base:
+                around = {group_base, max(1, group_base // 2), group_base * 2}
+            else:
+                around = {4096, 8192, 16384}
+            sized_groups = sorted(size for size in around if size and size > 0)
+
+        # ``None`` is always the first workgroup entry.
+        workgroup_options = [None]
+        workgroup_options += [size for size in dict.fromkeys(sized_groups) if size > 0]
+        return runtime_limit, {
+            "mode": "opencl", "threads": thread_options, "workgroups": workgroup_options
+        }
+
+    return runtime_limit, {"mode": None}
+
+
+def _run_performance_scan():
+    """Benchmark each candidate thread/work-size combination in a child process.
+
+    Every combination re-runs this script with the original arguments (minus
+    the scan-related options) plus the values under test, and the child's
+    "Performance summary:" line is parsed from its captured stdout.
+
+    Returns 0 if at least one benchmark produced a summary, otherwise 1.
+    """
+    runtime_limit, scan_config = _derive_performance_scan_sets()
+
+    mode = scan_config.get("mode") if scan_config else None
+
+    if mode == "conflict":
+        print(
+            "Error: --performance-scan currently supports benchmarking a single accelerator configuration at a time.",
+            file=sys.stderr,
+        )
+        return 1
+
+    if mode == "gpu":
+        if len(btcrpass.args.global_ws or []) > 1 or len(btcrpass.args.local_ws or []) > 1:
+            print(
+                "Error: --performance-scan currently supports benchmarking a single GPU configuration at a time.",
+                file=sys.stderr,
+            )
+            return 1
+    elif mode == "opencl":
+        opencl_sizes = btcrpass.args.opencl_workgroup_size or []
+        if len(opencl_sizes) > 1:
+            print(
+                "Error: --performance-scan currently supports benchmarking a single OpenCL configuration at a time.",
+                file=sys.stderr,
+            )
+            return 1
+    else:
+        print(
+            "Error: --performance-scan requires either --enable-gpu or --enable-opencl.",
+            file=sys.stderr,
+        )
+        return 1
+
+    combos = []
+    seen = set()
+    if mode == "gpu":
+        for threads, global_ws, local_ws in itertools.product(
+            scan_config["threads"],
+            scan_config["global_ws"],
+            scan_config["local_ws"],
+        ):
+            if local_ws and (global_ws % local_ws != 0 or local_ws > global_ws):
+                continue  # local size must divide, and not exceed, the global size
+            key = (threads, global_ws, local_ws)
+            if key in seen:
+                continue
+            combos.append({
+                "threads": threads,
+                "global_ws": global_ws,
+                "local_ws": local_ws,
+            })
+            seen.add(key)
+    else:  # mode == "opencl"
+        for threads, workgroup in itertools.product(
+            scan_config["threads"],
+            scan_config["workgroups"],
+        ):
+            key = (threads, workgroup)
+            if key in seen:
+                continue
+            combos.append({"threads": threads, "workgroup": workgroup})
+            seen.add(key)
+
+    if not combos:
+        print("No valid performance scan combinations to test.", file=sys.stderr)
+        return 1
+
+    script_path = os.path.abspath(__file__)
+    base_args = _strip_scan_related_args(sys.argv[1:])
+    runtime_arg = ["--performance-runtime", f"{runtime_limit}"]
+    total = len(combos)
+    results = []
+    failures = []
+    summary_pattern = re.compile(
+        r"Performance summary:\s*([0-9][0-9,\.]*?)\s*kP/s over ([0-9]*\.?[0-9]+) seconds \(([0-9][0-9,]*) passwords tried\)(.*)"
+    )
+
+    print(f"Running performance scan across {total} configuration(s)...")
+
+    for index, combo in enumerate(combos, start=1):
+        if mode == "gpu":
+            descriptor = (
+                f"threads={combo['threads']}, global-ws={combo['global_ws']}, "
+                f"local-ws={_format_local_ws(combo['local_ws'])}"
+            )
+        else:
+            descriptor = (
+                f"threads={combo['threads']}, "
+                f"opencl-workgroup={_format_opencl_workgroup(combo['workgroup'])}"
+            )
+        print(f"[{index}/{total}] {descriptor}")
+        cmd = [sys.executable, script_path] + base_args + runtime_arg
+        cmd.extend(["--threads", str(combo["threads"])])
+        if mode == "gpu":
+            cmd.extend(["--global-ws", str(combo["global_ws"])])
+            if combo["local_ws"] is not None:
+                cmd.extend(["--local-ws", str(combo["local_ws"])])
+        else:
+            if combo["workgroup"] is not None:
+                cmd.extend(["--opencl-workgroup-size", str(combo["workgroup"])])
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        stdout = result.stdout.strip()
+        stderr = result.stderr.strip()
+        summary_line = None
+        for line in stdout.splitlines():
+            if line.startswith("Performance summary:"):
+                summary_line = line.strip()  # the last summary line wins
+        match = summary_pattern.search(summary_line) if summary_line else None
+
+        if match:
+            rate = float(match.group(1).replace(",", ""))
+            elapsed = float(match.group(2))
+            passwords = int(match.group(3).replace(",", ""))
+            note = match.group(4).strip()
+            entry = {
+                "threads": combo["threads"],
+                "rate": rate,
+                "elapsed": elapsed,
+                "passwords": passwords,
+                "note": note,
+                "summary": summary_line,
+                "exit_code": result.returncode,
+            }
+            if mode == "gpu":
+                entry.update(
+                    {
+                        "global_ws": combo["global_ws"],
+                        "local_ws": combo["local_ws"],
+                    }
+                )
+            else:
+                entry["workgroup"] = combo["workgroup"]
+            results.append(entry)
+            print(f"    {summary_line}")
+        else:
+            # Record the settings of the combo that actually ran ("threads" plus
+            # "global_ws"/"local_ws" or "workgroup"), not leftover loop variables.
+            failure_info = {**combo, "exit_code": result.returncode, "stdout": stdout, "stderr": stderr}
+            failures.append(failure_info)
+            print("    Benchmark failed to produce a performance summary.")
+            if stderr:
+                print(f"    stderr: {stderr}")
+
+    if results:
+        sorted_results = sorted(results, key=lambda item: item["rate"], reverse=True)
+        best_result = sorted_results[0]
+        if mode == "gpu":
+            best_label = (
+                f"threads={best_result['threads']}, global-ws={best_result['global_ws']}, "
+                f"local-ws={_format_local_ws(best_result['local_ws'])}"
+            )
+        else:
+            best_label = (
+                f"threads={best_result['threads']}, opencl-workgroup={_format_opencl_workgroup(best_result['workgroup'])}"
+            )
+        print("\nBest configuration by throughput:")
+        print(
+            f"  {best_label}: {best_result['rate']:,.2f} kP/s over {best_result['elapsed']:.2f} seconds"
+            f" ({best_result['passwords']:,} passwords tried)"
+        )
+        if best_result["note"]:
+            print(f"    {best_result['note']}")
+
+        recommended_cmd = [sys.executable, script_path] + base_args
+        if runtime_limit:
+            recommended_cmd.extend(["--performance-runtime", f"{runtime_limit}"])
+        recommended_cmd.extend(["--threads", str(best_result["threads"])])
+        if mode == "gpu":
+            recommended_cmd.extend(["--global-ws", str(best_result["global_ws"])])
+            if best_result["local_ws"] is not None:
+                recommended_cmd.extend(["--local-ws", str(best_result["local_ws"])])
+        else:
+            if best_result["workgroup"] is not None:
+                recommended_cmd.extend(["--opencl-workgroup-size", str(best_result["workgroup"])])
+        print("  Recommended command:")
+        print(f"    {' '.join(shlex.quote(arg) for arg in recommended_cmd)}")
+
+        print("\nFull performance scan summary (sorted by throughput):")
+        for entry in sorted_results:
+            note_suffix = f" {entry['note']}" if entry["note"] else ""
+            if mode == "gpu":
+                label = (
+                    f"threads={entry['threads']}, global-ws={entry['global_ws']}, "
+                    f"local-ws={_format_local_ws(entry['local_ws'])}"
+                )
+            else:
+                label = (
+                    f"threads={entry['threads']}, "
+                    f"opencl-workgroup={_format_opencl_workgroup(entry['workgroup'])}"
+                )
+            print(
+                f"  {label}: {entry['rate']:,.2f} kP/s over {entry['elapsed']:.2f} seconds"
+                f" ({entry['passwords']:,} passwords tried){note_suffix}"
+            )
+
+    if failures:
+        print("\nThe following configurations did not complete successfully:")
+        for entry in failures:
+            if mode == "gpu":
+                label = (
+                    f"threads={entry['threads']}, global-ws={entry['global_ws']}, "
+                    f"local-ws={_format_local_ws(entry['local_ws'])}"
+                )
+            else:
+                label = (
+                    f"threads={entry['threads']}, "
+                    f"opencl-workgroup={_format_opencl_workgroup(entry['workgroup'])}"
+                )
+            print(f"  {label}: exit code {entry['exit_code']}")
+
+    return 0 if results else 1
 
 if __name__ == "__main__":
     print()
@@ -39,9 +421,20 @@
     )  # --listpass
 
     btcrpass.parse_arguments(sys.argv[1:])
+
+    if btcrpass.args.performance and getattr(btcrpass.args, "performance_scan", False):
+        retval = _run_performance_scan()
+        for process in multiprocessing.active_children():
+            process.join(1.0)
+        success_alert.stop_success_beep()
+        sys.exit(retval)
+
     (password_found, not_found_msg) = btcrpass.main()
 
-    if isinstance(password_found, str):
+    if btcrpass.args.performance and btcrpass.performance_run_completed:
+        retval = 0
+
+    elif isinstance(password_found, str):
         success_alert.start_success_beep()
         print()
         print(
diff --git a/btcrecover/btcrpass.py b/btcrecover/btcrpass.py
index 37f2ef6c..84dc1ff7 100644
--- a/btcrecover/btcrpass.py
+++ b/btcrecover/btcrpass.py
@@ -147,6 +147,9 @@ def error(s: str) -> None:
 
 searchfailedtext = "\nAll possible passwords (as specified in your tokenlist or passwordlist) have been checked and none are correct for this wallet. You could consider trying again with a different password list or expanded tokenlist..."
 
+performance_run_completed = False
+performance_summary_message = None
+
 def load_customTokenWildcard(customTokenWildcardFile):
     customTokenWildcards = ['']
     if customTokenWildcardFile:
@@ -6011,6 +6014,48 @@ def init_parser_common():
         parser_common.add_argument("--exclude-passwordlist", metavar="FILE", nargs="?", const="-", help="never try passwords read (exactly one per line) from this file or from stdin")
         parser_common.add_argument("--listpass",    action="store_true", help="just list all password combinations to test and exit")
         parser_common.add_argument("--performance", action="store_true", help="run a continuous performance test (Ctrl-C to exit)")
+        parser_common.add_argument(
+            "--performance-runtime",
+            type=float,
+            default=0.0,
+            metavar="SECONDS",
+            help="stop a performance test automatically after SECONDS and report the average speed",
+        )
+        parser_common.add_argument(
+            "--performance-scan",
+            action="store_true",
+            help="automatically benchmark multiple thread counts and GPU workgroup sizes when used with --performance",
+        )
+        parser_common.add_argument(
+            "--performance-scan-threads",
+            type=int,
+            nargs="+",
+            metavar="COUNT",
+            help="explicit thread counts to benchmark during --performance-scan (default: derived from detected CPU cores)",
+        )
+        parser_common.add_argument(
+            "--performance-scan-global-ws",
+            type=int,
+            nargs="+",
+            metavar="PASSWORD-COUNT",
+            help="global work sizes to benchmark during --performance-scan (default: around the configured --global-ws)",
+        )
+        parser_common.add_argument(
+            "--performance-scan-local-ws",
+            nargs="+",
+            metavar="PASSWORD-COUNT",
+            help="local work sizes to benchmark during --performance-scan; include 'auto' to test the automatic selection",
+        )
+        parser_common.add_argument(
+            "--performance-scan-opencl-ws",
+            type=int,
+            nargs="+",
+            metavar="PASSWORD-COUNT",
+            help=(
+                "OpenCL workgroup sizes to benchmark during --performance-scan "
+                "(default: around the configured --opencl-workgroup-size)"
+            ),
+        )
         parser_common.add_argument("--pause",       action="store_true", help="pause before exiting")
         parser_common.add_argument(
             "--beep-on-find",
@@ -6224,6 +6269,39 @@ def _apply_beep_configuration(parsed_args):
         pass
     #
     args = parser.parse_args(effective_argv)
+
+    if args.performance_runtime < 0:
+        error_exit("--performance-runtime must be greater than or equal to zero")
+    if args.performance_runtime and not args.performance:
+        error_exit("--performance-runtime can only be used together with --performance")
+    if args.performance_scan and not args.performance:
+        error_exit("--performance-scan can only be used together with --performance")
+    if args.performance_scan_threads:
+        for value in args.performance_scan_threads:
+            if value <= 0:
+                error_exit("--performance-scan-threads values must be positive integers")
+    if args.performance_scan_global_ws:
+        for value in args.performance_scan_global_ws:
+            if value <= 0:
+                error_exit("--performance-scan-global-ws values must be positive integers")
+    if args.performance_scan_local_ws:
+        normalized_local_ws = []
+        for value in args.performance_scan_local_ws:
+            if isinstance(value, str) and value.lower() in {"auto", "none", "null", "default"}:
+                normalized_local_ws.append(None)
+                continue
+            try:
+                int_value = int(value)
+            except (TypeError, ValueError):
+                error_exit("--performance-scan-local-ws values must be integers or 'auto'")
+            if int_value <= 0:
+                error_exit("--performance-scan-local-ws values must be positive integers")
+            normalized_local_ws.append(int_value)
+        args.performance_scan_local_ws = normalized_local_ws
+    if args.performance_scan_opencl_ws:
+        for value in args.performance_scan_opencl_ws:
+            if value <= 0:
+                error_exit("--performance-scan-opencl-ws values must be positive integers")
     _apply_beep_configuration(args)
 
     # Do this as early as possible so user doesn't miss any error messages
@@ -9169,6 +9247,10 @@ def write_checked_seeds(worker_out_queue,loaded_wallet):
 #   returns (None, None) for abnormal but not fatal errors (e.g. Ctrl-C)
 def main():
 
+    global performance_run_completed, performance_summary_message
+    performance_run_completed = False
+    performance_summary_message = None
+
     # Once installed, performs cleanup prior to a requested process shutdown on Windows
     # (this is defined inside main so it can access the passwords_tried local)
     def windows_ctrl_handler(signal):
@@ -9441,6 +9523,9 @@ def windows_ctrl_handler(signal):
     # Iterate through password_found_iterator looking for a successful guess
     password_found  = False
     passwords_tried = 0
+    performance_start_time = time.perf_counter() if args.performance else None
+    performance_limit_reached = False
+    progress_finished = False
     if progress: progress.start()
     try:
         for password_found, passwords_tried_last in password_found_iterator:
@@ -9466,6 +9551,10 @@ def windows_ctrl_handler(signal):
                     if passwords_counting_result.ready() and not passwords_counting_result.successful():
                         passwords_counting_result.get()
                 progress.update(passwords_tried)
+            if args.performance and args.performance_runtime:
+                if time.perf_counter() - performance_start_time >= args.performance_runtime:
+                    performance_limit_reached = True
+                    break
             if l_savestate and passwords_tried % est_passwords_per_5min == 0:
                 do_autosave(args.skip + passwords_tried)
         else:  # if the for loop exits normally (without breaking)
@@ -9476,8 +9565,18 @@ def windows_ctrl_handler(signal):
                 else:
                     progress.widgets.pop()  # remove the ETA
                 progress.finish()
+                progress_finished = True
             if pool: pool.join()  # if not found, waiting for processes to exit gracefully isn't a problem
 
+        if performance_limit_reached:
+            if pool:
+                pool.terminate()
+                pool.join()
+            if progress and not progress_finished:
+                progress.maxval = passwords_tried
+                progress.finish()
+                progress_finished = True
+
     # Gracefully handle any exceptions, printing the count completed so far so that it can be
     # skipped if the user restarts the same run. If the exception was expected (Ctrl-C or some
     # other intentional shutdown, or an out-of-memory condition that can be handled), fall
@@ -9505,5 +9604,21 @@ def windows_ctrl_handler(signal):
 
     worker_out_queue.close()
 
+    if args.performance:
+        performance_end_time = time.perf_counter()
+        elapsed = performance_end_time - (performance_start_time or performance_end_time)
+        elapsed = elapsed if elapsed > 0 else 0.0
+        rate = (passwords_tried / elapsed / 1000.0) if elapsed > 0 else 0.0
+        performance_summary_message = (
+            f"Performance summary: {rate:,.2f} kP/s over {elapsed:.2f} seconds ({passwords_tried:,} passwords tried)"
+        )
+        if args.performance_runtime and performance_limit_reached:
+            performance_summary_message += " (time limit reached)"
+        print(performance_summary_message)
+        performance_run_completed = True
+
     global searchfailedtext
-    return (password_found, searchfailedtext if password_found is False else None)
+    not_found_message = None
+    if password_found is False and not args.performance:
+        not_found_message = searchfailedtext
+    return (password_found, not_found_message)
diff --git a/btcrecover/btcrseed.py b/btcrecover/btcrseed.py
index 7c171afa..537a5bc7 100644
--- a/btcrecover/btcrseed.py
+++ b/btcrecover/btcrseed.py
@@ -25,7 +25,7 @@
 
 # Import modules included in standard libraries
 import sys, os, io, base64, hashlib, hmac, difflib, itertools, \
-       unicodedata, collections, struct, glob, atexit, re, random, multiprocessing, binascii, copy, datetime
+       unicodedata, collections, struct, glob, atexit, re, random, multiprocessing, binascii, copy, datetime, subprocess, shlex
 import bisect
 from typing import AnyStr, List, Optional, Sequence, Tuple, TypeVar, Union
 
@@ -83,6 +83,308 @@ def ripemd160(msg):
 except:
     pass
 
+_SCAN_OPTION_MODES = {  # option -> how many following argv tokens belong to it
+    "--performance-scan": "flag",  # "flag": the option takes no value
+    "--performance-scan-threads": "greedy",  # "greedy": every token up to the next "--" token
+    "--performance-scan-opencl-ws": "greedy",
+    "--threads": "single",  # "single": at most one value token follows
+    "--opencl-workgroup-size": "greedy",
+    "--performance-runtime": "single",
+}
+
+
+def _strip_scan_related_args(argv):
+    """Return argv without the options that the performance scan sets itself.
+
+    Both "--opt value ..." and "--opt=value" spellings are removed."""
+    filtered = []
+    i = 0
+    length = len(argv)
+    while i < length:
+        token = argv[i]
+        name, has_equals, _ = token.partition("=")
+        mode = _SCAN_OPTION_MODES.get(name)
+        i += 1
+        if not mode:
+            filtered.append(token)  # not a scan-managed option: keep it
+        elif has_equals or mode == "flag":
+            continue  # nothing more to drop: no value, or the value is inline
+        elif mode == "single":
+            i = min(i + 1, length)  # drop the one value, if present
+        elif mode == "greedy":
+            # drop every following value up to the next long option
+            while i < length and not argv[i].startswith("--"):
+                i += 1
+    return filtered
+
+
+def _format_opencl_workgroup(value):
+    return str(value) if value is not None else "auto"  # None is shown as "auto"
+
+
+def _detect_default_opencl_workgroup(args):
+    """Pick a base OpenCL workgroup size from the devices pyopencl reports.
+
+    Returns the largest ``max_work_group_size`` among the devices listed in
+    ``args.opencl_devices`` (falling back to all devices of the chosen
+    platform), or ``None`` when OpenCL, platforms or devices are unavailable.
+    """
+    if not module_opencl_available:
+        return None
+    try:
+        platforms = pyopencl.get_platforms()
+    except Exception:
+        return None
+    if not platforms:
+        return None
+
+    def _device_score(device):
+        # Non-oclgrind accelerators rank highest, then GPUs; NVIDIA/AMD get a bonus.
+        kind = device.type
+        if kind & pyopencl.device_type.ACCELERATOR:
+            score = 0 if "oclgrind" in device.name.lower() else 8
+        elif kind & pyopencl.device_type.GPU:
+            score = 4
+        else:
+            score = 0
+        vendor = device.vendor.lower()
+        if "nvidia" in vendor:
+            return score + 2
+        if "amd" in vendor:
+            return score + 1
+        return score
+
+    platform_index = None
+    if args.opencl_platform:
+        try:
+            requested = args.opencl_platform[0]
+            if 0 <= requested < len(platforms):
+                platform_index = requested
+        except Exception:
+            platform_index = None
+
+    if platform_index is None:
+        # Take the platform owning the best-scoring device; ties go to the larger workgroup.
+        platform_index = 0
+        top_score, top_workgroup = -1, 0
+        for idx, platform in enumerate(platforms):
+            for device in platform.get_devices():
+                score = _device_score(device)
+                wins = score > top_score
+                if not wins and score == top_score:
+                    wins = device.max_work_group_size > top_workgroup
+                if wins:
+                    top_score, top_workgroup, platform_index = score, device.max_work_group_size, idx
+
+    devices = platforms[platform_index].get_devices()
+    if not devices:
+        return None
+
+    chosen = []
+    for token in args.opencl_devices or ():
+        try:
+            chosen.append(int(token))
+        except (TypeError, ValueError):
+            pass
+
+    sizes = [devices[idx].max_work_group_size for idx in chosen if 0 <= idx < len(devices)]
+    largest = max([0] + sizes)
+    if largest:
+        return largest
+    return max(device.max_work_group_size for device in devices)
+
+
+def _derive_opencl_scan_sets(args):
+    """Work out the benchmark runtime, thread counts and OpenCL workgroup sizes.
+
+    Returns ``(runtime_limit, thread_counts, workgroups)``. ``runtime_limit``
+    falls back to 10.0 when --performance-runtime is zero/unset, and
+    ``workgroups`` always starts with ``None`` (displayed as "auto").
+    """
+    requested_runtime = args.performance_runtime
+    runtime_limit = requested_runtime or 10.0
+    if requested_runtime <= 0:
+        print(
+            "No --performance-runtime specified; defaulting to 10 seconds per benchmark.",
+            file=sys.stderr,
+        )
+
+    # Thread counts: the explicit list when given, otherwise half, same and
+    # double the base thread count (the doubled value is capped at 64).
+    if args.performance_scan_threads:
+        thread_options = sorted(set(args.performance_scan_threads))
+    else:
+        base_threads = args.threads or multiprocessing.cpu_count()
+        thread_options = sorted(
+            {max(1, base_threads // 2), max(1, base_threads), min(64, base_threads * 2)}
+        )
+
+    if args.performance_scan_opencl_ws:
+        sized_groups = sorted(
+            {size for size in args.performance_scan_opencl_ws if size > 0}
+        )
+    else:
+        # Base size: the first --opencl-workgroup-size value, else a detected one.
+        group_base = args.opencl_workgroup_size[0] if args.opencl_workgroup_size else None
+        if not group_base:
+            group_base = _detect_default_opencl_workgroup(args)
+        if group_base:
+            around = {
+                group_base,
+                max(1, group_base // 2),
+                group_base * 2,
+            }
+        else:
+            around = {4096, 8192, 16384}
+        sized_groups = sorted(size for size in around if size and size > 0)
+
+    # ``None`` always leads, followed by the unique positive sizes.
+    workgroup_options = [None]
+    workgroup_options += [size for size in dict.fromkeys(sized_groups) if size > 0]
+
+    return runtime_limit, thread_options, workgroup_options
+
+
+def _run_seed_performance_scan(args, argv):
+    """Benchmark thread-count / OpenCL workgroup combinations in child processes.
+
+    ``args`` is the parsed argument namespace and ``argv`` the raw argument
+    list; ``argv`` (minus the scan-related options) is passed on to every
+    child run together with the values under test. Each child's
+    "Performance summary:" line is parsed from its captured stdout.
+
+    Returns 0 if at least one benchmark produced a summary, otherwise 1.
+    """
+    if not args.enable_opencl:
+        print("Error: --performance-scan requires --enable-opencl.", file=sys.stderr)
+        return 1
+
+    if args.opencl_workgroup_size and len(args.opencl_workgroup_size) > 1:
+        print(
+            "Error: --performance-scan currently supports benchmarking a single OpenCL configuration at a time.",
+            file=sys.stderr,
+        )
+        return 1
+
+    runtime_limit, threads_candidates, workgroup_candidates = _derive_opencl_scan_sets(args)
+
+    # Unique (threads, workgroup) pairs, kept in product order.
+    pairs = dict.fromkeys(itertools.product(threads_candidates, workgroup_candidates))
+    combos = [{"threads": threads, "workgroup": workgroup} for threads, workgroup in pairs]
+
+    if not combos:
+        print("No valid performance scan combinations to test.", file=sys.stderr)
+        return 1
+
+    def _label(entry):
+        # Human-readable description of one configuration.
+        return (
+            f"threads={entry['threads']}, "
+            f"opencl-workgroup={_format_opencl_workgroup(entry['workgroup'])}"
+        )
+
+    def _tuning_args(entry):
+        # Command-line options selecting this configuration; the workgroup
+        # option is omitted when the size is None.
+        options = ["--threads", str(entry["threads"])]
+        if entry["workgroup"] is not None:
+            options += ["--opencl-workgroup-size", str(entry["workgroup"])]
+        return options
+
+    script_path = os.path.abspath(sys.argv[0])
+    base_args = _strip_scan_related_args(argv)
+    launcher = [sys.executable, script_path] + base_args
+    runtime_arg = ["--performance-runtime", f"{runtime_limit}"]
+    total = len(combos)
+    results = []
+    failures = []
+    summary_pattern = re.compile(
+        r"Performance summary:\s*([0-9][0-9,\.]*?)\s*kP/s over ([0-9]*\.?[0-9]+) seconds \(([0-9][0-9,]*) passwords tried\)(.*)"
+    )
+
+    print(f"Running performance scan across {total} configuration(s)...")
+
+    for index, combo in enumerate(combos, start=1):
+        print(f"[{index}/{total}] {_label(combo)}")
+        cmd = launcher + runtime_arg + _tuning_args(combo)
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        stdout = result.stdout.strip()
+        stderr = result.stderr.strip()
+        # The last summary line printed by the child is the one that counts.
+        summary_lines = [
+            line.strip()
+            for line in stdout.splitlines()
+            if line.startswith("Performance summary:")
+        ]
+        summary_line = summary_lines[-1] if summary_lines else None
+        match = summary_pattern.search(summary_line) if summary_line else None
+
+        if match is None:
+            failures.append(
+                {
+                    "threads": combo["threads"],
+                    "workgroup": combo["workgroup"],
+                    "exit_code": result.returncode,
+                    "stdout": stdout,
+                    "stderr": stderr,
+                }
+            )
+            print("    Benchmark failed to produce a performance summary.")
+            if stderr:
+                print(f"    stderr: {stderr}")
+            continue
+
+        raw_rate, raw_elapsed, raw_passwords, raw_note = match.groups()
+        results.append(
+            {
+                "threads": combo["threads"],
+                "workgroup": combo["workgroup"],
+                "rate": float(raw_rate.replace(",", "")),
+                "elapsed": float(raw_elapsed),
+                "passwords": int(raw_passwords.replace(",", "")),
+                "note": raw_note.strip(),
+                "summary": summary_line,
+                "exit_code": result.returncode,
+            }
+        )
+        print(f"    {summary_line}")
+
+    # Report the fastest configuration first, then the full ranking.
+    if results:
+        sorted_results = sorted(results, key=lambda item: item["rate"], reverse=True)
+        best_result = sorted_results[0]
+        print("\nBest configuration by throughput:")
+        print(
+            f"  {_label(best_result)}: {best_result['rate']:,.2f} kP/s over {best_result['elapsed']:.2f} seconds"
+            f" ({best_result['passwords']:,} passwords tried)"
+        )
+        if best_result["note"]:
+            print(f"    {best_result['note']}")
+
+        # Same launcher as the benchmarks, with the winning settings appended.
+        recommended_cmd = list(launcher)
+        if runtime_limit:
+            recommended_cmd += ["--performance-runtime", f"{runtime_limit}"]
+        recommended_cmd += _tuning_args(best_result)
+        print("  Recommended command:")
+        print(f"    {' '.join(shlex.quote(arg) for arg in recommended_cmd)}")
+
+        print("\nFull performance scan summary (sorted by throughput):")
+        for entry in sorted_results:
+            note_suffix = f" {entry['note']}" if entry["note"] else ""
+            print(
+                f"  {_label(entry)}: {entry['rate']:,.2f} kP/s over {entry['elapsed']:.2f} seconds"
+                f" ({entry['passwords']:,} passwords tried){note_suffix}"
+            )
+
+    if failures:
+        print("\nThe following configurations did not complete successfully:")
+        # Each failure record carries the settings and exit code of its child run.
+        for entry in failures:
+            print(f"  {_label(entry)}: exit code {entry['exit_code']}")
+
+    return 0 if results else 1
+
 py_crypto_hd_wallet_available = False
 try:
     import py_crypto_hd_wallet
@@ -4415,6 +4717,32 @@ def main(argv):
             help="force the alert to use the internal PC speaker when a seed is found",
         )
         parser.add_argument("--performance", action="store_true",   help="run a continuous performance test (Ctrl-C to exit)")
+        parser.add_argument(
+            "--performance-runtime",
+            type=float,
+            default=0.0,
+            metavar="SECONDS",
+            help="stop a performance test automatically after SECONDS and report the average speed",
+        )
+        parser.add_argument(
+            "--performance-scan",
+            action="store_true",
+            help="automatically benchmark multiple thread counts and OpenCL workgroup sizes when used with --performance",
+        )
+        parser.add_argument(
+            "--performance-scan-threads",
+            type=int,
+            nargs="+",
+            metavar="COUNT",
+            help="explicit thread counts to benchmark during --performance-scan (default: derived from detected CPU cores)",
+        )
+        parser.add_argument(
+            "--performance-scan-opencl-ws",
+            type=int,
+            nargs="+",
+            metavar="PASSWORD-COUNT",
+            help="OpenCL workgroup sizes to benchmark during --performance-scan (default: around the configured --opencl-workgroup-size)",
+        )
         parser.add_argument("--btcr-args",   action="store_true",   help=argparse.SUPPRESS)
         parser.add_argument("--version","-v",action="store_true",   help="show full version information and exit")
         parser.add_argument("--disablesecuritywarnings", "--dsw", action="store_true", help="Disable Security Warning Messages")
@@ -4457,6 +4785,21 @@ def main(argv):
             parser.parse_args(argv)  # re-parse them just to generate an error for the unknown args
             assert False
 
+        if args.performance_runtime < 0:
+            sys.exit("--performance-runtime must be greater than or equal to zero")
+        if args.performance_runtime and not args.performance:
+            sys.exit("--performance-runtime can only be used together with --performance")
+        if args.performance_scan and not args.performance:
+            sys.exit("--performance-scan can only be used together with --performance")
+        if args.performance_scan_threads:
+            for value in args.performance_scan_threads:
+                if value <= 0:
+                    sys.exit("--performance-scan-threads values must be positive integers")
+        if args.performance_scan_opencl_ws:
+            for value in args.performance_scan_opencl_ws:
+                if value <= 0:
+                    sys.exit("--performance-scan-opencl-ws values must be positive integers")
+
         # Assign the no-gui to a global variable...
         global no_gui
         if args.no_gui:
@@ -4480,6 +4823,10 @@ def main(argv):
         if args.beep_on_find_pcspeaker:
             extra_args.append("--beep-on-find-pcspeaker")
 
+        if args.performance and args.performance_scan:
+            exit_code = _run_seed_performance_scan(args, argv)
+            sys.exit(exit_code)
+
         # Version information is always printed by seedrecover.py, so just exit
         if args.version: sys.exit(0)
 
@@ -4701,6 +5048,9 @@ def main(argv):
             if args.__dict__[argkey] is not None:
                 extra_args.extend(("--"+argkey.replace("_", "-"), str(args.__dict__[argkey])))
 
+        if args.performance_runtime:
+            extra_args.extend(["--performance-runtime", str(args.performance_runtime)])
+
 
         # These arguments (which have no values) are passed on to btcrpass.parse_arguments()
         for argkey in "no_eta", "no_progress":
diff --git a/docs/GPU_Acceleration.md b/docs/GPU_Acceleration.md
index 3741902c..dd175e0b 100644
--- a/docs/GPU_Acceleration.md
+++ b/docs/GPU_Acceleration.md
@@ -116,6 +116,12 @@ A good starting point for these wallets is:
 
 The `--performance` option tells *btcrecover* to simply measure the performance until Ctrl-C is pressed, and not to try testing any particular passwords. You will still need a wallet file (or an `--extract-data` option) for performance testing. After you you have a baseline from this initial test, you can try different values for `--global-ws` and `--local-ws` to see if they improve or worsen performance.
 
+For automated benchmarking you can add `--performance-runtime SECONDS` to stop the run after a fixed interval instead of relying on Ctrl-C. When a performance session exits (either manually or because the runtime limit was reached) *btcrecover* now prints a summary that includes the average kP/s rate, the elapsed time, and the number of passwords attempted, making it easier to track results from repeated tests.
+
+If you want to explore a range of GPU settings automatically, add `--performance-scan` alongside `--performance`. By default this benchmarks several thread counts, global work sizes, and local work sizes (including the automatic `--local-ws` selection). You can further customize the scan with `--performance-scan-threads`, `--performance-scan-global-ws`, and `--performance-scan-local-ws` to explicitly control which values are tested. Each combination is executed as an individual performance run and summarized at the end so you can quickly spot the most efficient configuration for your hardware. The summary now highlights the fastest combination and prints a ready-to-run command line so you can immediately re-test or adopt the optimal values for longer recovery jobs.
+
+The same automation works when you are using OpenCL acceleration. Add `--enable-opencl` to a `--performance --performance-scan` run on either *btcrecover* or *seedrecover* (`--performance-scan` is only accepted together with `--performance`) to sweep thread counts and OpenCL workgroup sizes and receive the same ranked summary and recommended command line for the best-performing configuration on your system.
+
 Finding the right values for `--global-ws` and `--local-ws` can make a 10x improvement, so it's usually worth the effort.
 
 Generally when testing, you should increase or decrease these two values by powers of 2, for example you should increase or decrease them by 128 or 256 at a time. It's important to note that `--global-ws` must always be evenly divisible by `--local-ws`, otherwise *btcrecover* will exit with an error message.