From d8265a2fa5fdcd03f2497a41310ab7d28840a974 Mon Sep 17 00:00:00 2001 From: Jay DeLuca Date: Sun, 25 Jan 2026 14:49:13 -0500 Subject: [PATCH 1/8] benchmark workflow Signed-off-by: Jay DeLuca --- .github/workflows/nightly-benchmarks.yml | 115 +++++++++ .gitignore | 4 + .mise/tasks/generate_benchmark_summary.py | 283 ++++++++++++++++++++++ benchmarks/README.md | 30 +++ mise.toml | 31 +++ 5 files changed, 463 insertions(+) create mode 100644 .github/workflows/nightly-benchmarks.yml create mode 100644 .mise/tasks/generate_benchmark_summary.py diff --git a/.github/workflows/nightly-benchmarks.yml b/.github/workflows/nightly-benchmarks.yml new file mode 100644 index 000000000..acd95c605 --- /dev/null +++ b/.github/workflows/nightly-benchmarks.yml @@ -0,0 +1,115 @@ +--- +name: Nightly Benchmarks + +on: + schedule: + # Run at 2 AM UTC every day + - cron: "0 2 * * *" + workflow_dispatch: + inputs: + jmh_args: + description: "Additional JMH arguments (e.g., '-f 1 -wi 1 -i 3' for quick run)" + required: false + default: "" + +permissions: {} + +concurrency: + group: "benchmarks" + +defaults: + run: + shell: bash + +jobs: + benchmark: + # if: github.repository == 'prometheus/client_java' # Uncomment for production + runs-on: ubuntu-24.04 + permissions: + contents: write + steps: + - name: Checkout main branch + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: true + fetch-depth: 0 + + - name: Setup mise + uses: jdx/mise-action@6d1e696aa24c1aa1bcc1adea0212707c71ab78a8 # v3.6.1 + with: + version: v2026.1.4 + sha256: 79c798e39b83f0dd80108eaa88c6ca63689695ae975fd6786e7a353ef9f87002 + + - name: Cache local Maven repository + uses: actions/cache@8b402f58fbc84540c8b491a91e594a4576fec3d7 # v5.0.2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: Build benchmarks module + run: ./mvnw -pl benchmarks -am -DskipTests clean package + + - name: Run JMH benchmarks + id: benchmarks + run: | + # 3 forks, 3 warmup, 5 measurement iterations (~60 min total) + DEFAULT_ARGS="-f 3 -wi 3 -i 5" + JMH_ARGS="${{ github.event.inputs.jmh_args }}" + JMH_ARGS="${JMH_ARGS:-$DEFAULT_ARGS}" + + echo "Running benchmarks with args: $JMH_ARGS" + + # Run benchmarks and output JSON (captures full results) + java -jar ./benchmarks/target/benchmarks.jar \ + -rf json \ + -rff benchmark-results.json \ + $JMH_ARGS 2>&1 | tee benchmark-output.log + + - name: Generate benchmark summary + run: | + python3 .mise/tasks/generate_benchmark_summary.py \ + --input benchmark-results.json \ + --output-dir benchmark-results \ + --commit-sha "${{ github.sha }}" + env: + GITHUB_REPOSITORY: ${{ github.repository }} + + - name: Checkout or create benchmarks branch + run: | + # Check if benchmarks branch exists + if git ls-remote --heads origin benchmarks | grep -q benchmarks; then + git fetch origin benchmarks + git checkout benchmarks + # Preserve history directory if it exists + if [ -d history ]; then + cp -r history benchmark-results/ + fi + else + git checkout --orphan benchmarks + git rm -rf . 2>/dev/null || true + fi + + - name: Commit and push results + run: | + # Configure git + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + # Move results to root + cp -r benchmark-results/* . 
+ rm -rf benchmark-results + + # Add all files + git add -A + + # Commit with date + DATE=$(date -u +"%Y-%m-%d") + COMMIT_SHORT=$(echo "${{ github.sha }}" | cut -c1-7) + + git commit -m "Benchmark results for ${DATE} (${COMMIT_SHORT})" \ + -m "Automated benchmark run from commit ${{ github.sha }}" || echo "No changes to commit" + + # Push to benchmarks branch + git push origin benchmarks --force-with-lease || git push origin benchmarks diff --git a/.gitignore b/.gitignore index b727017a9..83f5595ba 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,7 @@ dependency-reduced-pom.xml **/.settings/ docs/public .lycheecache + +benchmark-results/ +benchmark-results.json +benchmark-output.log diff --git a/.mise/tasks/generate_benchmark_summary.py b/.mise/tasks/generate_benchmark_summary.py new file mode 100644 index 000000000..0581b352e --- /dev/null +++ b/.mise/tasks/generate_benchmark_summary.py @@ -0,0 +1,283 @@ +#!/usr/bin/env python3 + +# [MISE] description="Generate markdown summary from JMH benchmark JSON results" +# [MISE] alias="generate-benchmark-summary" + +""" +Generate a markdown summary from JMH benchmark JSON results. + +Usage: + python3 .mise/tasks/generate_benchmark_summary.py [--input results.json] [--output-dir ./benchmark-results] + +This script: +1. Reads JMH JSON output +2. Generates a README.md with formatted tables +3. Copies results to the output directory with historical naming +""" + +import argparse +import json +import os +import shutil +import sys +from datetime import datetime, timezone +from pathlib import Path +from typing import Dict, List, Optional + + +def parse_args(): + parser = argparse.ArgumentParser(description="Generate benchmark summary from JMH JSON") + parser.add_argument( + "--input", + default="benchmark-results.json", + help="Path to JMH JSON results file (default: benchmark-results.json)", + ) + parser.add_argument( + "--output-dir", + default="benchmark-results", + help="Output directory for results (default: benchmark-results)", + ) + parser.add_argument( + "--commit-sha", + default=None, + help="Git commit SHA (default: read from git or 'local')", + ) + return parser.parse_args() + + +def get_commit_sha(provided_sha: Optional[str]) -> str: + """Get commit SHA from argument, git, or return 'local'.""" + if provided_sha: + return provided_sha + + # Try to get from git + try: + import subprocess + + result = subprocess.run( + ["git", "rev-parse", "HEAD"], + capture_output=True, + text=True, + timeout=5, + ) + if result.returncode == 0: + return result.stdout.strip() + except Exception: + pass + + return "local" + + +def format_score(score) -> str: + """Format score with appropriate precision.""" + try: + val = float(score) + if val >= 1_000_000: + return f"{val / 1_000_000:.2f}M" + elif val >= 1_000: + return f"{val / 1_000:.2f}K" + else: + return f"{val:.2f}" + except (ValueError, TypeError): + return str(score) + + +def format_error(error) -> str: + """Format error value, handling NaN.""" + try: + error_val = float(error) + if error_val != error_val: # NaN check + return "" + elif error_val >= 1_000: + return f"± {error_val / 1_000:.2f}K" + else: + return f"± {error_val:.2f}" + except (ValueError, TypeError): + return "" + + +def generate_markdown(results: List, commit_sha: str, repo: str) -> str: + """Generate markdown summary from JMH results.""" + commit_short = commit_sha[:7] + datetime_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + # Extract metadata from first result + first = results[0] if results else {} + 
jdk_version = first.get("jdkVersion", "unknown") + vm_name = first.get("vmName", "unknown") + threads = first.get("threads", "?") + forks = first.get("forks", "?") + warmup_iters = first.get("warmupIterations", "?") + measure_iters = first.get("measurementIterations", "?") + + md = [] + md.append("# Prometheus Java Client Benchmarks") + md.append("") + + # Run metadata + md.append("## Run Information") + md.append("") + md.append(f"- **Date:** {datetime_str}") + if commit_sha != "local": + md.append(f"- **Commit:** [`{commit_short}`](https://github.com/{repo}/commit/{commit_sha})") + else: + md.append(f"- **Commit:** `{commit_short}` (local run)") + md.append(f"- **JDK:** {jdk_version} ({vm_name})") + md.append(f"- **Configuration:** {forks} fork(s), {warmup_iters} warmup, {measure_iters} measurement, {threads} threads") + md.append("") + + # Group by benchmark class + benchmarks_by_class: Dict[str, List] = {} + for b in results: + name = b.get("benchmark", "") + parts = name.rsplit(".", 1) + if len(parts) == 2: + class_name, method = parts + class_short = class_name.split(".")[-1] + else: + class_short = "Other" + benchmarks_by_class.setdefault(class_short, []).append(b) + + md.append("## Results") + md.append("") + + # Generate table for each class + for class_name in sorted(benchmarks_by_class.keys()): + benchmarks = benchmarks_by_class[class_name] + md.append(f"### {class_name}") + md.append("") + + # Sort by score descending + sorted_benchmarks = sorted( + benchmarks, + key=lambda x: x.get("primaryMetric", {}).get("score", 0), + reverse=True + ) + + md.append("| Benchmark | Score | Error | Units | |") + md.append("|:----------|------:|------:|:------|:---|") + + best_score = sorted_benchmarks[0].get("primaryMetric", {}).get("score", 1) if sorted_benchmarks else 1 + + for i, b in enumerate(sorted_benchmarks): + name = b.get("benchmark", "").split(".")[-1] + score = b.get("primaryMetric", {}).get("score", 0) + error = b.get("primaryMetric", {}).get("scoreError", 0) + unit = b.get("primaryMetric", {}).get("scoreUnit", "ops/s") + + score_fmt = format_score(score) + error_fmt = format_error(error) + + # Calculate relative performance as multiplier + try: + if i == 0: + relative_fmt = "**fastest**" + else: + multiplier = float(best_score) / float(score) + if multiplier >= 10: + relative_fmt = f"{multiplier:.0f}x slower" + else: + relative_fmt = f"{multiplier:.1f}x slower" + except (ValueError, TypeError, ZeroDivisionError): + relative_fmt = "" + + md.append(f"| {name} | {score_fmt} | {error_fmt} | {unit} | {relative_fmt} |") + + md.append("") + + md.append("### Raw Results") + md.append("") + md.append("```") + md.append(f"{'Benchmark':<50} {'Mode':>6} {'Cnt':>4} {'Score':>14} {'Error':>12} Units") + + for b in sorted(results, key=lambda x: x.get("benchmark", "")): + name = b.get("benchmark", "").replace("io.prometheus.metrics.benchmarks.", "") + mode = b.get("mode", "thrpt") + cnt = b.get("measurementIterations", 0) * b.get("forks", 1) + score = b.get("primaryMetric", {}).get("score", 0) + error = b.get("primaryMetric", {}).get("scoreError", 0) + unit = b.get("primaryMetric", {}).get("scoreUnit", "ops/s") + + try: + score_str = f"{float(score):.3f}" + except (ValueError, TypeError): + score_str = str(score) + + try: + error_val = float(error) + if error_val != error_val: # NaN + error_str = "" + else: + error_str = f"± {error_val:.3f}" + except (ValueError, TypeError): + error_str = "" + + md.append(f"{name:<50} {mode:>6} {cnt:>4} {score_str:>14} {error_str:>12} {unit}") + + 
md.append("```") + md.append("") + + md.append("## Notes") + md.append("") + md.append("- **Score** = Throughput in operations per second (higher is better)") + md.append("- **Error** = 99.9% confidence interval") + md.append("") + + md.append("## Benchmark Descriptions") + md.append("") + md.append("| Benchmark | Description |") + md.append("|:----------|:------------|") + md.append("| **CounterBenchmark** | Compares counter increment performance across Prometheus, OpenTelemetry, simpleclient (0.16.0), and Codahale Metrics |") + md.append("| **HistogramBenchmark** | Compares histogram observation performance (classic buckets vs native/exponential) |") + md.append("| **TextFormatUtilBenchmark** | Measures metric exposition format writing speed (Prometheus text vs OpenMetrics) |") + md.append("") + return "\n".join(md) + + +def main(): + args = parse_args() + + # Check input file exists + input_path = Path(args.input) + if not input_path.exists(): + print(f"Error: Input file not found: {input_path}") + sys.exit(1) + + # Load JSON results + print(f"Reading results from: {input_path}") + with open(input_path, "r") as f: + results = json.load(f) + + print(f"Found {len(results)} benchmark results") + + # Get commit info + commit_sha = get_commit_sha(args.commit_sha) + commit_short = commit_sha[:7] + repo = os.environ.get("GITHUB_REPOSITORY", "prometheus/client_java") + + # Create output directory + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + history_dir = output_dir / "history" + history_dir.mkdir(parents=True, exist_ok=True) + + results_json_path = output_dir / "results.json" + shutil.copy(input_path, results_json_path) + print(f"Copied results to: {results_json_path}") + + date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d") + history_path = history_dir / f"{date_str}-{commit_short}.json" + shutil.copy(input_path, history_path) + print(f"Saved historical entry: {history_path}") + + markdown = generate_markdown(results, commit_sha, repo) + readme_path = output_dir / "README.md" + with open(readme_path, "w") as f: + f.write(markdown) + print(f"Generated summary: {readme_path}") + + print(f"\nDone! Results are in: {output_dir}/") + + +if __name__ == "__main__": + main() diff --git a/benchmarks/README.md b/benchmarks/README.md index 3bba56422..c201d356b 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -10,6 +10,17 @@ Run benchmarks and update the results in the Javadoc of the benchmark classes: mise run update-benchmarks ``` +### Different benchmark configurations + +The full benchmark suite takes approximately 2 hours with JMH defaults. 
+For faster iterations, use these preset configurations: + +| Command | Duration | Use Case | +|---------|----------|----------| +| `mise run benchmark:quick` | ~10 min | Quick smoke test during development | +| `mise run benchmark:standard` | ~60 min | CI/nightly runs with good accuracy | +| `mise run benchmark:full` | ~2 hours | Full JMH defaults for release validation | + ### Running benchmarks manually ```shell @@ -22,6 +33,25 @@ Run only one specific benchmark: java -jar ./benchmarks/target/benchmarks.jar CounterBenchmark ``` +### Custom JMH arguments + +You can pass custom JMH arguments: + +```shell +# Quick run: 1 fork, 1 warmup iteration, 3 measurement iterations +mise run update-benchmarks -- --jmh-args "-f 1 -wi 1 -i 3" + +# Standard CI: 3 forks, 3 warmup iterations, 5 measurement iterations +mise run update-benchmarks -- --jmh-args "-f 3 -wi 3 -i 5" +``` + +JMH parameter reference: +- `-f N`: Number of forks (JVM restarts) +- `-wi N`: Number of warmup iterations +- `-i N`: Number of measurement iterations +- `-w Ns`: Warmup iteration time (default: 10s) +- `-r Ns`: Measurement iteration time (default: 10s) + ## Results See Javadoc of the benchmark classes: diff --git a/mise.toml b/mise.toml index 56d2680f5..146ca1fe9 100644 --- a/mise.toml +++ b/mise.toml @@ -83,3 +83,34 @@ run = [ "hugo --gc --minify --baseURL ${BASE_URL}/", "echo 'ls ./public/api' && ls ./public/api" ] + +[tasks."benchmark:quick"] +description = "Run benchmarks with reduced iterations (quick smoke test, ~10 min)" +run = "python3 ./.mise/tasks/update_benchmarks.py --jmh-args '-f 1 -wi 1 -i 3'" + +[tasks."benchmark:standard"] +description = "Run benchmarks with standard CI settings (~60 min)" +run = "python3 ./.mise/tasks/update_benchmarks.py --jmh-args '-f 3 -wi 3 -i 5'" + +[tasks."benchmark:full"] +description = "Run benchmarks with full JMH defaults (~2 hours)" +run = "python3 ./.mise/tasks/update_benchmarks.py" + +[tasks."benchmark:test"] +description = "Run minimal benchmark test (~1 min) and generate summary" +run = [ + "./mvnw -pl benchmarks -am -DskipTests package -q", + "java -jar ./benchmarks/target/benchmarks.jar -rf json -rff benchmark-results.json -f 1 -wi 1 -i 3 -r 1s", + "python3 ./.mise/tasks/generate_benchmark_summary.py --input benchmark-results.json --output-dir benchmark-results", + "echo ''", + "echo '=== Generated files ==='", + "ls -la benchmark-results/", + "ls -la benchmark-results/history/", + "echo ''", + "echo '=== README.md ==='", + "cat benchmark-results/README.md" +] + +[tasks."benchmark:generate-summary"] +description = "Generate summary from existing benchmark-results.json" +run = "python3 ./.mise/tasks/generate_benchmark_summary.py --input benchmark-results.json --output-dir benchmark-results" From 83edd829517ce758e26850552aac9cea4d3b22e9 Mon Sep 17 00:00:00 2001 From: Jay DeLuca Date: Sun, 25 Jan 2026 15:14:10 -0500 Subject: [PATCH 2/8] fix what's included Signed-off-by: Jay DeLuca --- .github/workflows/nightly-benchmarks.yml | 35 ++++++++++++------------ 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/.github/workflows/nightly-benchmarks.yml b/.github/workflows/nightly-benchmarks.yml index acd95c605..59a6373fc 100644 --- a/.github/workflows/nightly-benchmarks.yml +++ b/.github/workflows/nightly-benchmarks.yml @@ -76,40 +76,41 @@ jobs: env: GITHUB_REPOSITORY: ${{ github.repository }} - - name: Checkout or create benchmarks branch + - name: Commit and push results to benchmarks branch run: | - # Check if benchmarks branch exists + # Save results to a temp location +
mkdir -p /tmp/benchmark-output + cp -r benchmark-results/* /tmp/benchmark-output/ + + # Configure git + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + # Checkout or create benchmarks branch if git ls-remote --heads origin benchmarks | grep -q benchmarks; then git fetch origin benchmarks git checkout benchmarks - # Preserve history directory if it exists + # Preserve existing history if [ -d history ]; then - cp -r history benchmark-results/ + cp -r history /tmp/benchmark-output/ fi else git checkout --orphan benchmarks - git rm -rf . 2>/dev/null || true fi - - name: Commit and push results - run: | - # Configure git - git config user.name "github-actions[bot]" - git config user.email "github-actions[bot]@users.noreply.github.com" + # Clean working directory + git rm -rf . 2>/dev/null || true + find . -mindepth 1 -maxdepth 1 ! -name '.git' -exec rm -rf {} + - # Move results to root - cp -r benchmark-results/* . - rm -rf benchmark-results + # Copy only the benchmark results + cp -r /tmp/benchmark-output/* . - # Add all files - git add -A + git add README.md results.json history/ - # Commit with date DATE=$(date -u +"%Y-%m-%d") COMMIT_SHORT=$(echo "${{ github.sha }}" | cut -c1-7) git commit -m "Benchmark results for ${DATE} (${COMMIT_SHORT})" \ -m "Automated benchmark run from commit ${{ github.sha }}" || echo "No changes to commit" - # Push to benchmarks branch git push origin benchmarks --force-with-lease || git push origin benchmarks From 0efa6227dad7c61fbe78c42b03c7f243b4d844e7 Mon Sep 17 00:00:00 2001 From: Jay DeLuca Date: Sun, 25 Jan 2026 15:23:47 -0500 Subject: [PATCH 3/8] host info Signed-off-by: Jay DeLuca --- .github/workflows/nightly-benchmarks.yml | 5 +- .mise/tasks/generate_benchmark_summary.py | 80 ++++++++++++++++++++++- 2 files changed, 80 insertions(+), 5 deletions(-) diff --git a/.github/workflows/nightly-benchmarks.yml b/.github/workflows/nightly-benchmarks.yml index 59a6373fc..057192cb3 100644 --- a/.github/workflows/nightly-benchmarks.yml +++ b/.github/workflows/nightly-benchmarks.yml @@ -52,7 +52,6 @@ jobs: run: ./mvnw -pl benchmarks -am -DskipTests clean package - name: Run JMH benchmarks - id: benchmarks run: | # 3 forks, 3 warmup, 5 measurement iterations (~60 min total) DEFAULT_ARGS="-f 3 -wi 3 -i 5" @@ -61,11 +60,10 @@ jobs: echo "Running benchmarks with args: $JMH_ARGS" - # Run benchmarks and output JSON (captures full results) java -jar ./benchmarks/target/benchmarks.jar \ -rf json \ -rff benchmark-results.json \ - $JMH_ARGS 2>&1 | tee benchmark-output.log + $JMH_ARGS - name: Generate benchmark summary run: | @@ -82,7 +80,6 @@ jobs: mkdir -p /tmp/benchmark-output cp -r benchmark-results/* /tmp/benchmark-output/ - # Configure git git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" diff --git a/.mise/tasks/generate_benchmark_summary.py b/.mise/tasks/generate_benchmark_summary.py index 0581b352e..3c23899dc 100644 --- a/.mise/tasks/generate_benchmark_summary.py +++ b/.mise/tasks/generate_benchmark_summary.py @@ -45,6 +45,67 @@ def parse_args(): return parser.parse_args() +def get_system_info() -> Dict[str, str]: + """Capture system hardware information.""" + import platform + import multiprocessing + + info = {} + + # CPU cores + try: + info["cpu_cores"] = str(multiprocessing.cpu_count()) + except Exception: + pass + + # CPU model - try Linux first, then macOS + try: + with open("/proc/cpuinfo", "r") as f: + for line in f: + if 
line.startswith("model name"): + info["cpu_model"] = line.split(":")[1].strip() + break + except FileNotFoundError: + # macOS + try: + import subprocess + result = subprocess.run( + ["sysctl", "-n", "machdep.cpu.brand_string"], + capture_output=True, text=True, timeout=5 + ) + if result.returncode == 0: + info["cpu_model"] = result.stdout.strip() + except Exception: + pass + + # Memory - try Linux first, then macOS + try: + with open("/proc/meminfo", "r") as f: + for line in f: + if line.startswith("MemTotal"): + kb = int(line.split()[1]) + info["memory_gb"] = str(round(kb / 1024 / 1024)) + break + except FileNotFoundError: + # macOS + try: + import subprocess + result = subprocess.run( + ["sysctl", "-n", "hw.memsize"], + capture_output=True, text=True, timeout=5 + ) + if result.returncode == 0: + bytes_mem = int(result.stdout.strip()) + info["memory_gb"] = str(round(bytes_mem / 1024 / 1024 / 1024)) + except Exception: + pass + + # OS + info["os"] = f"{platform.system()} {platform.release()}" + + return info + + def get_commit_sha(provided_sha: Optional[str]) -> str: """Get commit SHA from argument, git, or return 'local'.""" if provided_sha: @@ -110,6 +171,9 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: warmup_iters = first.get("warmupIterations", "?") measure_iters = first.get("measurementIterations", "?") + # Get system info + sysinfo = get_system_info() + md = [] md.append("# Prometheus Java Client Benchmarks") md.append("") @@ -123,7 +187,21 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: else: md.append(f"- **Commit:** `{commit_short}` (local run)") md.append(f"- **JDK:** {jdk_version} ({vm_name})") - md.append(f"- **Configuration:** {forks} fork(s), {warmup_iters} warmup, {measure_iters} measurement, {threads} threads") + md.append(f"- **Benchmark config:** {forks} fork(s), {warmup_iters} warmup, {measure_iters} measurement, {threads} threads") + + # Hardware info + hw_parts = [] + if sysinfo.get("cpu_model"): + hw_parts.append(sysinfo["cpu_model"]) + if sysinfo.get("cpu_cores"): + hw_parts.append(f"{sysinfo['cpu_cores']} cores") + if sysinfo.get("memory_gb"): + hw_parts.append(f"{sysinfo['memory_gb']} GB RAM") + if hw_parts: + md.append(f"- **Hardware:** {', '.join(hw_parts)}") + if sysinfo.get("os"): + md.append(f"- **OS:** {sysinfo['os']}") + md.append("") # Group by benchmark class From a8dacf2152e859954f911e34d0b1afd7f276128f Mon Sep 17 00:00:00 2001 From: Jay DeLuca Date: Sun, 25 Jan 2026 15:34:45 -0500 Subject: [PATCH 4/8] host info Signed-off-by: Jay DeLuca --- .github/workflows/nightly-benchmarks.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/nightly-benchmarks.yml b/.github/workflows/nightly-benchmarks.yml index 057192cb3..a66a51b99 100644 --- a/.github/workflows/nightly-benchmarks.yml +++ b/.github/workflows/nightly-benchmarks.yml @@ -83,16 +83,16 @@ jobs: git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" - # Checkout or create benchmarks branch + # Checkout or create benchmarks branch (use -- to disambiguate from benchmarks/ directory) if git ls-remote --heads origin benchmarks | grep -q benchmarks; then git fetch origin benchmarks - git checkout benchmarks + git switch benchmarks # Preserve existing history if [ -d history ]; then cp -r history /tmp/benchmark-output/ fi else - git checkout --orphan benchmarks + git switch --orphan benchmarks fi # Clean working directory From 
aa12656149c30e385ee3a3d174c3b84d6909f646 Mon Sep 17 00:00:00 2001 From: Jay DeLuca Date: Sun, 25 Jan 2026 15:50:45 -0500 Subject: [PATCH 5/8] cleanup Signed-off-by: Jay DeLuca --- .github/workflows/nightly-benchmarks.yml | 1 - .mise/tasks/generate_benchmark_summary.py | 12 ------------ mise.toml | 19 ------------------- 3 files changed, 32 deletions(-) diff --git a/.github/workflows/nightly-benchmarks.yml b/.github/workflows/nightly-benchmarks.yml index a66a51b99..24ed26cea 100644 --- a/.github/workflows/nightly-benchmarks.yml +++ b/.github/workflows/nightly-benchmarks.yml @@ -23,7 +23,6 @@ defaults: jobs: benchmark: - # if: github.repository == 'prometheus/client_java' # Uncomment for production runs-on: ubuntu-24.04 permissions: contents: write diff --git a/.mise/tasks/generate_benchmark_summary.py b/.mise/tasks/generate_benchmark_summary.py index 3c23899dc..24daadca1 100644 --- a/.mise/tasks/generate_benchmark_summary.py +++ b/.mise/tasks/generate_benchmark_summary.py @@ -52,13 +52,11 @@ def get_system_info() -> Dict[str, str]: info = {} - # CPU cores try: info["cpu_cores"] = str(multiprocessing.cpu_count()) except Exception: pass - # CPU model - try Linux first, then macOS try: with open("/proc/cpuinfo", "r") as f: for line in f: @@ -78,7 +76,6 @@ def get_system_info() -> Dict[str, str]: except Exception: pass - # Memory - try Linux first, then macOS try: with open("/proc/meminfo", "r") as f: for line in f: @@ -100,7 +97,6 @@ def get_system_info() -> Dict[str, str]: except Exception: pass - # OS info["os"] = f"{platform.system()} {platform.release()}" return info @@ -111,7 +107,6 @@ def get_commit_sha(provided_sha: Optional[str]) -> str: if provided_sha: return provided_sha - # Try to get from git try: import subprocess @@ -171,14 +166,12 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: warmup_iters = first.get("warmupIterations", "?") measure_iters = first.get("measurementIterations", "?") - # Get system info sysinfo = get_system_info() md = [] md.append("# Prometheus Java Client Benchmarks") md.append("") - # Run metadata md.append("## Run Information") md.append("") md.append(f"- **Date:** {datetime_str}") @@ -189,7 +182,6 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: md.append(f"- **JDK:** {jdk_version} ({vm_name})") md.append(f"- **Benchmark config:** {forks} fork(s), {warmup_iters} warmup, {measure_iters} measurement, {threads} threads") - # Hardware info hw_parts = [] if sysinfo.get("cpu_model"): hw_parts.append(sysinfo["cpu_model"]) @@ -315,25 +307,21 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: def main(): args = parse_args() - # Check input file exists input_path = Path(args.input) if not input_path.exists(): print(f"Error: Input file not found: {input_path}") sys.exit(1) - # Load JSON results print(f"Reading results from: {input_path}") with open(input_path, "r") as f: results = json.load(f) print(f"Found {len(results)} benchmark results") - # Get commit info commit_sha = get_commit_sha(args.commit_sha) commit_short = commit_sha[:7] repo = os.environ.get("GITHUB_REPOSITORY", "prometheus/client_java") - # Create output directory output_dir = Path(args.output_dir) output_dir.mkdir(parents=True, exist_ok=True) history_dir = output_dir / "history" diff --git a/mise.toml b/mise.toml index 146ca1fe9..fbdb4e6dd 100644 --- a/mise.toml +++ b/mise.toml @@ -88,29 +88,10 @@ run = [ description = "Run benchmarks with reduced iterations (quick smoke test, ~10 min)" run = "python3 
./.mise/tasks/update_benchmarks.py --jmh-args '-f 1 -wi 1 -i 3'" -[tasks."benchmark:standard"] -description = "Run benchmarks with standard CI settings (~60 min)" -run = "python3 ./.mise/tasks/update_benchmarks.py --jmh-args '-f 3 -wi 3 -i 5'" - [tasks."benchmark:full"] description = "Run benchmarks with full JMH defaults (~2 hours)" run = "python3 ./.mise/tasks/update_benchmarks.py" -[tasks."benchmark:test"] -description = "Run minimal benchmark test (~1 min) and generate summary" -run = [ - "./mvnw -pl benchmarks -am -DskipTests package -q", - "java -jar ./benchmarks/target/benchmarks.jar -rf json -rff benchmark-results.json -f 1 -wi 1 -i 3 -r 1s", - "python3 ./.mise/tasks/generate_benchmark_summary.py --input benchmark-results.json --output-dir benchmark-results", - "echo ''", - "echo '=== Generated files ==='", - "ls -la benchmark-results/", - "ls -la benchmark-results/history/", - "echo ''", - "echo '=== README.md ==='", - "cat benchmark-results/README.md" -] - [tasks."benchmark:generate-summary"] description = "Generate summary from existing benchmark-results.json" run = "python3 ./.mise/tasks/generate_benchmark_summary.py --input benchmark-results.json --output-dir benchmark-results" From bc60f49eeb1b0f4ebd02318def328f8d6158726a Mon Sep 17 00:00:00 2001 From: Jay DeLuca Date: Sun, 25 Jan 2026 15:57:20 -0500 Subject: [PATCH 6/8] linting Signed-off-by: Jay DeLuca --- .github/workflows/nightly-benchmarks.yml | 5 +++-- .mise/tasks/generate_benchmark_summary.py | 17 +++++++++++++---- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/.github/workflows/nightly-benchmarks.yml b/.github/workflows/nightly-benchmarks.yml index 24ed26cea..0c6d4dff9 100644 --- a/.github/workflows/nightly-benchmarks.yml +++ b/.github/workflows/nightly-benchmarks.yml @@ -54,8 +54,7 @@ jobs: run: | # 3 forks, 3 warmup, 5 measurement iterations (~60 min total) DEFAULT_ARGS="-f 3 -wi 3 -i 5" - JMH_ARGS="${{ github.event.inputs.jmh_args }}" - JMH_ARGS="${JMH_ARGS:-$DEFAULT_ARGS}" + JMH_ARGS="${INPUT_JMH_ARGS:-$DEFAULT_ARGS}" echo "Running benchmarks with args: $JMH_ARGS" @@ -63,6 +62,8 @@ jobs: -rf json \ -rff benchmark-results.json \ $JMH_ARGS + env: + INPUT_JMH_ARGS: ${{ github.event.inputs.jmh_args }} - name: Generate benchmark summary run: | diff --git a/.mise/tasks/generate_benchmark_summary.py b/.mise/tasks/generate_benchmark_summary.py index 24daadca1..9736f4c70 100644 --- a/.mise/tasks/generate_benchmark_summary.py +++ b/.mise/tasks/generate_benchmark_summary.py @@ -180,7 +180,8 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: else: md.append(f"- **Commit:** `{commit_short}` (local run)") md.append(f"- **JDK:** {jdk_version} ({vm_name})") - md.append(f"- **Benchmark config:** {forks} fork(s), {warmup_iters} warmup, {measure_iters} measurement, {threads} threads") + bench_cfg = f"{forks} fork(s), {warmup_iters} warmup, {measure_iters} measurement, {threads} threads" + md.append(f"- **Benchmark config:** {bench_cfg}") hw_parts = [] if sysinfo.get("cpu_model"): @@ -297,9 +298,17 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: md.append("") md.append("| Benchmark | Description |") md.append("|:----------|:------------|") - md.append("| **CounterBenchmark** | Compares counter increment performance across Prometheus, OpenTelemetry, simpleclient (0.16.0), and Codahale Metrics |") - md.append("| **HistogramBenchmark** | Compares histogram observation performance (classic buckets vs native/exponential) |") - md.append("| **TextFormatUtilBenchmark** | 
Measures metric exposition format writing speed (Prometheus text vs OpenMetrics) |") + md.append( + "| **CounterBenchmark** | Counter increment performance: " + "Prometheus, OpenTelemetry, simpleclient, Codahale |" + ) + md.append( + "| **HistogramBenchmark** | Histogram observation performance " + "(classic vs native/exponential) |" + ) + md.append( + "| **TextFormatUtilBenchmark** | Metric exposition format writing speed |" + ) md.append("") return "\n".join(md) From 4f72fa3263c2fb5044b99bb9fae141b3e8cf65e4 Mon Sep 17 00:00:00 2001 From: Jay DeLuca Date: Mon, 26 Jan 2026 06:58:52 -0500 Subject: [PATCH 7/8] linting Signed-off-by: Jay DeLuca --- .github/workflows/nightly-benchmarks.yml | 7 ++-- .mise/tasks/generate_benchmark_summary.py | 40 +++++++++++++++++------ benchmarks/README.md | 11 ++++--- 3 files changed, 41 insertions(+), 17 deletions(-) diff --git a/.github/workflows/nightly-benchmarks.yml b/.github/workflows/nightly-benchmarks.yml index 0c6d4dff9..7e798c6c5 100644 --- a/.github/workflows/nightly-benchmarks.yml +++ b/.github/workflows/nightly-benchmarks.yml @@ -58,6 +58,7 @@ jobs: echo "Running benchmarks with args: $JMH_ARGS" + # shellcheck disable=SC2086 # Intentional word splitting for JMH args java -jar ./benchmarks/target/benchmarks.jar \ -rf json \ -rff benchmark-results.json \ @@ -107,7 +108,9 @@ jobs: DATE=$(date -u +"%Y-%m-%d") COMMIT_SHORT=$(echo "${{ github.sha }}" | cut -c1-7) - git commit -m "Benchmark results for ${DATE} (${COMMIT_SHORT})" \ - -m "Automated benchmark run from commit ${{ github.sha }}" || echo "No changes to commit" + git commit \ + -m "Benchmark results for ${DATE} (${COMMIT_SHORT})" \ + -m "From commit ${{ github.sha }}" \ + || echo "No changes to commit" git push origin benchmarks --force-with-lease || git push origin benchmarks diff --git a/.mise/tasks/generate_benchmark_summary.py b/.mise/tasks/generate_benchmark_summary.py index 9736f4c70..0b0c4fb01 100644 --- a/.mise/tasks/generate_benchmark_summary.py +++ b/.mise/tasks/generate_benchmark_summary.py @@ -26,7 +26,9 @@ def parse_args(): - parser = argparse.ArgumentParser(description="Generate benchmark summary from JMH JSON") + parser = argparse.ArgumentParser( + description="Generate benchmark summary from JMH JSON" + ) parser.add_argument( "--input", default="benchmark-results.json", @@ -47,8 +49,8 @@ def parse_args(): def get_system_info() -> Dict[str, str]: """Capture system hardware information.""" - import platform import multiprocessing + import platform info = {} @@ -67,9 +69,12 @@ def get_system_info() -> Dict[str, str]: # macOS try: import subprocess + result = subprocess.run( ["sysctl", "-n", "machdep.cpu.brand_string"], - capture_output=True, text=True, timeout=5 + capture_output=True, + text=True, + timeout=5, ) if result.returncode == 0: info["cpu_model"] = result.stdout.strip() @@ -87,9 +92,12 @@ def get_system_info() -> Dict[str, str]: # macOS try: import subprocess + result = subprocess.run( ["sysctl", "-n", "hw.memsize"], - capture_output=True, text=True, timeout=5 + capture_output=True, + text=True, + timeout=5, ) if result.returncode == 0: bytes_mem = int(result.stdout.strip()) @@ -176,7 +184,9 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: md.append("") md.append(f"- **Date:** {datetime_str}") if commit_sha != "local": - md.append(f"- **Commit:** [`{commit_short}`](https://github.com/{repo}/commit/{commit_sha})") + md.append( + f"- **Commit:** [`{commit_short}`](https://github.com/{repo}/commit/{commit_sha})" + ) else: md.append(f"- **Commit:** 
`{commit_short}` (local run)") md.append(f"- **JDK:** {jdk_version} ({vm_name})") @@ -222,13 +232,17 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: sorted_benchmarks = sorted( benchmarks, key=lambda x: x.get("primaryMetric", {}).get("score", 0), - reverse=True + reverse=True, ) md.append("| Benchmark | Score | Error | Units | |") md.append("|:----------|------:|------:|:------|:---|") - best_score = sorted_benchmarks[0].get("primaryMetric", {}).get("score", 1) if sorted_benchmarks else 1 + best_score = ( + sorted_benchmarks[0].get("primaryMetric", {}).get("score", 1) + if sorted_benchmarks + else 1 + ) for i, b in enumerate(sorted_benchmarks): name = b.get("benchmark", "").split(".")[-1] @@ -252,14 +266,18 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: except (ValueError, TypeError, ZeroDivisionError): relative_fmt = "" - md.append(f"| {name} | {score_fmt} | {error_fmt} | {unit} | {relative_fmt} |") + md.append( + f"| {name} | {score_fmt} | {error_fmt} | {unit} | {relative_fmt} |" + ) md.append("") md.append("### Raw Results") md.append("") md.append("```") - md.append(f"{'Benchmark':<50} {'Mode':>6} {'Cnt':>4} {'Score':>14} {'Error':>12} Units") + md.append( + f"{'Benchmark':<50} {'Mode':>6} {'Cnt':>4} {'Score':>14} {'Error':>12} Units" + ) for b in sorted(results, key=lambda x: x.get("benchmark", "")): name = b.get("benchmark", "").replace("io.prometheus.metrics.benchmarks.", "") @@ -283,7 +301,9 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: except (ValueError, TypeError): error_str = "" - md.append(f"{name:<50} {mode:>6} {cnt:>4} {score_str:>14} {error_str:>12} {unit}") + md.append( + f"{name:<50} {mode:>6} {cnt:>4} {score_str:>14} {error_str:>12} {unit}" + ) md.append("```") md.append("") diff --git a/benchmarks/README.md b/benchmarks/README.md index c201d356b..b4c824d85 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -15,11 +15,11 @@ mise run update-benchmarks The full benchmark suite takes approximately 2 hours with JMH defaults. 
For faster iterations, use these preset configurations: -| Command | Duration | Use Case | -|---------|----------|----------| -| `mise run benchmark:quick` | ~10 min | Quick smoke test during development | -| `mise run benchmark:standard` | ~60 min | CI/nightly runs with good accuracy | -| `mise run benchmark:full` | ~2 hours | Full JMH defaults for release validation | +| Command | Duration | Use Case | +| ----------------------------- | -------- | ---------------------------------------- | +| `mise run benchmark:quick` | ~10 min | Quick smoke test during development | +| `mise run benchmark:standard` | ~60 min | CI/nightly runs with good accuracy | +| `mise run benchmark:full` | ~2 hours | Full JMH defaults for release validation | ### Running benchmarks manually @@ -46,6 +46,7 @@ mise run update-benchmarks -- --jmh-args "-f 3 -wi 3 -i 5" ``` JMH parameter reference: + - `-f N`: Number of forks (JVM restarts) - `-wi N`: Number of warmup iterations - `-i N`: Number of measurement iterations From 0779c50de28f9ca64d8b358d9003fe7474f911c0 Mon Sep 17 00:00:00 2001 From: Jay DeLuca Date: Mon, 26 Jan 2026 13:56:08 -0500 Subject: [PATCH 8/8] add mise command for running the benchmarks Signed-off-by: Jay DeLuca --- .github/workflows/nightly-benchmarks.yml | 20 +++----------------- mise.toml | 17 +++++++++++++---- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/.github/workflows/nightly-benchmarks.yml b/.github/workflows/nightly-benchmarks.yml index 7e798c6c5..acc388546 100644 --- a/.github/workflows/nightly-benchmarks.yml +++ b/.github/workflows/nightly-benchmarks.yml @@ -47,28 +47,14 @@ jobs: restore-keys: | ${{ runner.os }}-maven- - - name: Build benchmarks module - run: ./mvnw -pl benchmarks -am -DskipTests clean package - - name: Run JMH benchmarks - run: | - # 3 forks, 3 warmup, 5 measurement iterations (~60 min total) - DEFAULT_ARGS="-f 3 -wi 3 -i 5" - JMH_ARGS="${INPUT_JMH_ARGS:-$DEFAULT_ARGS}" - - echo "Running benchmarks with args: $JMH_ARGS" - - # shellcheck disable=SC2086 # Intentional word splitting for JMH args - java -jar ./benchmarks/target/benchmarks.jar \ - -rf json \ - -rff benchmark-results.json \ - $JMH_ARGS + run: mise run benchmark:ci-json env: - INPUT_JMH_ARGS: ${{ github.event.inputs.jmh_args }} + JMH_ARGS: ${{ github.event.inputs.jmh_args }} - name: Generate benchmark summary run: | - python3 .mise/tasks/generate_benchmark_summary.py \ + mise run benchmark:generate-summary \ --input benchmark-results.json \ --output-dir benchmark-results \ --commit-sha "${{ github.sha }}" diff --git a/mise.toml b/mise.toml index fbdb4e6dd..3718e5ca4 100644 --- a/mise.toml +++ b/mise.toml @@ -88,10 +88,19 @@ run = [ description = "Run benchmarks with reduced iterations (quick smoke test, ~10 min)" run = "python3 ./.mise/tasks/update_benchmarks.py --jmh-args '-f 1 -wi 1 -i 3'" -[tasks."benchmark:full"] -description = "Run benchmarks with full JMH defaults (~2 hours)" -run = "python3 ./.mise/tasks/update_benchmarks.py" +[tasks."benchmark:ci"] +description = "Run benchmarks with CI configuration (3 forks, 3 warmup, 5 measurement iterations, ~60 min total)" +run = "python3 ./.mise/tasks/update_benchmarks.py --jmh-args '-f 3 -wi 3 -i 5'" + +[tasks."benchmark:ci-json"] +description = "Run benchmarks with CI configuration and JSON output (for workflow/testing)" +run = """ +./mvnw -pl benchmarks -am -DskipTests clean package +JMH_ARGS="${JMH_ARGS:--f 3 -wi 3 -i 5}" +echo "Running benchmarks with args: $JMH_ARGS" +java -jar ./benchmarks/target/benchmarks.jar -rf json -rff 
benchmark-results.json $JMH_ARGS +""" [tasks."benchmark:generate-summary"] description = "Generate summary from existing benchmark-results.json" -run = "python3 ./.mise/tasks/generate_benchmark_summary.py --input benchmark-results.json --output-dir benchmark-results" +run = "python3 ./.mise/tasks/generate_benchmark_summary.py"