From d8265a2fa5fdcd03f2497a41310ab7d28840a974 Mon Sep 17 00:00:00 2001 From: Jay DeLuca Date: Sun, 25 Jan 2026 14:49:13 -0500 Subject: [PATCH 1/8] benchmark workflow Signed-off-by: Jay DeLuca --- .github/workflows/nightly-benchmarks.yml | 115 +++++++++ .gitignore | 4 + .mise/tasks/generate_benchmark_summary.py | 283 ++++++++++++++++++++++ benchmarks/README.md | 30 +++ mise.toml | 31 +++ 5 files changed, 463 insertions(+) create mode 100644 .github/workflows/nightly-benchmarks.yml create mode 100644 .mise/tasks/generate_benchmark_summary.py diff --git a/.github/workflows/nightly-benchmarks.yml b/.github/workflows/nightly-benchmarks.yml new file mode 100644 index 000000000..acd95c605 --- /dev/null +++ b/.github/workflows/nightly-benchmarks.yml @@ -0,0 +1,115 @@ +--- +name: Nightly Benchmarks + +on: + schedule: + # Run at 2 AM UTC every day + - cron: "0 2 * * *" + workflow_dispatch: + inputs: + jmh_args: + description: "Additional JMH arguments (e.g., '-f 1 -wi 1 -i 3' for quick run)" + required: false + default: "" + +permissions: {} + +concurrency: + group: "benchmarks" + +defaults: + run: + shell: bash + +jobs: + benchmark: + # if: github.repository == 'prometheus/client_java' # Uncomment for production + runs-on: ubuntu-24.04 + permissions: + contents: write + steps: + - name: Checkout main branch + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: true + fetch-depth: 0 + + - name: Setup mise + uses: jdx/mise-action@6d1e696aa24c1aa1bcc1adea0212707c71ab78a8 # v3.6.1 + with: + version: v2026.1.4 + sha256: 79c798e39b83f0dd80108eaa88c6ca63689695ae975fd6786e7a353ef9f87002 + + - name: Cache local Maven repository + uses: actions/cache@8b402f58fbc84540c8b491a91e594a4576fec3d7 # v5.0.2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: Build benchmarks module + run: ./mvnw -pl benchmarks -am -DskipTests clean package + + - name: Run JMH benchmarks + id: benchmarks + run: | + # 3 forks, 3 warmup, 5 measurement iterations (~60 min total) + DEFAULT_ARGS="-f 3 -wi 3 -i 5" + JMH_ARGS="${{ github.event.inputs.jmh_args }}" + JMH_ARGS="${JMH_ARGS:-$DEFAULT_ARGS}" + + echo "Running benchmarks with args: $JMH_ARGS" + + # Run benchmarks and output JSON (captures full results) + java -jar ./benchmarks/target/benchmarks.jar \ + -rf json \ + -rff benchmark-results.json \ + $JMH_ARGS 2>&1 | tee benchmark-output.log + + - name: Generate benchmark summary + run: | + python3 .mise/tasks/generate_benchmark_summary.py \ + --input benchmark-results.json \ + --output-dir benchmark-results \ + --commit-sha "${{ github.sha }}" + env: + GITHUB_REPOSITORY: ${{ github.repository }} + + - name: Checkout or create benchmarks branch + run: | + # Check if benchmarks branch exists + if git ls-remote --heads origin benchmarks | grep -q benchmarks; then + git fetch origin benchmarks + git checkout benchmarks + # Preserve history directory if it exists + if [ -d history ]; then + cp -r history benchmark-results/ + fi + else + git checkout --orphan benchmarks + git rm -rf . 2>/dev/null || true + fi + + - name: Commit and push results + run: | + # Configure git + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + # Move results to root + cp -r benchmark-results/* . 
+ rm -rf benchmark-results + + # Add all files + git add -A + + # Commit with date + DATE=$(date -u +"%Y-%m-%d") + COMMIT_SHORT=$(echo "${{ github.sha }}" | cut -c1-7) + + git commit -m "Benchmark results for ${DATE} (${COMMIT_SHORT})" \ + -m "Automated benchmark run from commit ${{ github.sha }}" || echo "No changes to commit" + + # Push to benchmarks branch + git push origin benchmarks --force-with-lease || git push origin benchmarks diff --git a/.gitignore b/.gitignore index b727017a9..83f5595ba 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,7 @@ dependency-reduced-pom.xml **/.settings/ docs/public .lycheecache + +benchmark-results/ +benchmark-results.json +benchmark-output.log diff --git a/.mise/tasks/generate_benchmark_summary.py b/.mise/tasks/generate_benchmark_summary.py new file mode 100644 index 000000000..0581b352e --- /dev/null +++ b/.mise/tasks/generate_benchmark_summary.py @@ -0,0 +1,283 @@ +#!/usr/bin/env python3 + +# [MISE] description="Generate markdown summary from JMH benchmark JSON results" +# [MISE] alias="generate-benchmark-summary" + +""" +Generate a markdown summary from JMH benchmark JSON results. + +Usage: + python3 .mise/tasks/generate_benchmark_summary.py [--input results.json] [--output-dir ./benchmark-results] + +This script: +1. Reads JMH JSON output +2. Generates a README.md with formatted tables +3. Copies results to the output directory with historical naming +""" + +import argparse +import json +import os +import shutil +import sys +from datetime import datetime, timezone +from pathlib import Path +from typing import Dict, List, Optional + + +def parse_args(): + parser = argparse.ArgumentParser(description="Generate benchmark summary from JMH JSON") + parser.add_argument( + "--input", + default="benchmark-results.json", + help="Path to JMH JSON results file (default: benchmark-results.json)", + ) + parser.add_argument( + "--output-dir", + default="benchmark-results", + help="Output directory for results (default: benchmark-results)", + ) + parser.add_argument( + "--commit-sha", + default=None, + help="Git commit SHA (default: read from git or 'local')", + ) + return parser.parse_args() + + +def get_commit_sha(provided_sha: Optional[str]) -> str: + """Get commit SHA from argument, git, or return 'local'.""" + if provided_sha: + return provided_sha + + # Try to get from git + try: + import subprocess + + result = subprocess.run( + ["git", "rev-parse", "HEAD"], + capture_output=True, + text=True, + timeout=5, + ) + if result.returncode == 0: + return result.stdout.strip() + except Exception: + pass + + return "local" + + +def format_score(score) -> str: + """Format score with appropriate precision.""" + try: + val = float(score) + if val >= 1_000_000: + return f"{val / 1_000_000:.2f}M" + elif val >= 1_000: + return f"{val / 1_000:.2f}K" + else: + return f"{val:.2f}" + except (ValueError, TypeError): + return str(score) + + +def format_error(error) -> str: + """Format error value, handling NaN.""" + try: + error_val = float(error) + if error_val != error_val: # NaN check + return "" + elif error_val >= 1_000: + return f"± {error_val / 1_000:.2f}K" + else: + return f"± {error_val:.2f}" + except (ValueError, TypeError): + return "" + + +def generate_markdown(results: List, commit_sha: str, repo: str) -> str: + """Generate markdown summary from JMH results.""" + commit_short = commit_sha[:7] + datetime_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + # Extract metadata from first result + first = results[0] if results else {} + 
jdk_version = first.get("jdkVersion", "unknown") + vm_name = first.get("vmName", "unknown") + threads = first.get("threads", "?") + forks = first.get("forks", "?") + warmup_iters = first.get("warmupIterations", "?") + measure_iters = first.get("measurementIterations", "?") + + md = [] + md.append("# Prometheus Java Client Benchmarks") + md.append("") + + # Run metadata + md.append("## Run Information") + md.append("") + md.append(f"- **Date:** {datetime_str}") + if commit_sha != "local": + md.append(f"- **Commit:** [`{commit_short}`](https://github.com/{repo}/commit/{commit_sha})") + else: + md.append(f"- **Commit:** `{commit_short}` (local run)") + md.append(f"- **JDK:** {jdk_version} ({vm_name})") + md.append(f"- **Configuration:** {forks} fork(s), {warmup_iters} warmup, {measure_iters} measurement, {threads} threads") + md.append("") + + # Group by benchmark class + benchmarks_by_class: Dict[str, List] = {} + for b in results: + name = b.get("benchmark", "") + parts = name.rsplit(".", 1) + if len(parts) == 2: + class_name, method = parts + class_short = class_name.split(".")[-1] + else: + class_short = "Other" + benchmarks_by_class.setdefault(class_short, []).append(b) + + md.append("## Results") + md.append("") + + # Generate table for each class + for class_name in sorted(benchmarks_by_class.keys()): + benchmarks = benchmarks_by_class[class_name] + md.append(f"### {class_name}") + md.append("") + + # Sort by score descending + sorted_benchmarks = sorted( + benchmarks, + key=lambda x: x.get("primaryMetric", {}).get("score", 0), + reverse=True + ) + + md.append("| Benchmark | Score | Error | Units | |") + md.append("|:----------|------:|------:|:------|:---|") + + best_score = sorted_benchmarks[0].get("primaryMetric", {}).get("score", 1) if sorted_benchmarks else 1 + + for i, b in enumerate(sorted_benchmarks): + name = b.get("benchmark", "").split(".")[-1] + score = b.get("primaryMetric", {}).get("score", 0) + error = b.get("primaryMetric", {}).get("scoreError", 0) + unit = b.get("primaryMetric", {}).get("scoreUnit", "ops/s") + + score_fmt = format_score(score) + error_fmt = format_error(error) + + # Calculate relative performance as multiplier + try: + if i == 0: + relative_fmt = "**fastest**" + else: + multiplier = float(best_score) / float(score) + if multiplier >= 10: + relative_fmt = f"{multiplier:.0f}x slower" + else: + relative_fmt = f"{multiplier:.1f}x slower" + except (ValueError, TypeError, ZeroDivisionError): + relative_fmt = "" + + md.append(f"| {name} | {score_fmt} | {error_fmt} | {unit} | {relative_fmt} |") + + md.append("") + + md.append("### Raw Results") + md.append("") + md.append("```") + md.append(f"{'Benchmark':<50} {'Mode':>6} {'Cnt':>4} {'Score':>14} {'Error':>12} Units") + + for b in sorted(results, key=lambda x: x.get("benchmark", "")): + name = b.get("benchmark", "").replace("io.prometheus.metrics.benchmarks.", "") + mode = b.get("mode", "thrpt") + cnt = b.get("measurementIterations", 0) * b.get("forks", 1) + score = b.get("primaryMetric", {}).get("score", 0) + error = b.get("primaryMetric", {}).get("scoreError", 0) + unit = b.get("primaryMetric", {}).get("scoreUnit", "ops/s") + + try: + score_str = f"{float(score):.3f}" + except (ValueError, TypeError): + score_str = str(score) + + try: + error_val = float(error) + if error_val != error_val: # NaN + error_str = "" + else: + error_str = f"± {error_val:.3f}" + except (ValueError, TypeError): + error_str = "" + + md.append(f"{name:<50} {mode:>6} {cnt:>4} {score_str:>14} {error_str:>12} {unit}") + + 
md.append("```") + md.append("") + + md.append("## Notes") + md.append("") + md.append("- **Score** = Throughput in operations per second (higher is better)") + md.append("- **Error** = 99.9% confidence interval") + md.append("") + + md.append("## Benchmark Descriptions") + md.append("") + md.append("| Benchmark | Description |") + md.append("|:----------|:------------|") + md.append("| **CounterBenchmark** | Compares counter increment performance across Prometheus, OpenTelemetry, simpleclient (0.16.0), and Codahale Metrics |") + md.append("| **HistogramBenchmark** | Compares histogram observation performance (classic buckets vs native/exponential) |") + md.append("| **TextFormatUtilBenchmark** | Measures metric exposition format writing speed (Prometheus text vs OpenMetrics) |") + md.append("") + return "\n".join(md) + + +def main(): + args = parse_args() + + # Check input file exists + input_path = Path(args.input) + if not input_path.exists(): + print(f"Error: Input file not found: {input_path}") + sys.exit(1) + + # Load JSON results + print(f"Reading results from: {input_path}") + with open(input_path, "r") as f: + results = json.load(f) + + print(f"Found {len(results)} benchmark results") + + # Get commit info + commit_sha = get_commit_sha(args.commit_sha) + commit_short = commit_sha[:7] + repo = os.environ.get("GITHUB_REPOSITORY", "prometheus/client_java") + + # Create output directory + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + history_dir = output_dir / "history" + history_dir.mkdir(parents=True, exist_ok=True) + + results_json_path = output_dir / "results.json" + shutil.copy(input_path, results_json_path) + print(f"Copied results to: {results_json_path}") + + date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d") + history_path = history_dir / f"{date_str}-{commit_short}.json" + shutil.copy(input_path, history_path) + print(f"Saved historical entry: {history_path}") + + markdown = generate_markdown(results, commit_sha, repo) + readme_path = output_dir / "README.md" + with open(readme_path, "w") as f: + f.write(markdown) + print(f"Generated summary: {readme_path}") + + print(f"\nDone! Results are in: {output_dir}/") + + +if __name__ == "__main__": + main() diff --git a/benchmarks/README.md b/benchmarks/README.md index 3bba56422..c201d356b 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -10,6 +10,17 @@ Run benchmarks and update the results in the Javadoc of the benchmark classes: mise run update-benchmarks ``` +### Different benchmark configurations + +The full benchmark suite takes approximately 2 hours with JMH defaults. 
+For faster iterations, use these preset configurations: + +| Command | Duration | Use Case | +|---------|----------|----------| +| `mise run benchmark:quick` | ~10 min | Quick smoke test during development | +| `mise run benchmark:standard` | ~60 min | CI/nightly runs with good accuracy | +| `mise run benchmark:full` | ~2 hours | Full JMH defaults for release validation | + ### Running benchmarks manually ```shell @@ -22,6 +33,25 @@ Run only one specific benchmark: java -jar ./benchmarks/target/benchmarks.jar CounterBenchmark ``` +### Custom JMH arguments + +You can pass custom JMH arguments: + +```shell +# Quick run: 1 fork, 1 warmup iteration, 3 measurement iterations +mise run update-benchmarks -- --jmh-args "-f 1 -wi 1 -i 3" + +# Standard CI: 3 forks, 3 warmup iterations, 5 measurement iterations +mise run update-benchmarks -- --jmh-args "-f 3 -wi 3 -i 5" +``` + +JMH parameter reference: +- `-f N`: Number of forks (JVM restarts) +- `-wi N`: Number of warmup iterations +- `-i N`: Number of measurement iterations +- `-w Ns`: Warmup iteration time (default: 10s) +- `-r Ns`: Measurement iteration time (default: 10s) + ## Results See Javadoc of the benchmark classes: diff --git a/mise.toml b/mise.toml index 56d2680f5..146ca1fe9 100644 --- a/mise.toml +++ b/mise.toml @@ -83,3 +83,34 @@ run = [ "hugo --gc --minify --baseURL ${BASE_URL}/", "echo 'ls ./public/api' && ls ./public/api" ] + +[tasks."benchmark:quick"] +description = "Run benchmarks with reduced iterations (quick smoke test, ~10 min)" +run = "python3 ./.mise/tasks/update_benchmarks.py --jmh-args '-f 1 -wi 1 -i 3'" + +[tasks."benchmark:standard"] +description = "Run benchmarks with standard CI settings (~60 min)" +run = "python3 ./.mise/tasks/update_benchmarks.py --jmh-args '-f 3 -wi 3 -i 5'" + +[tasks."benchmark:full"] +description = "Run benchmarks with full JMH defaults (~2 hours)" +run = "python3 ./.mise/tasks/update_benchmarks.py" + +[tasks."benchmark:test"] +description = "Run minimal benchmark test (~1 min) and generate summary" +run = [ + "./mvnw -pl benchmarks -am -DskipTests package -q", + "java -jar ./benchmarks/target/benchmarks.jar -rf json -rff benchmark-results.json -f 1 -wi 1 -i 3 -r 1s", + "python3 ./.mise/tasks/generate_benchmark_summary.py --input benchmark-results.json --output-dir benchmark-results", + "echo ''", + "echo '=== Generated files ==='", + "ls -la benchmark-results/", + "ls -la benchmark-results/history/", + "echo ''", + "echo '=== README.md ==='", + "cat benchmark-results/README.md" +] + +[tasks."benchmark:generate-summary"] +description = "Generate summary from existing benchmark-results.json" +run = "python3 ./.mise/tasks/generate_benchmark_summary.py --input benchmark-results.json --output-dir benchmark-results" From 83edd829517ce758e26850552aac9cea4d3b22e9 Mon Sep 17 00:00:00 2001 From: Jay DeLuca Date: Sun, 25 Jan 2026 15:14:10 -0500 Subject: [PATCH 2/8] fix what's included Signed-off-by: Jay DeLuca --- .github/workflows/nightly-benchmarks.yml | 35 ++++++++++++------------ 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/.github/workflows/nightly-benchmarks.yml b/.github/workflows/nightly-benchmarks.yml index acd95c605..59a6373fc 100644 --- a/.github/workflows/nightly-benchmarks.yml +++ b/.github/workflows/nightly-benchmarks.yml @@ -76,40 +76,41 @@ jobs: env: GITHUB_REPOSITORY: ${{ github.repository }} - - name: Checkout or create benchmarks branch + - name: Commit and push results to benchmarks branch run: | - # Check if benchmarks branch exists + # Save results to a temp location +
mkdir -p /tmp/benchmark-output + cp -r benchmark-results/* /tmp/benchmark-output/ + + # Configure git + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + # Checkout or create benchmarks branch if git ls-remote --heads origin benchmarks | grep -q benchmarks; then git fetch origin benchmarks git checkout benchmarks - # Preserve history directory if it exists + # Preserve existing history if [ -d history ]; then - cp -r history benchmark-results/ + cp -r history /tmp/benchmark-output/ fi else git checkout --orphan benchmarks - git rm -rf . 2>/dev/null || true fi - - name: Commit and push results - run: | - # Configure git - git config user.name "github-actions[bot]" - git config user.email "github-actions[bot]@users.noreply.github.com" + # Clean working directory + git rm -rf . 2>/dev/null || true + find . -mindepth 1 -maxdepth 1 ! -name '.git' -exec rm -rf {} + - # Move results to root - cp -r benchmark-results/* . - rm -rf benchmark-results + # Copy only the benchmark results + cp -r /tmp/benchmark-output/* . - # Add all files - git add -A + git add README.md results.json history/ - # Commit with date DATE=$(date -u +"%Y-%m-%d") COMMIT_SHORT=$(echo "${{ github.sha }}" | cut -c1-7) git commit -m "Benchmark results for ${DATE} (${COMMIT_SHORT})" \ -m "Automated benchmark run from commit ${{ github.sha }}" || echo "No changes to commit" - # Push to benchmarks branch git push origin benchmarks --force-with-lease || git push origin benchmarks From 0efa6227dad7c61fbe78c42b03c7f243b4d844e7 Mon Sep 17 00:00:00 2001 From: Jay DeLuca Date: Sun, 25 Jan 2026 15:23:47 -0500 Subject: [PATCH 3/8] host info Signed-off-by: Jay DeLuca --- .github/workflows/nightly-benchmarks.yml | 5 +- .mise/tasks/generate_benchmark_summary.py | 80 ++++++++++++++++++++++- 2 files changed, 80 insertions(+), 5 deletions(-) diff --git a/.github/workflows/nightly-benchmarks.yml b/.github/workflows/nightly-benchmarks.yml index 59a6373fc..057192cb3 100644 --- a/.github/workflows/nightly-benchmarks.yml +++ b/.github/workflows/nightly-benchmarks.yml @@ -52,7 +52,6 @@ jobs: run: ./mvnw -pl benchmarks -am -DskipTests clean package - name: Run JMH benchmarks - id: benchmarks run: | # 3 forks, 3 warmup, 5 measurement iterations (~60 min total) DEFAULT_ARGS="-f 3 -wi 3 -i 5" @@ -61,11 +60,10 @@ jobs: echo "Running benchmarks with args: $JMH_ARGS" - # Run benchmarks and output JSON (captures full results) java -jar ./benchmarks/target/benchmarks.jar \ -rf json \ -rff benchmark-results.json \ - $JMH_ARGS 2>&1 | tee benchmark-output.log + $JMH_ARGS - name: Generate benchmark summary run: | @@ -82,7 +80,6 @@ jobs: mkdir -p /tmp/benchmark-output cp -r benchmark-results/* /tmp/benchmark-output/ - # Configure git git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" diff --git a/.mise/tasks/generate_benchmark_summary.py b/.mise/tasks/generate_benchmark_summary.py index 0581b352e..3c23899dc 100644 --- a/.mise/tasks/generate_benchmark_summary.py +++ b/.mise/tasks/generate_benchmark_summary.py @@ -45,6 +45,67 @@ def parse_args(): return parser.parse_args() +def get_system_info() -> Dict[str, str]: + """Capture system hardware information.""" + import platform + import multiprocessing + + info = {} + + # CPU cores + try: + info["cpu_cores"] = str(multiprocessing.cpu_count()) + except Exception: + pass + + # CPU model - try Linux first, then macOS + try: + with open("/proc/cpuinfo", "r") as f: + for line in f: + if 
line.startswith("model name"): + info["cpu_model"] = line.split(":")[1].strip() + break + except FileNotFoundError: + # macOS + try: + import subprocess + result = subprocess.run( + ["sysctl", "-n", "machdep.cpu.brand_string"], + capture_output=True, text=True, timeout=5 + ) + if result.returncode == 0: + info["cpu_model"] = result.stdout.strip() + except Exception: + pass + + # Memory - try Linux first, then macOS + try: + with open("/proc/meminfo", "r") as f: + for line in f: + if line.startswith("MemTotal"): + kb = int(line.split()[1]) + info["memory_gb"] = str(round(kb / 1024 / 1024)) + break + except FileNotFoundError: + # macOS + try: + import subprocess + result = subprocess.run( + ["sysctl", "-n", "hw.memsize"], + capture_output=True, text=True, timeout=5 + ) + if result.returncode == 0: + bytes_mem = int(result.stdout.strip()) + info["memory_gb"] = str(round(bytes_mem / 1024 / 1024 / 1024)) + except Exception: + pass + + # OS + info["os"] = f"{platform.system()} {platform.release()}" + + return info + + def get_commit_sha(provided_sha: Optional[str]) -> str: """Get commit SHA from argument, git, or return 'local'.""" if provided_sha: @@ -110,6 +171,9 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: warmup_iters = first.get("warmupIterations", "?") measure_iters = first.get("measurementIterations", "?") + # Get system info + sysinfo = get_system_info() + md = [] md.append("# Prometheus Java Client Benchmarks") md.append("") @@ -123,7 +187,21 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: else: md.append(f"- **Commit:** `{commit_short}` (local run)") md.append(f"- **JDK:** {jdk_version} ({vm_name})") - md.append(f"- **Configuration:** {forks} fork(s), {warmup_iters} warmup, {measure_iters} measurement, {threads} threads") + md.append(f"- **Benchmark config:** {forks} fork(s), {warmup_iters} warmup, {measure_iters} measurement, {threads} threads") + + # Hardware info + hw_parts = [] + if sysinfo.get("cpu_model"): + hw_parts.append(sysinfo["cpu_model"]) + if sysinfo.get("cpu_cores"): + hw_parts.append(f"{sysinfo['cpu_cores']} cores") + if sysinfo.get("memory_gb"): + hw_parts.append(f"{sysinfo['memory_gb']} GB RAM") + if hw_parts: + md.append(f"- **Hardware:** {', '.join(hw_parts)}") + if sysinfo.get("os"): + md.append(f"- **OS:** {sysinfo['os']}") + md.append("") # Group by benchmark class From a8dacf2152e859954f911e34d0b1afd7f276128f Mon Sep 17 00:00:00 2001 From: Jay DeLuca Date: Sun, 25 Jan 2026 15:34:45 -0500 Subject: [PATCH 4/8] host info Signed-off-by: Jay DeLuca --- .github/workflows/nightly-benchmarks.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/nightly-benchmarks.yml b/.github/workflows/nightly-benchmarks.yml index 057192cb3..a66a51b99 100644 --- a/.github/workflows/nightly-benchmarks.yml +++ b/.github/workflows/nightly-benchmarks.yml @@ -83,16 +83,16 @@ jobs: git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" - # Checkout or create benchmarks branch + # Checkout or create benchmarks branch (use -- to disambiguate from benchmarks/ directory) if git ls-remote --heads origin benchmarks | grep -q benchmarks; then git fetch origin benchmarks - git checkout benchmarks + git switch benchmarks # Preserve existing history if [ -d history ]; then cp -r history /tmp/benchmark-output/ fi else - git checkout --orphan benchmarks + git switch --orphan benchmarks fi # Clean working directory From 
aa12656149c30e385ee3a3d174c3b84d6909f646 Mon Sep 17 00:00:00 2001 From: Jay DeLuca Date: Sun, 25 Jan 2026 15:50:45 -0500 Subject: [PATCH 5/8] cleanup Signed-off-by: Jay DeLuca --- .github/workflows/nightly-benchmarks.yml | 1 - .mise/tasks/generate_benchmark_summary.py | 12 ------------ mise.toml | 19 ------------------- 3 files changed, 32 deletions(-) diff --git a/.github/workflows/nightly-benchmarks.yml b/.github/workflows/nightly-benchmarks.yml index a66a51b99..24ed26cea 100644 --- a/.github/workflows/nightly-benchmarks.yml +++ b/.github/workflows/nightly-benchmarks.yml @@ -23,7 +23,6 @@ defaults: jobs: benchmark: - # if: github.repository == 'prometheus/client_java' # Uncomment for production runs-on: ubuntu-24.04 permissions: contents: write diff --git a/.mise/tasks/generate_benchmark_summary.py b/.mise/tasks/generate_benchmark_summary.py index 3c23899dc..24daadca1 100644 --- a/.mise/tasks/generate_benchmark_summary.py +++ b/.mise/tasks/generate_benchmark_summary.py @@ -52,13 +52,11 @@ def get_system_info() -> Dict[str, str]: info = {} - # CPU cores try: info["cpu_cores"] = str(multiprocessing.cpu_count()) except Exception: pass - # CPU model - try Linux first, then macOS try: with open("/proc/cpuinfo", "r") as f: for line in f: @@ -78,7 +76,6 @@ def get_system_info() -> Dict[str, str]: except Exception: pass - # Memory - try Linux first, then macOS try: with open("/proc/meminfo", "r") as f: for line in f: @@ -100,7 +97,6 @@ def get_system_info() -> Dict[str, str]: except Exception: pass - # OS info["os"] = f"{platform.system()} {platform.release()}" return info @@ -111,7 +107,6 @@ def get_commit_sha(provided_sha: Optional[str]) -> str: if provided_sha: return provided_sha - # Try to get from git try: import subprocess @@ -171,14 +166,12 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: warmup_iters = first.get("warmupIterations", "?") measure_iters = first.get("measurementIterations", "?") - # Get system info sysinfo = get_system_info() md = [] md.append("# Prometheus Java Client Benchmarks") md.append("") - # Run metadata md.append("## Run Information") md.append("") md.append(f"- **Date:** {datetime_str}") @@ -189,7 +182,6 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: md.append(f"- **JDK:** {jdk_version} ({vm_name})") md.append(f"- **Benchmark config:** {forks} fork(s), {warmup_iters} warmup, {measure_iters} measurement, {threads} threads") - # Hardware info hw_parts = [] if sysinfo.get("cpu_model"): hw_parts.append(sysinfo["cpu_model"]) @@ -315,25 +307,21 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: def main(): args = parse_args() - # Check input file exists input_path = Path(args.input) if not input_path.exists(): print(f"Error: Input file not found: {input_path}") sys.exit(1) - # Load JSON results print(f"Reading results from: {input_path}") with open(input_path, "r") as f: results = json.load(f) print(f"Found {len(results)} benchmark results") - # Get commit info commit_sha = get_commit_sha(args.commit_sha) commit_short = commit_sha[:7] repo = os.environ.get("GITHUB_REPOSITORY", "prometheus/client_java") - # Create output directory output_dir = Path(args.output_dir) output_dir.mkdir(parents=True, exist_ok=True) history_dir = output_dir / "history" diff --git a/mise.toml b/mise.toml index 146ca1fe9..fbdb4e6dd 100644 --- a/mise.toml +++ b/mise.toml @@ -88,29 +88,10 @@ run = [ description = "Run benchmarks with reduced iterations (quick smoke test, ~10 min)" run = "python3 
./.mise/tasks/update_benchmarks.py --jmh-args '-f 1 -wi 1 -i 3'" -[tasks."benchmark:standard"] -description = "Run benchmarks with standard CI settings (~60 min)" -run = "python3 ./.mise/tasks/update_benchmarks.py --jmh-args '-f 3 -wi 3 -i 5'" - [tasks."benchmark:full"] description = "Run benchmarks with full JMH defaults (~2 hours)" run = "python3 ./.mise/tasks/update_benchmarks.py" -[tasks."benchmark:test"] -description = "Run minimal benchmark test (~1 min) and generate summary" -run = [ - "./mvnw -pl benchmarks -am -DskipTests package -q", - "java -jar ./benchmarks/target/benchmarks.jar -rf json -rff benchmark-results.json -f 1 -wi 1 -i 3 -r 1s", - "python3 ./.mise/tasks/generate_benchmark_summary.py --input benchmark-results.json --output-dir benchmark-results", - "echo ''", - "echo '=== Generated files ==='", - "ls -la benchmark-results/", - "ls -la benchmark-results/history/", - "echo ''", - "echo '=== README.md ==='", - "cat benchmark-results/README.md" -] - [tasks."benchmark:generate-summary"] description = "Generate summary from existing benchmark-results.json" run = "python3 ./.mise/tasks/generate_benchmark_summary.py --input benchmark-results.json --output-dir benchmark-results" From bc60f49eeb1b0f4ebd02318def328f8d6158726a Mon Sep 17 00:00:00 2001 From: Jay DeLuca Date: Sun, 25 Jan 2026 15:57:20 -0500 Subject: [PATCH 6/8] linting Signed-off-by: Jay DeLuca --- .github/workflows/nightly-benchmarks.yml | 5 +++-- .mise/tasks/generate_benchmark_summary.py | 17 +++++++++++++---- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/.github/workflows/nightly-benchmarks.yml b/.github/workflows/nightly-benchmarks.yml index 24ed26cea..0c6d4dff9 100644 --- a/.github/workflows/nightly-benchmarks.yml +++ b/.github/workflows/nightly-benchmarks.yml @@ -54,8 +54,7 @@ jobs: run: | # 3 forks, 3 warmup, 5 measurement iterations (~60 min total) DEFAULT_ARGS="-f 3 -wi 3 -i 5" - JMH_ARGS="${{ github.event.inputs.jmh_args }}" - JMH_ARGS="${JMH_ARGS:-$DEFAULT_ARGS}" + JMH_ARGS="${INPUT_JMH_ARGS:-$DEFAULT_ARGS}" echo "Running benchmarks with args: $JMH_ARGS" @@ -63,6 +62,8 @@ jobs: -rf json \ -rff benchmark-results.json \ $JMH_ARGS + env: + INPUT_JMH_ARGS: ${{ github.event.inputs.jmh_args }} - name: Generate benchmark summary run: | diff --git a/.mise/tasks/generate_benchmark_summary.py b/.mise/tasks/generate_benchmark_summary.py index 24daadca1..9736f4c70 100644 --- a/.mise/tasks/generate_benchmark_summary.py +++ b/.mise/tasks/generate_benchmark_summary.py @@ -180,7 +180,8 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: else: md.append(f"- **Commit:** `{commit_short}` (local run)") md.append(f"- **JDK:** {jdk_version} ({vm_name})") - md.append(f"- **Benchmark config:** {forks} fork(s), {warmup_iters} warmup, {measure_iters} measurement, {threads} threads") + bench_cfg = f"{forks} fork(s), {warmup_iters} warmup, {measure_iters} measurement, {threads} threads" + md.append(f"- **Benchmark config:** {bench_cfg}") hw_parts = [] if sysinfo.get("cpu_model"): @@ -297,9 +298,17 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: md.append("") md.append("| Benchmark | Description |") md.append("|:----------|:------------|") - md.append("| **CounterBenchmark** | Compares counter increment performance across Prometheus, OpenTelemetry, simpleclient (0.16.0), and Codahale Metrics |") - md.append("| **HistogramBenchmark** | Compares histogram observation performance (classic buckets vs native/exponential) |") - md.append("| **TextFormatUtilBenchmark** | 
Measures metric exposition format writing speed (Prometheus text vs OpenMetrics) |") + md.append( + "| **CounterBenchmark** | Counter increment performance: " + "Prometheus, OpenTelemetry, simpleclient, Codahale |" + ) + md.append( + "| **HistogramBenchmark** | Histogram observation performance " + "(classic vs native/exponential) |" + ) + md.append( + "| **TextFormatUtilBenchmark** | Metric exposition format writing speed |" + ) md.append("") return "\n".join(md) From 4f72fa3263c2fb5044b99bb9fae141b3e8cf65e4 Mon Sep 17 00:00:00 2001 From: Jay DeLuca Date: Mon, 26 Jan 2026 06:58:52 -0500 Subject: [PATCH 7/8] linting Signed-off-by: Jay DeLuca --- .github/workflows/nightly-benchmarks.yml | 7 ++-- .mise/tasks/generate_benchmark_summary.py | 40 +++++++++++++++++------ benchmarks/README.md | 11 ++++--- 3 files changed, 41 insertions(+), 17 deletions(-) diff --git a/.github/workflows/nightly-benchmarks.yml b/.github/workflows/nightly-benchmarks.yml index 0c6d4dff9..7e798c6c5 100644 --- a/.github/workflows/nightly-benchmarks.yml +++ b/.github/workflows/nightly-benchmarks.yml @@ -58,6 +58,7 @@ jobs: echo "Running benchmarks with args: $JMH_ARGS" + # shellcheck disable=SC2086 # Intentional word splitting for JMH args java -jar ./benchmarks/target/benchmarks.jar \ -rf json \ -rff benchmark-results.json \ @@ -107,7 +108,9 @@ jobs: DATE=$(date -u +"%Y-%m-%d") COMMIT_SHORT=$(echo "${{ github.sha }}" | cut -c1-7) - git commit -m "Benchmark results for ${DATE} (${COMMIT_SHORT})" \ - -m "Automated benchmark run from commit ${{ github.sha }}" || echo "No changes to commit" + git commit \ + -m "Benchmark results for ${DATE} (${COMMIT_SHORT})" \ + -m "From commit ${{ github.sha }}" \ + || echo "No changes to commit" git push origin benchmarks --force-with-lease || git push origin benchmarks diff --git a/.mise/tasks/generate_benchmark_summary.py b/.mise/tasks/generate_benchmark_summary.py index 9736f4c70..0b0c4fb01 100644 --- a/.mise/tasks/generate_benchmark_summary.py +++ b/.mise/tasks/generate_benchmark_summary.py @@ -26,7 +26,9 @@ def parse_args(): - parser = argparse.ArgumentParser(description="Generate benchmark summary from JMH JSON") + parser = argparse.ArgumentParser( + description="Generate benchmark summary from JMH JSON" + ) parser.add_argument( "--input", default="benchmark-results.json", @@ -47,8 +49,8 @@ def parse_args(): def get_system_info() -> Dict[str, str]: """Capture system hardware information.""" - import platform import multiprocessing + import platform info = {} @@ -67,9 +69,12 @@ def get_system_info() -> Dict[str, str]: # macOS try: import subprocess + result = subprocess.run( ["sysctl", "-n", "machdep.cpu.brand_string"], - capture_output=True, text=True, timeout=5 + capture_output=True, + text=True, + timeout=5, ) if result.returncode == 0: info["cpu_model"] = result.stdout.strip() @@ -87,9 +92,12 @@ def get_system_info() -> Dict[str, str]: # macOS try: import subprocess + result = subprocess.run( ["sysctl", "-n", "hw.memsize"], - capture_output=True, text=True, timeout=5 + capture_output=True, + text=True, + timeout=5, ) if result.returncode == 0: bytes_mem = int(result.stdout.strip()) @@ -176,7 +184,9 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: md.append("") md.append(f"- **Date:** {datetime_str}") if commit_sha != "local": - md.append(f"- **Commit:** [`{commit_short}`](https://github.com/{repo}/commit/{commit_sha})") + md.append( + f"- **Commit:** [`{commit_short}`](https://github.com/{repo}/commit/{commit_sha})" + ) else: md.append(f"- **Commit:** 
`{commit_short}` (local run)") md.append(f"- **JDK:** {jdk_version} ({vm_name})") @@ -222,13 +232,17 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: sorted_benchmarks = sorted( benchmarks, key=lambda x: x.get("primaryMetric", {}).get("score", 0), - reverse=True + reverse=True, ) md.append("| Benchmark | Score | Error | Units | |") md.append("|:----------|------:|------:|:------|:---|") - best_score = sorted_benchmarks[0].get("primaryMetric", {}).get("score", 1) if sorted_benchmarks else 1 + best_score = ( + sorted_benchmarks[0].get("primaryMetric", {}).get("score", 1) + if sorted_benchmarks + else 1 + ) for i, b in enumerate(sorted_benchmarks): name = b.get("benchmark", "").split(".")[-1] @@ -252,14 +266,18 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: except (ValueError, TypeError, ZeroDivisionError): relative_fmt = "" - md.append(f"| {name} | {score_fmt} | {error_fmt} | {unit} | {relative_fmt} |") + md.append( + f"| {name} | {score_fmt} | {error_fmt} | {unit} | {relative_fmt} |" + ) md.append("") md.append("### Raw Results") md.append("") md.append("```") - md.append(f"{'Benchmark':<50} {'Mode':>6} {'Cnt':>4} {'Score':>14} {'Error':>12} Units") + md.append( + f"{'Benchmark':<50} {'Mode':>6} {'Cnt':>4} {'Score':>14} {'Error':>12} Units" + ) for b in sorted(results, key=lambda x: x.get("benchmark", "")): name = b.get("benchmark", "").replace("io.prometheus.metrics.benchmarks.", "") @@ -283,7 +301,9 @@ def generate_markdown(results: List, commit_sha: str, repo: str) -> str: except (ValueError, TypeError): error_str = "" - md.append(f"{name:<50} {mode:>6} {cnt:>4} {score_str:>14} {error_str:>12} {unit}") + md.append( + f"{name:<50} {mode:>6} {cnt:>4} {score_str:>14} {error_str:>12} {unit}" + ) md.append("```") md.append("") diff --git a/benchmarks/README.md b/benchmarks/README.md index c201d356b..b4c824d85 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -15,11 +15,11 @@ mise run update-benchmarks The full benchmark suite takes approximately 2 hours with JMH defaults. 
For faster iterations, use these preset configurations: -| Command | Duration | Use Case | -|---------|----------|----------| -| `mise run benchmark:quick` | ~10 min | Quick smoke test during development | -| `mise run benchmark:standard` | ~60 min | CI/nightly runs with good accuracy | -| `mise run benchmark:full` | ~2 hours | Full JMH defaults for release validation | +| Command | Duration | Use Case | +| ----------------------------- | -------- | ---------------------------------------- | +| `mise run benchmark:quick` | ~10 min | Quick smoke test during development | +| `mise run benchmark:standard` | ~60 min | CI/nightly runs with good accuracy | +| `mise run benchmark:full` | ~2 hours | Full JMH defaults for release validation | ### Running benchmarks manually @@ -46,6 +46,7 @@ mise run update-benchmarks -- --jmh-args "-f 3 -wi 3 -i 5" ``` JMH parameter reference: + - `-f N`: Number of forks (JVM restarts) - `-wi N`: Number of warmup iterations - `-i N`: Number of measurement iterations From 0779c50de28f9ca64d8b358d9003fe7474f911c0 Mon Sep 17 00:00:00 2001 From: Jay DeLuca Date: Mon, 26 Jan 2026 13:56:08 -0500 Subject: [PATCH 8/8] add mise command for running the benchmarks Signed-off-by: Jay DeLuca --- .github/workflows/nightly-benchmarks.yml | 20 +++----------------- mise.toml | 17 +++++++++++++---- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/.github/workflows/nightly-benchmarks.yml b/.github/workflows/nightly-benchmarks.yml index 7e798c6c5..acc388546 100644 --- a/.github/workflows/nightly-benchmarks.yml +++ b/.github/workflows/nightly-benchmarks.yml @@ -47,28 +47,14 @@ jobs: restore-keys: | ${{ runner.os }}-maven- - - name: Build benchmarks module - run: ./mvnw -pl benchmarks -am -DskipTests clean package - - name: Run JMH benchmarks - run: | - # 3 forks, 3 warmup, 5 measurement iterations (~60 min total) - DEFAULT_ARGS="-f 3 -wi 3 -i 5" - JMH_ARGS="${INPUT_JMH_ARGS:-$DEFAULT_ARGS}" - - echo "Running benchmarks with args: $JMH_ARGS" - - # shellcheck disable=SC2086 # Intentional word splitting for JMH args - java -jar ./benchmarks/target/benchmarks.jar \ - -rf json \ - -rff benchmark-results.json \ - $JMH_ARGS + run: mise run benchmark:ci-json env: - INPUT_JMH_ARGS: ${{ github.event.inputs.jmh_args }} + JMH_ARGS: ${{ github.event.inputs.jmh_args }} - name: Generate benchmark summary run: | - python3 .mise/tasks/generate_benchmark_summary.py \ + mise run benchmark:generate-summary \ --input benchmark-results.json \ --output-dir benchmark-results \ --commit-sha "${{ github.sha }}" diff --git a/mise.toml b/mise.toml index fbdb4e6dd..3718e5ca4 100644 --- a/mise.toml +++ b/mise.toml @@ -88,10 +88,19 @@ run = [ description = "Run benchmarks with reduced iterations (quick smoke test, ~10 min)" run = "python3 ./.mise/tasks/update_benchmarks.py --jmh-args '-f 1 -wi 1 -i 3'" -[tasks."benchmark:full"] -description = "Run benchmarks with full JMH defaults (~2 hours)" -run = "python3 ./.mise/tasks/update_benchmarks.py" +[tasks."benchmark:ci"] +description = "Run benchmarks with CI configuration (3 forks, 3 warmup, 5 measurement iterations, ~60 min total)" +run = "python3 ./.mise/tasks/update_benchmarks.py --jmh-args '-f 3 -wi 3 -i 5'" + +[tasks."benchmark:ci-json"] +description = "Run benchmarks with CI configuration and JSON output (for workflow/testing)" +run = """ +./mvnw -pl benchmarks -am -DskipTests clean package +JMH_ARGS="${JMH_ARGS:--f 3 -wi 3 -i 5}" +echo "Running benchmarks with args: $JMH_ARGS" +java -jar ./benchmarks/target/benchmarks.jar -rf json -rff 
benchmark-results.json $JMH_ARGS +""" [tasks."benchmark:generate-summary"] description = "Generate summary from existing benchmark-results.json" -run = "python3 ./.mise/tasks/generate_benchmark_summary.py --input benchmark-results.json --output-dir benchmark-results" +run = "python3 ./.mise/tasks/generate_benchmark_summary.py"