diff --git a/.github/workflows/nightly-benchmarks.yml b/.github/workflows/nightly-benchmarks.yml new file mode 100644 index 000000000..acc388546 --- /dev/null +++ b/.github/workflows/nightly-benchmarks.yml @@ -0,0 +1,102 @@ +--- +name: Nightly Benchmarks + +on: + schedule: + # Run at 2 AM UTC every day + - cron: "0 2 * * *" + workflow_dispatch: + inputs: + jmh_args: + description: "Additional JMH arguments (e.g., '-f 1 -wi 1 -i 3' for quick run)" + required: false + default: "" + +permissions: {} + +concurrency: + group: "benchmarks" + +defaults: + run: + shell: bash + +jobs: + benchmark: + runs-on: ubuntu-24.04 + permissions: + contents: write + steps: + - name: Checkout main branch + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: true + fetch-depth: 0 + + - name: Setup mise + uses: jdx/mise-action@6d1e696aa24c1aa1bcc1adea0212707c71ab78a8 # v3.6.1 + with: + version: v2026.1.4 + sha256: 79c798e39b83f0dd80108eaa88c6ca63689695ae975fd6786e7a353ef9f87002 + + - name: Cache local Maven repository + uses: actions/cache@8b402f58fbc84540c8b491a91e594a4576fec3d7 # v5.0.2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: Run JMH benchmarks + run: mise run benchmark:ci-json + env: + JMH_ARGS: ${{ github.event.inputs.jmh_args }} + + - name: Generate benchmark summary + run: | + mise run benchmark:generate-summary \ + --input benchmark-results.json \ + --output-dir benchmark-results \ + --commit-sha "${{ github.sha }}" + env: + GITHUB_REPOSITORY: ${{ github.repository }} + + - name: Commit and push results to benchmarks branch + run: | + # Save results to a temp location + mkdir -p /tmp/benchmark-output + cp -r benchmark-results/* /tmp/benchmark-output/ + + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + # Checkout or create benchmarks branch (use -- to disambiguate from benchmarks/ directory) + if git ls-remote --heads origin benchmarks | grep -q benchmarks; then + git fetch origin benchmarks + git switch benchmarks + # Preserve existing history + if [ -d history ]; then + cp -r history /tmp/benchmark-output/ + fi + else + git switch --orphan benchmarks + fi + + # Clean working directory + git rm -rf . 2>/dev/null || true + find . -mindepth 1 -maxdepth 1 ! -name '.git' -exec rm -rf {} + + + # Copy only the benchmark results + cp -r /tmp/benchmark-output/* . 
+ + git add README.md results.json history/ + + DATE=$(date -u +"%Y-%m-%d") + COMMIT_SHORT=$(echo "${{ github.sha }}" | cut -c1-7) + + git commit \ + -m "Benchmark results for ${DATE} (${COMMIT_SHORT})" \ + -m "From commit ${{ github.sha }}" \ + || echo "No changes to commit" + + git push origin benchmarks --force-with-lease || git push origin benchmarks diff --git a/.gitignore b/.gitignore index b727017a9..83f5595ba 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,7 @@ dependency-reduced-pom.xml **/.settings/ docs/public .lycheecache + +benchmark-results/ +benchmark-results.json +benchmark-output.log diff --git a/.mise/tasks/generate_benchmark_summary.py b/.mise/tasks/generate_benchmark_summary.py new file mode 100644 index 000000000..0b0c4fb01 --- /dev/null +++ b/.mise/tasks/generate_benchmark_summary.py @@ -0,0 +1,378 @@ +#!/usr/bin/env python3 + +# [MISE] description="Generate markdown summary from JMH benchmark JSON results" +# [MISE] alias="generate-benchmark-summary" + +""" +Generate a markdown summary from JMH benchmark JSON results. + +Usage: + python3 .mise/tasks/generate_benchmark_summary.py [--input results.json] [--output-dir ./benchmark-results] + +This script: +1. Reads JMH JSON output +2. Generates a README.md with formatted tables +3. Copies results to the output directory with historical naming +""" + +import argparse +import json +import os +import shutil +import sys +from datetime import datetime, timezone +from pathlib import Path +from typing import Dict, List, Optional + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Generate benchmark summary from JMH JSON" + ) + parser.add_argument( + "--input", + default="benchmark-results.json", + help="Path to JMH JSON results file (default: benchmark-results.json)", + ) + parser.add_argument( + "--output-dir", + default="benchmark-results", + help="Output directory for results (default: benchmark-results)", + ) + parser.add_argument( + "--commit-sha", + default=None, + help="Git commit SHA (default: read from git or 'local')", + ) + return parser.parse_args() + + +def get_system_info() -> Dict[str, str]: + """Capture system hardware information.""" + import multiprocessing + import platform + + info = {} + + try: + info["cpu_cores"] = str(multiprocessing.cpu_count()) + except Exception: + pass + + try: + with open("/proc/cpuinfo", "r") as f: + for line in f: + if line.startswith("model name"): + info["cpu_model"] = line.split(":")[1].strip() + break + except FileNotFoundError: + # macOS + try: + import subprocess + + result = subprocess.run( + ["sysctl", "-n", "machdep.cpu.brand_string"], + capture_output=True, + text=True, + timeout=5, + ) + if result.returncode == 0: + info["cpu_model"] = result.stdout.strip() + except Exception: + pass + + try: + with open("/proc/meminfo", "r") as f: + for line in f: + if line.startswith("MemTotal"): + kb = int(line.split()[1]) + info["memory_gb"] = str(round(kb / 1024 / 1024)) + break + except FileNotFoundError: + # macOS + try: + import subprocess + + result = subprocess.run( + ["sysctl", "-n", "hw.memsize"], + capture_output=True, + text=True, + timeout=5, + ) + if result.returncode == 0: + bytes_mem = int(result.stdout.strip()) + info["memory_gb"] = str(round(bytes_mem / 1024 / 1024 / 1024)) + except Exception: + pass + + info["os"] = f"{platform.system()} {platform.release()}" + + return info + + +def get_commit_sha(provided_sha: Optional[str]) -> str: + """Get commit SHA from argument, git, or return 'local'.""" + if provided_sha: + return provided_sha + + 
try: + import subprocess + + result = subprocess.run( + ["git", "rev-parse", "HEAD"], + capture_output=True, + text=True, + timeout=5, + ) + if result.returncode == 0: + return result.stdout.strip() + except Exception: + pass + + return "local" + + +def format_score(score) -> str: + """Format score with appropriate precision.""" + try: + val = float(score) + if val >= 1_000_000: + return f"{val / 1_000_000:.2f}M" + elif val >= 1_000: + return f"{val / 1_000:.2f}K" + else: + return f"{val:.2f}" + except (ValueError, TypeError): + return str(score) + + +def format_error(error) -> str: + """Format error value, handling NaN.""" + try: + error_val = float(error) + if error_val != error_val: # NaN check + return "" + elif error_val >= 1_000: + return f"± {error_val / 1_000:.2f}K" + else: + return f"± {error_val:.2f}" + except (ValueError, TypeError): + return "" + + +def generate_markdown(results: List, commit_sha: str, repo: str) -> str: + """Generate markdown summary from JMH results.""" + commit_short = commit_sha[:7] + datetime_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + # Extract metadata from first result + first = results[0] if results else {} + jdk_version = first.get("jdkVersion", "unknown") + vm_name = first.get("vmName", "unknown") + threads = first.get("threads", "?") + forks = first.get("forks", "?") + warmup_iters = first.get("warmupIterations", "?") + measure_iters = first.get("measurementIterations", "?") + + sysinfo = get_system_info() + + md = [] + md.append("# Prometheus Java Client Benchmarks") + md.append("") + + md.append("## Run Information") + md.append("") + md.append(f"- **Date:** {datetime_str}") + if commit_sha != "local": + md.append( + f"- **Commit:** [`{commit_short}`](https://github.com/{repo}/commit/{commit_sha})" + ) + else: + md.append(f"- **Commit:** `{commit_short}` (local run)") + md.append(f"- **JDK:** {jdk_version} ({vm_name})") + bench_cfg = f"{forks} fork(s), {warmup_iters} warmup, {measure_iters} measurement, {threads} threads" + md.append(f"- **Benchmark config:** {bench_cfg}") + + hw_parts = [] + if sysinfo.get("cpu_model"): + hw_parts.append(sysinfo["cpu_model"]) + if sysinfo.get("cpu_cores"): + hw_parts.append(f"{sysinfo['cpu_cores']} cores") + if sysinfo.get("memory_gb"): + hw_parts.append(f"{sysinfo['memory_gb']} GB RAM") + if hw_parts: + md.append(f"- **Hardware:** {', '.join(hw_parts)}") + if sysinfo.get("os"): + md.append(f"- **OS:** {sysinfo['os']}") + + md.append("") + + # Group by benchmark class + benchmarks_by_class: Dict[str, List] = {} + for b in results: + name = b.get("benchmark", "") + parts = name.rsplit(".", 1) + if len(parts) == 2: + class_name, method = parts + class_short = class_name.split(".")[-1] + else: + class_short = "Other" + benchmarks_by_class.setdefault(class_short, []).append(b) + + md.append("## Results") + md.append("") + + # Generate table for each class + for class_name in sorted(benchmarks_by_class.keys()): + benchmarks = benchmarks_by_class[class_name] + md.append(f"### {class_name}") + md.append("") + + # Sort by score descending + sorted_benchmarks = sorted( + benchmarks, + key=lambda x: x.get("primaryMetric", {}).get("score", 0), + reverse=True, + ) + + md.append("| Benchmark | Score | Error | Units | |") + md.append("|:----------|------:|------:|:------|:---|") + + best_score = ( + sorted_benchmarks[0].get("primaryMetric", {}).get("score", 1) + if sorted_benchmarks + else 1 + ) + + for i, b in enumerate(sorted_benchmarks): + name = b.get("benchmark", "").split(".")[-1] + score = 
b.get("primaryMetric", {}).get("score", 0) + error = b.get("primaryMetric", {}).get("scoreError", 0) + unit = b.get("primaryMetric", {}).get("scoreUnit", "ops/s") + + score_fmt = format_score(score) + error_fmt = format_error(error) + + # Calculate relative performance as multiplier + try: + if i == 0: + relative_fmt = "**fastest**" + else: + multiplier = float(best_score) / float(score) + if multiplier >= 10: + relative_fmt = f"{multiplier:.0f}x slower" + else: + relative_fmt = f"{multiplier:.1f}x slower" + except (ValueError, TypeError, ZeroDivisionError): + relative_fmt = "" + + md.append( + f"| {name} | {score_fmt} | {error_fmt} | {unit} | {relative_fmt} |" + ) + + md.append("") + + md.append("### Raw Results") + md.append("") + md.append("```") + md.append( + f"{'Benchmark':<50} {'Mode':>6} {'Cnt':>4} {'Score':>14} {'Error':>12} Units" + ) + + for b in sorted(results, key=lambda x: x.get("benchmark", "")): + name = b.get("benchmark", "").replace("io.prometheus.metrics.benchmarks.", "") + mode = b.get("mode", "thrpt") + cnt = b.get("measurementIterations", 0) * b.get("forks", 1) + score = b.get("primaryMetric", {}).get("score", 0) + error = b.get("primaryMetric", {}).get("scoreError", 0) + unit = b.get("primaryMetric", {}).get("scoreUnit", "ops/s") + + try: + score_str = f"{float(score):.3f}" + except (ValueError, TypeError): + score_str = str(score) + + try: + error_val = float(error) + if error_val != error_val: # NaN + error_str = "" + else: + error_str = f"± {error_val:.3f}" + except (ValueError, TypeError): + error_str = "" + + md.append( + f"{name:<50} {mode:>6} {cnt:>4} {score_str:>14} {error_str:>12} {unit}" + ) + + md.append("```") + md.append("") + + md.append("## Notes") + md.append("") + md.append("- **Score** = Throughput in operations per second (higher is better)") + md.append("- **Error** = 99.9% confidence interval") + md.append("") + + md.append("## Benchmark Descriptions") + md.append("") + md.append("| Benchmark | Description |") + md.append("|:----------|:------------|") + md.append( + "| **CounterBenchmark** | Counter increment performance: " + "Prometheus, OpenTelemetry, simpleclient, Codahale |" + ) + md.append( + "| **HistogramBenchmark** | Histogram observation performance " + "(classic vs native/exponential) |" + ) + md.append( + "| **TextFormatUtilBenchmark** | Metric exposition format writing speed |" + ) + md.append("") + return "\n".join(md) + + +def main(): + args = parse_args() + + input_path = Path(args.input) + if not input_path.exists(): + print(f"Error: Input file not found: {input_path}") + sys.exit(1) + + print(f"Reading results from: {input_path}") + with open(input_path, "r") as f: + results = json.load(f) + + print(f"Found {len(results)} benchmark results") + + commit_sha = get_commit_sha(args.commit_sha) + commit_short = commit_sha[:7] + repo = os.environ.get("GITHUB_REPOSITORY", "prometheus/client_java") + + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + history_dir = output_dir / "history" + history_dir.mkdir(parents=True, exist_ok=True) + + results_json_path = output_dir / "results.json" + shutil.copy(input_path, results_json_path) + print(f"Copied results to: {results_json_path}") + + date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d") + history_path = history_dir / f"{date_str}-{commit_short}.json" + shutil.copy(input_path, history_path) + print(f"Saved historical entry: {history_path}") + + markdown = generate_markdown(results, commit_sha, repo) + readme_path = output_dir / "README.md" + with 
open(readme_path, "w") as f:
+        f.write(markdown)
+    print(f"Generated summary: {readme_path}")
+
+    print(f"\nDone! Results are in: {output_dir}/")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmarks/README.md b/benchmarks/README.md
index 3bba56422..b4c824d85 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -10,6 +10,17 @@ Run benchmarks and update the results in the Javadoc of the benchmark classes:
 mise run update-benchmarks
 ```
 
+### Different benchmark configurations
+
+The full benchmark suite takes approximately 2 hours with JMH defaults.
+For faster iterations, use these preset configurations:
+
+| Command                       | Duration | Use Case                                 |
+| ----------------------------- | -------- | ---------------------------------------- |
+| `mise run benchmark:quick`    | ~10 min  | Quick smoke test during development      |
+| `mise run benchmark:ci`       | ~60 min  | CI/nightly runs with good accuracy       |
+| `mise run update-benchmarks`  | ~2 hours | Full JMH defaults for release validation |
+
 ### Running benchmarks manually
 
 ```shell
@@ -22,6 +33,26 @@ Run only one specific benchmark:
 java -jar ./benchmarks/target/benchmarks.jar CounterBenchmark
 ```
 
+### Custom JMH arguments
+
+You can pass custom JMH arguments:
+
+```shell
+# Quick run: 1 fork, 1 warmup iteration, 3 measurement iterations
+mise run update-benchmarks -- --jmh-args "-f 1 -wi 1 -i 3"
+
+# Standard CI: 3 forks, 3 warmup iterations, 5 measurement iterations
+mise run update-benchmarks -- --jmh-args "-f 3 -wi 3 -i 5"
+```
+
+JMH parameter reference:
+
+- `-f N`: Number of forks (JVM restarts)
+- `-wi N`: Number of warmup iterations
+- `-i N`: Number of measurement iterations
+- `-w Ns`: Warmup iteration time (default: 10s)
+- `-r Ns`: Measurement iteration time (default: 10s)
+
 ## Results
 
 See Javadoc of the benchmark classes:
diff --git a/mise.toml b/mise.toml
index 56d2680f5..3718e5ca4 100644
--- a/mise.toml
+++ b/mise.toml
@@ -83,3 +83,24 @@ run = [
   "hugo --gc --minify --baseURL ${BASE_URL}/",
   "echo 'ls ./public/api' && ls ./public/api"
 ]
+
+[tasks."benchmark:quick"]
+description = "Run benchmarks with reduced iterations (quick smoke test, ~10 min)"
+run = "python3 ./.mise/tasks/update_benchmarks.py --jmh-args '-f 1 -wi 1 -i 3'"
+
+[tasks."benchmark:ci"]
+description = "Run benchmarks with CI configuration (3 forks, 3 warmup, 5 measurement iterations, ~60 min total)"
+run = "python3 ./.mise/tasks/update_benchmarks.py --jmh-args '-f 3 -wi 3 -i 5'"
+
+[tasks."benchmark:ci-json"]
+description = "Run benchmarks with CI configuration and JSON output (for workflow/testing)"
+run = """
+./mvnw -pl benchmarks -am -DskipTests clean package
+JMH_ARGS="${JMH_ARGS:--f 3 -wi 3 -i 5}"
+echo "Running benchmarks with args: $JMH_ARGS"
+java -jar ./benchmarks/target/benchmarks.jar -rf json -rff benchmark-results.json $JMH_ARGS
+"""
+
+[tasks."benchmark:generate-summary"]
+description = "Generate summary from existing benchmark-results.json"
+run = "python3 ./.mise/tasks/generate_benchmark_summary.py"
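
For reviewers who want to exercise the new pipeline locally, here is a minimal sketch chaining the tasks added in this patch. It assumes mise and a JDK are available on the PATH and that you run from the repository root; the short JMH flags are only illustrative, not a recommended configuration.

```shell
# Build the benchmarks module and run a short JMH pass, writing JSON results.
# JMH_ARGS overrides the benchmark:ci-json default of '-f 3 -wi 3 -i 5'.
JMH_ARGS="-f 1 -wi 1 -i 3" mise run benchmark:ci-json

# Turn benchmark-results.json into README.md, results.json and a history/ entry,
# mirroring the arguments the nightly workflow passes.
mise run benchmark:generate-summary \
  --input benchmark-results.json \
  --output-dir benchmark-results

# Inspect the generated summary.
cat benchmark-results/README.md
```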