test: add summary-cache pytest-benchmark suite with CI regression gate by clean6378-max-it · Pull Request #120 · cppalliance/cppa-cursor-browser · GitHub
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 42 additions & 2 deletions .github/workflows/tests.yml
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,5 @@ Thumbs.db
htmlcov/
coverage.xml
.hypothesis/
benchmark-results.json
benchmarks/_raw.json
15 changes: 15 additions & 0 deletions benchmarks/baselines.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"_note": "Gated means from ubuntu-latest CI benchmark-results.json (PR #120, run 28123677675). Refresh after intentional perf changes: download benchmark-results.json from the CI artifacts job, then `python scripts/check_benchmark_regression.py benchmark-results.json benchmarks/baselines.json` (re-seed with reduce_baselines or edit means). Local capture: `pytest tests/benchmarks/ --benchmark-only --benchmark-json=benchmark-results.json -o addopts=` on ubuntu-latest.",
"updated": "2026-06-24T19:20:27Z",
"machine": "Linux",
"groups": {
"summary-cache": {
"test_summary_cache_hit": 6.3e-05,
"test_summary_cache_miss": 6.3e-05,
"test_fingerprint_workspace_entries[10]": 0.001844,
"test_fingerprint_workspace_entries[50]": 0.007759,
"test_fingerprint_workspace_entries[200]": 0.022231,
"test_summary_cache_round_trip": 0.000351
}
}
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
10 changes: 10 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,20 @@ desktop = ["pywebview>=5.0,<6"]
# Development tooling: testing + type checking.
dev = [
"pytest>=8,<9",
"pytest-benchmark>=4,<5",
"mypy>=1.10,<2",
"hypothesis>=6.100,<7",
]

[tool.pytest.ini_options]
# addopts passes --benchmark-skip, an option that only exists when pytest-benchmark is installed, so even a plain `pytest` run needs the dev extras (pip install -e ".[dev]").
pythonpath = ["."]
addopts = "--benchmark-skip"
testpaths = ["tests"]
Comment thread
clean6378-max-it marked this conversation as resolved.
markers = [
"benchmark: performance benchmarks (pytest-benchmark)",
]
Comment thread
coderabbitai[bot] marked this conversation as resolved.

[project.scripts]
# Primary CLI: export Cursor chat histories to Markdown / zip.
# Usage: cursor-chat-export [--since all|last] [--out DIR] [--no-zip] [--help]
Expand Down
2 changes: 1 addition & 1 deletion requirements-lock.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# Lock is generated on Linux (CI / update-lock.yml). Windows-only transitives (e.g.
# colorama via click) are omitted — pip still installs them on Windows when needed.
blinker==1.9.0 # via flask
click==8.4.1 # via flask
click==8.4.2 # via flask
defusedxml==0.7.1 # via fpdf2
flask==3.1.3 # via -r requirements.txt
fonttools==4.63.0 # via fpdf2
Expand Down
163 changes: 163 additions & 0 deletions scripts/check_benchmark_regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
"""Compare pytest-benchmark JSON output against stored baselines."""

from __future__ import annotations

import argparse
import json
import math
import sys
from pathlib import Path

# Default maximum allowed ratio of current mean to baseline mean; a benchmark
# whose ratio is strictly greater than this is reported as a regression.
THRESHOLD = 1.20


class BenchmarkDataError(ValueError):
    """Signal that a results or baselines JSON file is unreadable, malformed, or incomplete."""


def normalize_benchmark_name(name: str) -> str:
    """Strip a leading pytest module path so short and full names compare equal.

    ``tests/test_foo.py::test_bar[10]`` becomes ``test_bar[10]``. Names with
    no module prefix are returned unchanged.

    Args:
        name: Benchmark name, either short (``test_bar[10]``) or prefixed
            with the test module path.

    Returns:
        The name with the ``<module>.py::`` prefix removed when present;
        otherwise the input text unchanged.
    """
    text = str(name)
    prefix, separator, suffix = text.partition("::")
    if not separator:
        return text
    # Only strip a real module path. If the first "::" sits inside a
    # parametrize id (e.g. "test_x[src/a.py::f]") the prefix contains "[" and
    # merely happens to end in ".py"; stripping there would corrupt the name.
    if prefix.endswith(".py") and "[" not in prefix:
        return suffix
    return text


def load_results(results_path: str | Path) -> dict[str, float]:
    """Load per-benchmark mean timings from a pytest-benchmark JSON report.

    Args:
        results_path: Path to the file written by ``--benchmark-json``.

    Returns:
        Mapping of normalized benchmark name to its ``stats.mean`` value.

    Raises:
        BenchmarkDataError: If the file cannot be read, is not valid UTF-8 or
            JSON, lacks a top-level ``benchmarks`` array, contains a malformed
            entry or a duplicate normalized name, or reports a mean that is
            NaN, infinite, or negative.
    """
    path = Path(results_path)
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except OSError as exc:
        raise BenchmarkDataError(f"cannot read {path}: {exc}") from exc
    except UnicodeDecodeError as exc:
        # read_text raises this (a ValueError, not an OSError) on bad bytes;
        # without this clause it would escape main() as a raw traceback.
        raise BenchmarkDataError(f"{path} is not valid UTF-8: {exc}") from exc
    except json.JSONDecodeError as exc:
        raise BenchmarkDataError(f"invalid JSON in {path}: {exc}") from exc
    try:
        benchmarks = data["benchmarks"]
    except (KeyError, TypeError) as exc:
        raise BenchmarkDataError(f"{path} missing top-level 'benchmarks' array") from exc
    if not isinstance(benchmarks, list):
        raise BenchmarkDataError(f"{path} 'benchmarks' must be an array")

    results: dict[str, float] = {}
    for index, entry in enumerate(benchmarks):
        if not isinstance(entry, dict):
            raise BenchmarkDataError(f"{path} benchmarks[{index}] must be an object")
        try:
            raw_name = entry["name"]
            mean = float(entry["stats"]["mean"])
        except (KeyError, TypeError, ValueError) as exc:
            raise BenchmarkDataError(
                f"{path} benchmarks[{index}] missing 'name' or 'stats.mean'"
            ) from exc
        # json.loads accepts NaN/Infinity literals. A NaN or negative mean
        # would make every "ratio > threshold" comparison false and let the
        # benchmark pass the gate silently, so reject it up front.
        if not math.isfinite(mean) or mean < 0:
            raise BenchmarkDataError(
                f"{path} benchmarks[{index}] 'stats.mean' must be a finite, "
                f"non-negative number (got {mean!r})"
            )
        name = normalize_benchmark_name(str(raw_name))
        if name in results:
            raise BenchmarkDataError(f"{path} duplicate benchmark name {name!r}")
        results[name] = mean
    return results


def load_baseline_means(baselines_path: str | Path) -> dict[str, float]:
    """Load the gated baseline means from a baselines JSON file.

    The file must be a JSON object with a ``groups`` object; each group maps
    benchmark names to numeric means. Groups are flattened into one mapping,
    and other top-level keys are ignored.

    Args:
        baselines_path: Path to the baselines JSON file.

    Returns:
        Mapping of normalized benchmark name to its baseline mean.

    Raises:
        BenchmarkDataError: If the file cannot be read, is not valid UTF-8 or
            JSON, has the wrong shape, repeats a normalized benchmark name
            across groups, or holds a mean that is non-numeric, NaN, infinite,
            or negative.
    """
    path = Path(baselines_path)
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except OSError as exc:
        raise BenchmarkDataError(f"cannot read {path}: {exc}") from exc
    except UnicodeDecodeError as exc:
        # read_text raises this (a ValueError, not an OSError) on bad bytes;
        # without this clause it would escape main() as a raw traceback.
        raise BenchmarkDataError(f"{path} is not valid UTF-8: {exc}") from exc
    except json.JSONDecodeError as exc:
        raise BenchmarkDataError(f"invalid JSON in {path}: {exc}") from exc
    if not isinstance(data, dict):
        raise BenchmarkDataError(f"{path} root value must be an object")

    if "groups" not in data:
        raise BenchmarkDataError(f"{path} missing required 'groups' key")
    groups = data["groups"]
    if not isinstance(groups, dict):
        raise BenchmarkDataError(f"{path} 'groups' must be an object")

    means: dict[str, float] = {}
    for group_name, value in groups.items():
        if not isinstance(value, dict):
            raise BenchmarkDataError(
                f"{path} groups[{group_name!r}] must be an object of benchmark means"
            )
        for name, mean in value.items():
            bench_name = normalize_benchmark_name(str(name))
            if bench_name in means:
                raise BenchmarkDataError(
                    f"{path} duplicate benchmark name {bench_name!r} across groups"
                )
            try:
                baseline = float(mean)
            except (TypeError, ValueError) as exc:
                raise BenchmarkDataError(
                    f"{path} groups[{group_name!r}][{name!r}] is not a numeric mean"
                ) from exc
            # A NaN or negative baseline yields a ratio that can never exceed
            # the threshold, which would silently un-gate the benchmark. Zero
            # stays allowed: the regression check warns about it explicitly.
            if not math.isfinite(baseline) or baseline < 0:
                raise BenchmarkDataError(
                    f"{path} groups[{group_name!r}][{name!r}] must be a finite, "
                    f"non-negative mean (got {baseline!r})"
                )
            means[bench_name] = baseline
    return means


def check_regression(
    results_path: str | Path,
    baselines_path: str | Path,
    *,
    threshold: float = THRESHOLD,
) -> int:
    """Compare current benchmark means with baselines and print a report.

    Args:
        results_path: pytest-benchmark JSON report with the current means.
        baselines_path: Baselines JSON file listing the gated benchmarks.
        threshold: Largest accepted ratio of current mean to baseline mean.

    Returns:
        0 when every gated benchmark is present and within the threshold;
        1 when any gated benchmark exceeds it or has no current result.
    """
    current_means = load_results(results_path)
    gated_means = load_baseline_means(baselines_path)

    regressed: list[str] = []
    absent: list[str] = []
    for bench, baseline in gated_means.items():
        current = current_means.get(bench)
        if current is None:
            # A gated benchmark that did not run counts as a failure.
            print(f"FAIL: no current result for gated baseline {bench!r}")
            absent.append(bench)
        elif baseline == 0:
            # No meaningful ratio can be computed against a zero baseline.
            print(f"WARN: baseline for {bench!r} is zero; skipping ratio check")
        else:
            ratio = current / baseline
            too_slow = ratio > threshold
            tag = "FAIL" if too_slow else "ok"
            print(f"[{tag}] {bench}: {current:.6f}s vs {baseline:.6f}s ({ratio:.2f}x)")
            if too_slow:
                regressed.append(bench)

    # Benchmarks without a baseline are reported but never fail the gate.
    for bench in current_means:
        if bench not in gated_means:
            print(f"WARN: {bench!r} has no baseline yet; not gated")

    if regressed:
        print(f"\nREGRESSION: {len(regressed)} benchmark(s) exceeded {threshold:.0%}")
    if absent:
        print(f"\nMISSING: {len(absent)} gated benchmark(s) absent from current results")
    return 1 if regressed or absent else 0


def main(argv: list[str] | None = None) -> int:
    """Parse command-line arguments and run the regression check.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        The exit code from ``check_regression``, or 2 when the input files
        are malformed (the error is written to stderr).
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("results_path", help="pytest-benchmark --benchmark-json output")
    cli.add_argument("baselines_path", help="path to benchmarks/baselines.json")
    cli.add_argument(
        "--threshold",
        type=float,
        default=THRESHOLD,
        help="fail when current mean exceeds baseline by more than this ratio (default: 1.20)",
    )
    options = cli.parse_args(argv)
    try:
        exit_code = check_regression(
            options.results_path,
            options.baselines_path,
            threshold=options.threshold,
        )
    except BenchmarkDataError as exc:
        # Bad input is reported separately from a regression (exit code 1).
        print(f"ERROR: {exc}", file=sys.stderr)
        return 2
    return exit_code


# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())
Empty file added tests/benchmarks/__init__.py
Empty file.
89 changes: 89 additions & 0 deletions tests/benchmarks/conftest.py
Loading
Loading