{
  "ok": true,
  "packet": {
    "packet_id": "catalyst-q-benchmark-evidence-packet-v1",
    "generated_at": "2026-05-27T00:00:00.000Z",
    "positioning": "Verification layer for high-stakes scientific and operational AI decisions.",
    "claim_boundary": "Benchmark-readiness and signed evidence, not solver superiority, unrestricted 10k-qubit FCI, ACOPF superiority, production safety certification, or a replacement for customer scientific/operational review.",
    "evidence_readiness": {
      "status": "complete",
      "score_pct": 100,
      "checks_passed": 5,
      "checks_total": 5,
      "summary": "Aggregate benchmark evidence packet is 100% complete for the declared evidence-pack scope.",
      "checks": [
        {
          "id": "exact_chemistry_ready",
          "label": "Exact chemistry packet ready",
          "pass": true,
          "evidence": "Exact chemistry verification packet is 100% complete for the declared evidence-pack scope."
        },
        {
          "id": "freight_ready",
          "label": "Freight proof packet ready",
          "pass": true,
          "evidence": "Freight RouteOps proof packet is 100% complete for the declared evidence-pack scope."
        },
        {
          "id": "grid_ready",
          "label": "Grid evidence section ready",
          "pass": true,
          "evidence": "Benchmark grid section is 100% complete for the declared evidence-pack scope."
        },
        {
          "id": "verification_ready",
          "label": "Verification evidence ready",
          "pass": true,
          "evidence": "Benchmark verification section is 100% complete for the declared evidence-pack scope."
        },
        {
          "id": "external_promotion_gates_separated",
          "label": "External promotion gates separated",
          "pass": true,
          "evidence": "Freight solver superiority, chemistry market-claim expansion, and grid ACOPF claims remain outside readiness scoring."
        }
      ]
    },
    "exact_chemistry": {
      "evidence_readiness": {
        "status": "complete",
        "score_pct": 100,
        "checks_passed": 4,
        "checks_total": 4,
        "summary": "Benchmark exact chemistry section is 100% complete for the declared evidence-pack scope.",
        "checks": [
          {
            "id": "baseline_tools_declared",
            "label": "Reference chemistry stacks declared",
            "pass": true,
            "evidence": "PySCF, Psi4, OpenFermion, and Qiskit Nature are listed as reference lanes."
          },
          {
            "id": "scoped_active_space_cases_declared",
            "label": "Scoped active-space cases declared",
            "pass": true,
            "evidence": "LiH, hydrogen-chain, and FeMo fragment benchmark cases are included."
          },
          {
            "id": "benchmark_matrix_declared",
            "label": "Benchmark matrix declared",
            "pass": true,
            "evidence": "Hamiltonian, eval:chemistry:references, Catalyst-Q, and promotion steps are listed."
          },
          {
            "id": "external_reference_gate_tracked",
            "label": "External reference gate tracked",
            "pass": true,
            "evidence": "PySCF/Psi4/OpenFermion/Qiskit Nature promotion gate is tracked separately from packet readiness."
          }
        ]
      },
      "baseline_tools": [
        {
          "name": "PySCF",
          "role": "Reference molecular integrals, active-space setup, and FCI/CASCI comparison on small scoped systems; wired through npm run eval:chemistry:references.",
          "adapter_status": "reference_required",
          "evidence_target": "H2, LiH/STO-3G, H6, H8, and 24/40/50/64/96/128/256/512/1024-qubit decomposed active-space reference rows now recorded; transition-metal fragment packets remain next."
        },
        {
          "name": "Psi4",
          "role": "Independent quantum-chemistry reference for DFT/HF/MP2 energies and basis metadata.",
          "adapter_status": "planned",
          "evidence_target": "Approximate-method disagreement rows and reproducible geometry/basis provenance."
        },
        {
          "name": "OpenFermion",
          "role": "Hamiltonian construction and sparse exact-diagonalization cross-checks for scoped active spaces; wired through npm run eval:chemistry:references.",
          "adapter_status": "reference_required",
          "evidence_target": "Sparse Hamiltonian diagonalization rows now recorded against PySCF FCI for H2, LiH, H6, H8, and decomposed 24/40/50/64/96/128/256/512/1024-qubit additivity checks."
        },
        {
          "name": "Qiskit Nature",
          "role": "Independent electronic-structure workflow and operator conversion comparison.",
          "adapter_status": "planned",
          "evidence_target": "Active-space operator agreement, symmetry labels, and replayable reference notebooks."
        }
      ],
      "scoped_active_space_cases": [
        {
          "case_id": "lih_sto3g_active_space",
          "molecule_family": "small-molecule sanity check",
          "customer_value": "Fast reference case before any paid verification packet.",
          "active_space": {
            "electrons": 4,
            "orbitals": 4,
            "basis": "STO-3G"
          },
          "acceptance": {
            "energy_delta_threshold": "<= 1 mHa against PySCF/OpenFermion exact reference",
            "required_checks": [
              "schema_integrity",
              "active_space_declared",
              "symmetry_cross_check",
              "small_subsystem_cross_check"
            ]
          },
          "benchmark_tools": [
            "PySCF",
            "OpenFermion",
            "Qiskit Nature"
          ],
          "claim_boundary": "Active-space exactness only; do not generalize to unrestricted molecules."
        },
        {
          "case_id": "h_chain_multireference_probe",
          "molecule_family": "strong-correlation hydrogen chain",
          "customer_value": "Shows whether prompt/tooling catches static-correlation cases where single-reference methods drift.",
          "active_space": {
            "electrons": 10,
            "orbitals": 10,
            "basis": "STO-6G screening basis"
          },
          "acceptance": {
            "energy_delta_threshold": "<= 1 kcal/mol inside the declared active-space model",
            "required_checks": [
              "electron_number",
              "spin_multiplicity",
              "deterministic_replay"
            ]
          },
          "benchmark_tools": [
            "PySCF",
            "OpenFermion"
          ],
          "claim_boundary": "Active-space benchmark case, not material-scale FCI evidence."
        },
        {
          "case_id": "femo_fragment_screen",
          "molecule_family": "transition-metal catalyst fragment",
          "customer_value": "Premium paid-pilot case for disputed DFT spin ordering in green-ammonia catalyst screens.",
          "active_space": {
            "electrons": 18,
            "orbitals": 18,
            "basis": "def2-SVP active-space screening basis"
          },
          "acceptance": {
            "energy_delta_threshold": "<= 1 kcal/mol on smaller replayable subsystems; larger packet must publish subsystem deltas",
            "required_checks": [
              "small_subsystem_cross_check",
              "symmetry_cross_check",
              "approximate_method_disagreement"
            ]
          },
          "benchmark_tools": [
            "PySCF",
            "Psi4",
            "OpenFermion",
            "Qiskit Nature"
          ],
          "claim_boundary": "Active-space verification packet for a declared fragment; do not claim unrestricted 10k-qubit FCI."
        }
      ],
      "benchmark_matrix": [
        "Generate molecular Hamiltonian and active-space metadata from the same geometry, charge, spin, basis, and frozen-core choices.",
        "Run npm run eval:chemistry:references to record PySCF FCI and OpenFermion sparse-diagonalization rows for H2, LiH, H6, and H8 plus decomposed 24-, 40-, 50-, 64-, 96-, 128-, 256-, 512-, and 1024-qubit active-space exact validations.",
        "Record PySCF/Psi4/OpenFermion/Qiskit Nature reference rows where each tool is appropriate and reproducible.",
        "Run Catalyst-Q verification and compare energy deltas, operator invariants, symmetry labels, subsystem checks, and replay digests.",
        "Promote only when every case has deterministic replay ids, signed reports, and published claim boundaries."
      ],
      "claim_boundary": "Do not claim unrestricted 10k-qubit FCI, arbitrary dense-state materialization, physical quantum speedup, or customer scientific truth outside the declared active-space benchmark.",
      "promotion_gate": {
        "passed": false,
        "reason": "Exact chemistry now records PySCF/OpenFermion scoped reference rows; Psi4, Qiskit Nature, and larger scientist-reviewed transition-metal rows remain required before broad market claims."
      }
    },
    "freight": {
      "evidence_readiness": {
        "status": "complete",
        "score_pct": 100,
        "checks_passed": 4,
        "checks_total": 4,
        "summary": "Benchmark freight section is 100% complete for the declared evidence-pack scope.",
        "checks": [
          {
            "id": "proof_packet_linked",
            "label": "Freight proof packet linked",
            "pass": true,
            "evidence": "Freight packet freight-routeops-proof-packet-v1 is attached to the benchmark evidence packet."
          },
          {
            "id": "required_open_solvers_declared",
            "label": "Required open solvers declared",
            "pass": true,
            "evidence": "OR-Tools, PyVRP, and VROOM are all tracked."
          },
          {
            "id": "benchmark_families_declared",
            "label": "Benchmark families declared",
            "pass": true,
            "evidence": "CVRPLIB, Solomon/Homberger, and customer shadow-mode CSV families are included."
          },
          {
            "id": "external_baseline_gate_tracked",
            "label": "External baseline gate tracked",
            "pass": true,
            "evidence": "Solver-superiority promotion is kept as a separate OR-Tools/PyVRP/VROOM gate."
          }
        ]
      },
      "proof_packet_id": "freight-routeops-proof-packet-v1",
      "required_solvers": [
        {
          "id": "ortools",
          "name": "Google OR-Tools",
          "status": "remote_available",
          "evidence": "Live in the Cloudflare benchmark container and returning feasible RouteOps comparison rows."
        },
        {
          "id": "pyvrp",
          "name": "PyVRP",
          "status": "remote_available",
          "evidence": "Live in the Cloudflare benchmark container and returning feasible RouteOps comparison rows."
        },
        {
          "id": "vroom",
          "name": "VROOM",
          "status": "remote_available",
          "evidence": "Pinned VROOM CLI is live in the Cloudflare benchmark container and returning feasible RouteOps comparison rows."
        }
      ],
      "benchmark_families": [
        {
          "name": "CVRPLIB CVRP",
          "role": "Capacity-constrained public routing instances with known best/optimal values.",
          "acceptance": "Distance/objective gap, capacity feasibility, vehicle count, and replayable seed."
        },
        {
          "name": "Solomon/Homberger VRPTW",
          "role": "Time-window, service-time, and larger-scale route-planning benchmarks.",
          "acceptance": "Zero hard violations, lateness objective, route count, distance, and route stability."
        },
        {
          "name": "Customer shadow-mode CSV",
          "role": "Orders, vehicles, time windows, driver hours, disruptions, and dispatcher approvals.",
          "acceptance": "Signed baseline, holdout comparison, fuel/emissions proxy, accepted recommendation rate, and savings attribution."
        }
      ],
      "promotion_gate": {
        "id": "freight-open-solver-superiority",
        "passed": false,
        "reason": "OR-Tools, PyVRP, and VROOM comparison rows are now live; use the best feasible open-solver route as the ensemble seed and keep solver-superiority language gated until Catalyst-Q beats, matches, or explains gaps across pinned public benchmark suites."
      }
    },
    "grid": {
      "evidence_readiness": {
        "status": "complete",
        "score_pct": 100,
        "checks_passed": 4,
        "checks_total": 4,
        "summary": "Benchmark grid section is 100% complete for the declared evidence-pack scope.",
        "checks": [
          {
            "id": "pglib_matpower_cases_declared",
            "label": "PGLib/MATPOWER cases declared",
            "pass": true,
            "evidence": "PGLib case14, case118, and case1354 PEGASE are included as benchmark cases."
          },
          {
            "id": "case_families_declared",
            "label": "Case families declared",
            "pass": true,
            "evidence": "PGLib-OPF and MATPOWER roles are included."
          },
          {
            "id": "validators_declared",
            "label": "Validators declared",
            "pass": true,
            "evidence": "DC screening, line-constrained redispatch, and AC feasibility validators are listed."
          },
          {
            "id": "external_acopf_gate_tracked",
            "label": "External ACOPF gate tracked",
            "pass": true,
            "evidence": "ACOPF/utility-grade claims stay behind a separate promotion gate."
          }
        ]
      },
      "case_families": [
        {
          "name": "PGLib-OPF",
          "role": "Public AC optimal power-flow benchmark cases in MATPOWER format.",
          "acceptance": "Parsed network, DC screening objective, line/voltage violation report, and AC validation status."
        },
        {
          "name": "MATPOWER",
          "role": "Canonical parser and case-format contract for bus/gen/branch/gencost records.",
          "acceptance": "Round-trip parser integrity and external ACOPF/DCOPF comparison before utility-grade claims."
        }
      ],
      "pglib_cases": [
        {
          "case_id": "pglib_opf_case14_ieee",
          "source": "PGLib-OPF",
          "scale": "small IEEE proof fixture",
          "acceptance": [
            "parse MATPOWER",
            "run catalyst-dcopf-cg-v1",
            "run catalyst-dcopf-cut-v1",
            "run catalyst-acpf-nr-v1"
          ]
        },
        {
          "case_id": "pglib_opf_case118_ieee",
          "source": "PGLib-OPF",
          "scale": "medium IEEE proof fixture",
          "acceptance": [
            "parse MATPOWER",
            "record external reference objective",
            "publish constraint/violation report"
          ]
        },
        {
          "case_id": "pglib_opf_case1354_pegase",
          "source": "PGLib-OPF",
          "scale": "large PEGASE scale gate",
          "acceptance": [
            "parse MATPOWER",
            "run bounded DC redispatch where feasible",
            "avoid ACOPF superiority claims"
          ]
        }
      ],
      "validators": [
        {
          "id": "catalyst-dcopf-cg-v1",
          "role": "Pinned DC power-flow screening runner for parsed PGLib/MATPOWER cases.",
          "claim_boundary": "DC screening evidence only."
        },
        {
          "id": "catalyst-dcopf-cut-v1",
          "role": "Bounded line-constrained DC redispatch gate for overload removal where feasible.",
          "claim_boundary": "Bounded redispatch, not a full ADMS/DERMS replacement."
        },
        {
          "id": "catalyst-acpf-nr-v1",
          "role": "Newton-Raphson AC feasibility validation attempt and violation surfacing.",
          "claim_boundary": "Validation evidence, not ACOPF superiority."
        }
      ],
      "claim_boundary": "PGLib/MATPOWER scale evidence is grid-screening and validation evidence, not ACOPF superiority, not utility certification, and not a SCADA/ADMS/DERMS replacement.",
      "promotion_gate": {
        "passed": false,
        "reason": "PGLib/MATPOWER parsing and DC screening are wired; external ACOPF feasibility validation and larger-case objective comparisons remain required."
      }
    },
    "verification": {
      "evidence_readiness": {
        "status": "complete",
        "score_pct": 100,
        "checks_passed": 4,
        "checks_total": 4,
        "summary": "Benchmark verification section is 100% complete for the declared evidence-pack scope.",
        "checks": [
          {
            "id": "deterministic_replay_present",
            "label": "Deterministic replay present",
            "pass": true,
            "evidence": "Replay id cqv_c667b018f438662a is attached."
          },
          {
            "id": "canonical_digest_present",
            "label": "Canonical digest present",
            "pass": true,
            "evidence": "Canonical digest c667b018f438... is attached."
          },
          {
            "id": "tamper_evidence_passes",
            "label": "Tamper evidence passes",
            "pass": true,
            "evidence": "Original packet verifies and modified active-space metadata fails verification."
          },
          {
            "id": "signed_report_requirements_declared",
            "label": "Signed report requirements declared",
            "pass": true,
            "evidence": "Replay id, canonical SHA-256, HMAC signature, subsystem checks, and promotion gates are required."
          }
        ]
      },
      "deterministic_replay": {
        "packet_id": "exact-chemistry-verification-packet-v1",
        "replay_id": "cqv_c667b018f438662a",
        "canonical_sha256": "c667b018f438662accdc5a34b0b4bfdd05b6e0c0b5ae97b2b3b075bdd17ca7d3",
        "digest_algorithm": "SHA-256"
      },
      "checks": [
        {
          "id": "schema_integrity",
          "label": "Packet schema integrity",
          "pass": true,
          "evidence": "Required system, geometry hash hint, and basis metadata are present."
        },
        {
          "id": "active_space_declared",
          "label": "Active space declared",
          "pass": true,
          "evidence": "18 electrons in 18 orbitals: Transition-metal d orbitals, bridging sulfurs, and substrate frontier orbitals selected for static-correlation screening."
        },
        {
          "id": "charge_spin_declared",
          "label": "Charge and spin declared",
          "pass": true,
          "evidence": "charge=-1; spin_multiplicity=4"
        },
        {
          "id": "symmetry_cross_check",
          "label": "Symmetry and conservation checks",
          "pass": true,
          "evidence": "labels=electron_number, spin_multiplicity, point_group_fragment_Cs, charge_conservation"
        },
        {
          "id": "small_subsystem_cross_check",
          "label": "Small exact-subsystem replay",
          "pass": true,
          "evidence": "fes2_fragment delta 0.097 mHa <= 1 mHa; mo_s2_fragment delta 0.12 mHa <= 1 mHa"
        },
        {
          "id": "variational_bound_check",
          "label": "Approximate-method disagreement surfaced",
          "pass": true,
          "evidence": "closest approximate method DLPNO-CCSD(T) small model differs by 2.718 kcal/mol; packet should be sold as verification, not blind generation."
        }
      ],
      "tamper_evidence": {
        "original_verifies": true,
        "modified_payload_fails": true,
        "modified_field": "canonical_payload.active_space.orbitals"
      },
      "signed_report_requirements": [
        "Every benchmark evidence packet includes a deterministic replay id.",
        "Every signed report includes canonical payload SHA-256 and HMAC-SHA-256 signature.",
        "Smaller-subsystem checks and symmetry checks are mandatory for exact chemistry packets.",
        "Freight/grid promotion gates remain closed until required external baselines are recorded."
      ]
    },
    "signed_report": {
      "signature_algorithm": "HMAC-SHA-256",
      "signature_key_id": "catalyst-q-worker-secret-or-dev-key-v1",
      "signature": "5c7795a267c19d0bbe671cd7c463a4b99a37b6a187659499df0cdc137ed982d8",
      "canonical_payload_sha256": "2cba74fe0c76e1648fa69182395a6029123f6bce659ff473ed4d6763eabb8f46",
      "report_url_path": "/v1/benchmark-evidence",
      "verification_method": "verifyBenchmarkEvidencePacket(packet)"
    }
  },
  "links": {
    "page": "https://catalyst-q.strategic-innovations.ai/benchmark-evidence",
    "evals": "https://catalyst-q.strategic-innovations.ai/evals",
    "competition": "https://catalyst-q.strategic-innovations.ai/competition"
  }
}