{
  "generated_at": "2026-06-02T19:25:07.364Z",
  "benchmarks": [
    {
      "benchmark": "ARC AI2",
      "slug": "arc-ai2",
      "release_date": "2018-03-14",
      "n_models": 49,
      "first_seen": "2023-02-24",
      "last_seen": "2024-12-24",
      "sota": 0.9373,
      "sota_model": "Llama 3.1-405B",
      "sota_date": "2024-07-23",
      "ceiling": 1,
      "headroom": 0.0627,
      "velocity_12mo": null,
      "velocity_3mo": null,
      "eta_months": null,
      "solve_eta": null,
      "status": "DEAD",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "BBH",
      "slug": "bbh",
      "release_date": "2022-10-17",
      "n_models": 37,
      "first_seen": "2023-02-24",
      "last_seen": "2024-12-24",
      "sota": 0.856,
      "sota_model": "Gemini 1.5 Pro (May 2024)",
      "sota_date": "2024-05-14",
      "ceiling": 1,
      "headroom": 0.144,
      "velocity_12mo": null,
      "velocity_3mo": null,
      "eta_months": null,
      "solve_eta": null,
      "status": "DEAD",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "GSM8K",
      "slug": "gsm8k",
      "release_date": "2021-10-27",
      "n_models": 46,
      "first_seen": "2023-02-24",
      "last_seen": "2024-11-29",
      "sota": 0.92,
      "sota_model": "GPT-4 (Mar 2023)",
      "sota_date": "2023-03-15",
      "ceiling": 1,
      "headroom": 0.08,
      "velocity_12mo": null,
      "velocity_3mo": null,
      "eta_months": null,
      "solve_eta": null,
      "status": "DEAD",
      "is_math": 1,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "HellaSwag",
      "slug": "hellaswag",
      "release_date": "2019-05-19",
      "n_models": 38,
      "first_seen": "2023-02-24",
      "last_seen": "2024-12-24",
      "sota": 0.9373,
      "sota_model": "GPT-4 (Mar 2023)",
      "sota_date": "2023-03-15",
      "ceiling": 1,
      "headroom": 0.0627,
      "velocity_12mo": null,
      "velocity_3mo": null,
      "eta_months": null,
      "solve_eta": null,
      "status": "DEAD",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "LAMBADA",
      "slug": "lambada",
      "release_date": "2016-06-20",
      "n_models": 16,
      "first_seen": "2023-02-24",
      "last_seen": "2023-09-28",
      "sota": 0.798,
      "sota_model": "Falcon-180B",
      "sota_date": "2023-09-06",
      "ceiling": 1,
      "headroom": 0.202,
      "velocity_12mo": null,
      "velocity_3mo": null,
      "eta_months": null,
      "solve_eta": null,
      "status": "DEAD",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "MMLU",
      "slug": "mmlu",
      "release_date": "2020-09-07",
      "n_models": 88,
      "first_seen": "2023-02-24",
      "last_seen": "2024-12-24",
      "sota": 0.8413,
      "sota_model": "GPT-4o (Nov 2024)",
      "sota_date": "2024-05-13",
      "ceiling": 1,
      "headroom": 0.1587,
      "velocity_12mo": null,
      "velocity_3mo": null,
      "eta_months": null,
      "solve_eta": null,
      "status": "DEAD",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "GPQA diamond",
      "slug": "gpqa-diamond",
      "release_date": "2023-11-20",
      "n_models": 114,
      "first_seen": "2023-03-15",
      "last_seen": "2026-05-19",
      "sota": 0.928,
      "sota_model": "GPT-5.4 Pro",
      "sota_date": "2026-03-05",
      "ceiling": 1,
      "headroom": 0.072,
      "velocity_12mo": 0.0127,
      "velocity_3mo": 0.0145,
      "eta_months": 1.74,
      "solve_eta": "2026-07-25",
      "status": "NEAR_SOLVED",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "MATH level 5",
      "slug": "math-level-5",
      "release_date": "2021-03-05",
      "n_models": 77,
      "first_seen": "2023-03-15",
      "last_seen": "2025-10-15",
      "sota": 0.9813,
      "sota_model": "GPT-5",
      "sota_date": "2025-08-07",
      "ceiling": 1,
      "headroom": 0.0187,
      "velocity_12mo": 0.0008,
      "velocity_3mo": null,
      "eta_months": 0,
      "solve_eta": "2026-06-02",
      "status": "SATURATED",
      "is_math": 1,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "OTIS Mock AIME 2024-2025",
      "slug": "otis-mock-aime-2024-2025",
      "release_date": "2024-12-19",
      "n_models": 103,
      "first_seen": "2023-03-15",
      "last_seen": "2026-05-19",
      "sota": 1,
      "sota_model": "GPT-5.5",
      "sota_date": "2026-04-23",
      "ceiling": 1,
      "headroom": 0,
      "velocity_12mo": 0.01,
      "velocity_3mo": 0.0089,
      "eta_months": 0,
      "solve_eta": "2026-06-02",
      "status": "SATURATED",
      "is_math": 1,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "WeirdML",
      "slug": "weirdml",
      "release_date": "2025-01-16",
      "n_models": 83,
      "first_seen": "2023-03-15",
      "last_seen": "2026-05-19",
      "sota": 0.8491,
      "sota_model": "GPT-5.5",
      "sota_date": "2026-04-23",
      "ceiling": 1,
      "headroom": 0.1509,
      "velocity_12mo": 0.0265,
      "velocity_3mo": 0.0222,
      "eta_months": 3.81,
      "solve_eta": "2026-09-26",
      "status": "ACTIVE",
      "is_math": 0,
      "is_coding": 1,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "Winogrande",
      "slug": "winogrande",
      "release_date": "2019-07-24",
      "n_models": 50,
      "first_seen": "2023-02-24",
      "last_seen": "2024-12-24",
      "sota": 0.784,
      "sota_model": "Llama 3.1-405B",
      "sota_date": "2024-07-23",
      "ceiling": 1,
      "headroom": 0.216,
      "velocity_12mo": null,
      "velocity_3mo": null,
      "eta_months": null,
      "solve_eta": null,
      "status": "DEAD",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "TriviaQA",
      "slug": "triviaqa",
      "release_date": "2017-05-09",
      "n_models": 29,
      "first_seen": "2023-02-24",
      "last_seen": "2024-12-24",
      "sota": 0.876,
      "sota_model": "Llama 2-70B",
      "sota_date": "2023-07-18",
      "ceiling": 1,
      "headroom": 0.124,
      "velocity_12mo": null,
      "velocity_3mo": null,
      "eta_months": null,
      "solve_eta": null,
      "status": "DEAD",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "PIQA",
      "slug": "piqa",
      "release_date": "2019-11-26",
      "n_models": 35,
      "first_seen": "2023-02-24",
      "last_seen": "2024-12-24",
      "sota": 0.774,
      "sota_model": "GPT-4o mini",
      "sota_date": "2024-07-18",
      "ceiling": 1,
      "headroom": 0.226,
      "velocity_12mo": null,
      "velocity_3mo": null,
      "eta_months": null,
      "solve_eta": null,
      "status": "DEAD",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "Aider polyglot",
      "slug": "aider-polyglot",
      "release_date": "2024-12-21",
      "n_models": 44,
      "first_seen": "2024-05-13",
      "last_seen": "2025-12-01",
      "sota": 0.88,
      "sota_model": "GPT-5",
      "sota_date": "2025-08-07",
      "ceiling": 1,
      "headroom": 0.12,
      "velocity_12mo": 0.0089,
      "velocity_3mo": null,
      "eta_months": 7.89,
      "solve_eta": "2027-01-28",
      "status": "NEAR_SOLVED",
      "is_math": 0,
      "is_coding": 1,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "GeoBench",
      "slug": "geobench",
      "release_date": "2025-03-01",
      "n_models": 26,
      "first_seen": "2024-05-10",
      "last_seen": "2025-12-17",
      "sota": 0.88,
      "sota_model": "Gemini 3 Flash",
      "sota_date": "2025-12-17",
      "ceiling": 1,
      "headroom": 0.12,
      "velocity_12mo": 0.0027,
      "velocity_3mo": null,
      "eta_months": 25.87,
      "solve_eta": "2028-07-28",
      "status": "NEAR_SOLVED",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "Lech Mazur Writing",
      "slug": "lech-mazur-writing",
      "release_date": "2025-01-31",
      "n_models": 38,
      "first_seen": "2024-05-13",
      "last_seen": "2025-09-19",
      "sota": 0.8729,
      "sota_model": "Kimi K2 (Sep 2025)",
      "sota_date": "2025-07-11",
      "ceiling": 1,
      "headroom": 0.1271,
      "velocity_12mo": 0.0051,
      "velocity_3mo": null,
      "eta_months": 15.17,
      "solve_eta": "2027-09-07",
      "status": "NEAR_SOLVED",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "Fiction.LiveBench",
      "slug": "fiction-livebench",
      "release_date": "2025-02-21",
      "n_models": 45,
      "first_seen": "2024-12-05",
      "last_seen": "2026-02-02",
      "sota": 0.972,
      "sota_model": "o3-pro",
      "sota_date": "2025-06-10",
      "ceiling": 1,
      "headroom": 0.028,
      "velocity_12mo": 0.0147,
      "velocity_3mo": null,
      "eta_months": 0,
      "solve_eta": "2026-06-02",
      "status": "SATURATED",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "ANLI",
      "slug": "anli",
      "release_date": "2019-10-31",
      "n_models": 9,
      "first_seen": "2023-06-13",
      "last_seen": "2024-04-23",
      "sota": 0.3715,
      "sota_model": "GPT-3.5 Turbo (Nov 2023)",
      "sota_date": "2023-06-13",
      "ceiling": 1,
      "headroom": 0.6285,
      "velocity_12mo": null,
      "velocity_3mo": null,
      "eta_months": null,
      "solve_eta": null,
      "status": "DEAD",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "OpenBookQA",
      "slug": "openbookqa",
      "release_date": "2018-09-08",
      "n_models": 26,
      "first_seen": "2023-02-24",
      "last_seen": "2024-04-23",
      "sota": 0.84,
      "sota_model": "phi-3-mini 3.8B",
      "sota_date": "2024-04-23",
      "ceiling": 1,
      "headroom": 0.16,
      "velocity_12mo": null,
      "velocity_3mo": null,
      "eta_months": null,
      "solve_eta": null,
      "status": "DEAD",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "SimpleBench",
      "slug": "simplebench",
      "release_date": "2024-10-31",
      "n_models": 57,
      "first_seen": "2024-03-04",
      "last_seen": "2026-04-23",
      "sota": 0.7552,
      "sota_model": "Gemini 3.1 Pro",
      "sota_date": "2026-02-19",
      "ceiling": 1,
      "headroom": 0.2448,
      "velocity_12mo": 0.0278,
      "velocity_3mo": null,
      "eta_months": 7,
      "solve_eta": "2027-01-01",
      "status": "ACTIVE",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "FrontierMath-2025-02-28-Private",
      "slug": "frontiermath-2025-02-28-private",
      "release_date": "2025-02-28",
      "n_models": 67,
      "first_seen": "2024-05-10",
      "last_seen": "2026-05-19",
      "sota": 0.524,
      "sota_model": "GPT-5.5 Pro",
      "sota_date": "2026-04-23",
      "ceiling": 1,
      "headroom": 0.476,
      "velocity_12mo": 0.0226,
      "velocity_3mo": 0.0268,
      "eta_months": 18.88,
      "solve_eta": "2027-12-29",
      "status": "ACTIVE",
      "is_math": 1,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "Balrog",
      "slug": "balrog",
      "release_date": "2024-11-20",
      "n_models": 24,
      "first_seen": "2024-05-10",
      "last_seen": "2026-02-19",
      "sota": 0.581,
      "sota_model": "Gemini 3 Pro",
      "sota_date": "2025-11-18",
      "ceiling": 1,
      "headroom": 0.419,
      "velocity_12mo": 0.0189,
      "velocity_3mo": null,
      "eta_months": 19.49,
      "solve_eta": "2028-01-16",
      "status": "ACTIVE",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "FrontierMath-Tier-4-2025-07-01-Private",
      "slug": "frontiermath-tier-4-2025-07-01-private",
      "release_date": null,
      "n_models": 51,
      "first_seen": "2024-06-20",
      "last_seen": "2026-05-19",
      "sota": 0.396,
      "sota_model": "GPT-5.5 Pro",
      "sota_date": "2026-04-23",
      "ceiling": 1,
      "headroom": 0.604,
      "velocity_12mo": 0.0273,
      "velocity_3mo": 0.019,
      "eta_months": 20.3,
      "solve_eta": "2028-02-10",
      "status": "FRONTIER",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "ARC-AGI",
      "slug": "arc-agi",
      "release_date": "2019-11-05",
      "n_models": 57,
      "first_seen": "2024-05-13",
      "last_seen": "2026-05-19",
      "sota": 0.98,
      "sota_model": "Gemini 3.1 Pro",
      "sota_date": "2026-02-19",
      "ceiling": 1,
      "headroom": 0.02,
      "velocity_12mo": 0.0266,
      "velocity_3mo": null,
      "eta_months": 0,
      "solve_eta": "2026-06-02",
      "status": "SATURATED",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "ARC-AGI-2",
      "slug": "arc-agi-2",
      "release_date": null,
      "n_models": 56,
      "first_seen": "2024-05-13",
      "last_seen": "2026-05-19",
      "sota": 0.85,
      "sota_model": "GPT-5.5",
      "sota_date": "2026-04-23",
      "ceiling": 1,
      "headroom": 0.15,
      "velocity_12mo": 0.0692,
      "velocity_3mo": 0.0382,
      "eta_months": 1.44,
      "solve_eta": "2026-07-16",
      "status": "NEAR_SOLVED",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "APEX-Agents",
      "slug": "apex-agents",
      "release_date": null,
      "n_models": 32,
      "first_seen": "2024-12-05",
      "last_seen": "2026-05-19",
      "sota": 0.496,
      "sota_model": "Gemini 3.5 Flash",
      "sota_date": "2026-05-19",
      "ceiling": 1,
      "headroom": 0.504,
      "velocity_12mo": 0.0192,
      "velocity_3mo": 0.0293,
      "eta_months": 23.71,
      "solve_eta": "2028-05-24",
      "status": "FRONTIER",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "Cybench",
      "slug": "cybench",
      "release_date": "2024-08-15",
      "n_models": 20,
      "first_seen": "2024-02-15",
      "last_seen": "2026-02-05",
      "sota": 0.93,
      "sota_model": "Claude Opus 4.6",
      "sota_date": "2026-02-05",
      "ceiling": 1,
      "headroom": 0.07,
      "velocity_12mo": 0.0646,
      "velocity_3mo": null,
      "eta_months": 0.31,
      "solve_eta": "2026-06-11",
      "status": "NEAR_SOLVED",
      "is_math": 0,
      "is_coding": 1,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "SimpleQA Verified",
      "slug": "simpleqa-verified",
      "release_date": null,
      "n_models": 44,
      "first_seen": "2024-10-22",
      "last_seen": "2026-05-19",
      "sota": 0.773,
      "sota_model": "Gemini 3.1 Pro",
      "sota_date": "2026-02-19",
      "ceiling": 1,
      "headroom": 0.227,
      "velocity_12mo": 0.0174,
      "velocity_3mo": null,
      "eta_months": 10.19,
      "solve_eta": "2027-04-08",
      "status": "ACTIVE",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "DeepResearch Bench",
      "slug": "deepresearch-bench",
      "release_date": "2025-06-13",
      "n_models": 13,
      "first_seen": "2024-12-20",
      "last_seen": "2025-09-29",
      "sota": 0.5513,
      "sota_model": "GPT-5",
      "sota_date": "2025-08-07",
      "ceiling": 1,
      "headroom": 0.4487,
      "velocity_12mo": 0.0242,
      "velocity_3mo": null,
      "eta_months": 16.45,
      "solve_eta": "2027-10-16",
      "status": "ACTIVE",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "Terminal Bench",
      "slug": "terminal-bench",
      "release_date": "2025-05-19",
      "n_models": 32,
      "first_seen": "2025-06-05",
      "last_seen": "2026-04-23",
      "sota": 0.902,
      "sota_model": "Claude Opus 4.7",
      "sota_date": "2026-04-16",
      "ceiling": 1,
      "headroom": 0.098,
      "velocity_12mo": 0.0557,
      "velocity_3mo": 0.0544,
      "eta_months": 0.86,
      "solve_eta": "2026-06-28",
      "status": "NEAR_SOLVED",
      "is_math": 0,
      "is_coding": 1,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "Chess Puzzles",
      "slug": "chess-puzzles",
      "release_date": null,
      "n_models": 37,
      "first_seen": "2025-01-31",
      "last_seen": "2026-05-19",
      "sota": 0.64,
      "sota_model": "GPT-5.5 Pro",
      "sota_date": "2026-04-23",
      "ceiling": 1,
      "headroom": 0.36,
      "velocity_12mo": 0.0311,
      "velocity_3mo": 0.0435,
      "eta_months": 9.97,
      "solve_eta": "2027-04-01",
      "status": "ACTIVE",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "CadEval",
      "slug": "cadeval",
      "release_date": "2025-04-22",
      "n_models": 14,
      "first_seen": "2024-03-04",
      "last_seen": "2025-04-16",
      "sota": 0.74,
      "sota_model": "o3",
      "sota_date": "2024-12-20",
      "ceiling": 1,
      "headroom": 0.26,
      "velocity_12mo": null,
      "velocity_3mo": null,
      "eta_months": null,
      "solve_eta": null,
      "status": "DEAD",
      "is_math": 0,
      "is_coding": 1,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "GSO-Bench",
      "slug": "gso-bench",
      "release_date": "2025-05-29",
      "n_models": 22,
      "first_seen": "2024-05-13",
      "last_seen": "2026-04-23",
      "sota": 0.441,
      "sota_model": "Claude Opus 4.7",
      "sota_date": "2026-04-16",
      "ceiling": 1,
      "headroom": 0.559,
      "velocity_12mo": 0.0223,
      "velocity_3mo": 0.0126,
      "eta_months": 22.83,
      "solve_eta": "2028-04-27",
      "status": "FRONTIER",
      "is_math": 0,
      "is_coding": 1,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "PostTrainBench",
      "slug": "posttrainbench",
      "release_date": null,
      "n_models": 16,
      "first_seen": "2025-09-05",
      "last_seen": "2026-04-16",
      "sota": 0.2856,
      "sota_model": "Claude Opus 4.7",
      "sota_date": "2026-04-16",
      "ceiling": 1,
      "headroom": 0.7144,
      "velocity_12mo": 0.0289,
      "velocity_3mo": 0.0235,
      "eta_months": 23.02,
      "solve_eta": "2028-05-03",
      "status": "FRONTIER",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "SWE-Bench verified",
      "slug": "swe-bench-verified",
      "release_date": "2024-08-13",
      "n_models": 27,
      "first_seen": "2024-05-13",
      "last_seen": "2026-04-23",
      "sota": 0.8347,
      "sota_model": "Claude Opus 4.7",
      "sota_date": "2026-04-16",
      "ceiling": 1,
      "headroom": 0.1653,
      "velocity_12mo": 0.0119,
      "velocity_3mo": 0.0207,
      "eta_months": 9.73,
      "solve_eta": "2027-03-25",
      "status": "ACTIVE",
      "is_math": 0,
      "is_coding": 1,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "OSWorld",
      "slug": "osworld",
      "release_date": "2024-04-11",
      "n_models": 8,
      "first_seen": "2024-09-19",
      "last_seen": "2026-02-17",
      "sota": 0.721,
      "sota_model": "Claude Sonnet 4.6",
      "sota_date": "2026-02-17",
      "ceiling": 1,
      "headroom": 0.279,
      "velocity_12mo": 0.0317,
      "velocity_3mo": null,
      "eta_months": 7.23,
      "solve_eta": "2027-01-08",
      "status": "ACTIVE",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "HLE",
      "slug": "hle",
      "release_date": null,
      "n_models": 31,
      "first_seen": "2024-12-05",
      "last_seen": "2026-04-16",
      "sota": 0.4374,
      "sota_model": "Gemini 3.1 Pro",
      "sota_date": "2026-02-19",
      "ceiling": 1,
      "headroom": 0.5626,
      "velocity_12mo": 0.0196,
      "velocity_3mo": null,
      "eta_months": 26.15,
      "solve_eta": "2028-08-06",
      "status": "FRONTIER",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "ScienceQA",
      "slug": "scienceqa",
      "release_date": "2022-09-20",
      "n_models": 6,
      "first_seen": "2023-02-24",
      "last_seen": "2024-05-13",
      "sota": 0.8467,
      "sota_model": "GPT-4o (May 2024)",
      "sota_date": "2024-05-13",
      "ceiling": 1,
      "headroom": 0.1533,
      "velocity_12mo": null,
      "velocity_3mo": null,
      "eta_months": null,
      "solve_eta": null,
      "status": "DEAD",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "VideoMME",
      "slug": "videomme",
      "release_date": "2024-05-31",
      "n_models": 9,
      "first_seen": "2024-02-15",
      "last_seen": "2024-10-22",
      "sota": 0.6667,
      "sota_model": "Gemini 1.5 Pro (Feb 2024)",
      "sota_date": "2024-02-15",
      "ceiling": 1,
      "headroom": 0.3333,
      "velocity_12mo": null,
      "velocity_3mo": null,
      "eta_months": null,
      "solve_eta": null,
      "status": "DEAD",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "CSQA2",
      "slug": "csqa2",
      "release_date": "2022-01-14",
      "n_models": 2,
      "first_seen": "2023-06-13",
      "last_seen": "2023-07-18",
      "sota": 0.14,
      "sota_model": "GPT-3.5 Turbo (Jun 2023)",
      "sota_date": "2023-06-13",
      "ceiling": 1,
      "headroom": 0.86,
      "velocity_12mo": null,
      "velocity_3mo": null,
      "eta_months": null,
      "solve_eta": null,
      "status": "INSUFFICIENT_DATA",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "The Agent Company",
      "slug": "the-agent-company",
      "release_date": "2024-12-18",
      "n_models": 13,
      "first_seen": "2024-05-13",
      "last_seen": "2025-09-29",
      "sota": 0.429,
      "sota_model": "DeepSeek-V3.2-Exp",
      "sota_date": "2025-09-29",
      "ceiling": 1,
      "headroom": 0.571,
      "velocity_12mo": 0.0229,
      "velocity_3mo": null,
      "eta_months": 22.7,
      "solve_eta": "2028-04-23",
      "status": "FRONTIER",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    },
    {
      "benchmark": "VPCT",
      "slug": "vpct",
      "release_date": "2025-01-30",
      "n_models": 24,
      "first_seen": "2024-05-13",
      "last_seen": "2025-12-17",
      "sota": 0.865,
      "sota_model": "Gemini 3 Pro",
      "sota_date": "2025-11-18",
      "ceiling": 1,
      "headroom": 0.135,
      "velocity_12mo": 0.0708,
      "velocity_3mo": null,
      "eta_months": 1.2,
      "solve_eta": "2026-07-09",
      "status": "NEAR_SOLVED",
      "is_math": 0,
      "is_coding": 0,
      "updated_at": "2026-06-02T18:07:01.304Z"
    }
  ]
}