{"sim":{"judge_prompt_tokens":1500,"judgement_tokens":50,"judgements_per_run":20,"runs_per_day":100},"rows":[{"slug":"vectara-hhem","name":"Vectara HHEM","class":"hosted-judge-api","vendor":"Vectara","openrouter_id":null,"hf_id":null,"github_repo":null,"context_length":null,"cost":{"per_judgement":0.00005,"cost_per_1k":0.05,"daily_cost":0.1,"monthly_cost":3},"price_ts":"2026-05-01","hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"Hughes Hallucination Evaluation Model; classification-only judge.","pricing_last_verified_at":"2026-05-01"},{"slug":"gpt-5-nano","name":"GPT-5 Nano","class":"frontier-generalist","vendor":"OpenAI","openrouter_id":"openai/gpt-5-nano","hf_id":null,"github_repo":null,"context_length":400000,"cost":{"per_judgement":0.00009499999999999999,"cost_per_1k":0.09499999999999999,"daily_cost":0.18999999999999997,"monthly_cost":5.699999999999999},"price_ts":"2026-06-02T19:15:00.541Z","hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"Cheapest OpenAI judge; binary-grade tasks only.","pricing_last_verified_at":null},{"slug":"atla-selene-api","name":"Atla Selene (hosted API)","class":"hosted-judge-api","vendor":"Atla AI","openrouter_id":null,"hf_id":null,"github_repo":null,"context_length":null,"cost":{"per_judgement":0.00015,"cost_per_1k":0.15,"daily_cost":0.3,"monthly_cost":9},"price_ts":"2026-05-01","hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"Hosted Selene API; price approximated as $0.15 per 1k judgements (verify before quoting).","pricing_last_verified_at":"2026-05-01"},{"slug":"galileo-luna","name":"Galileo Luna","class":"hosted-judge-api","vendor":"Galileo","openrouter_id":null,"hf_id":null,"github_repo":null,"context_length":null,"cost":{"per_judgement":0.0002,"cost_per_1k":0.2,"daily_cost":0.4,"monthly_cost":12},"price_ts":"2026-05-01","hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"Galileo's small judge (Llama-3.1 8B fine-tune); hallucination + tool-use grading.","pricing_last_verified_at":"2026-05-01"},{"slug":"llama-4-maverick","name":"Llama 4 Maverick","class":"frontier-generalist","vendor":"Meta","openrouter_id":"meta-llama/llama-4-maverick","hf_id":null,"github_repo":null,"context_length":1048576,"cost":{"per_judgement":0.000255,"cost_per_1k":0.255,"daily_cost":0.51,"monthly_cost":15.3},"price_ts":"2026-06-02T19:15:00.541Z","hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"Open-weights frontier; reasonable judge baseline.","pricing_last_verified_at":null},{"slug":"patronus-lynx","name":"Patronus Lynx","class":"hosted-judge-api","vendor":"Patronus AI","openrouter_id":null,"hf_id":null,"github_repo":null,"context_length":null,"cost":{"per_judgement":0.0003,"cost_per_1k":0.3,"daily_cost":0.6,"monthly_cost":18},"price_ts":"2026-05-01","hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"Hallucination-detection judge; per-call pricing from Patronus public docs.","pricing_last_verified_at":"2026-05-01"},{"slug":"gpt-5-mini","name":"GPT-5 Mini","class":"frontier-generalist","vendor":"OpenAI","openrouter_id":"openai/gpt-5-mini","hf_id":null,"github_repo":null,"context_length":400000,"cost":{"per_judgement":0.000475,"cost_per_1k":0.475,"daily_cost":0.95,"monthly_cost":28.5},"price_ts":"2026-06-02T19:15:00.541Z","hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"10× cheaper than full GPT-5; high agreement on routine grading.","pricing_last_verified_at":null},{"slug":"gemini-2-5-flash","name":"Gemini 2.5 Flash","class":"frontier-generalist","vendor":"Google","openrouter_id":"google/gemini-2.5-flash","hf_id":null,"github_repo":null,"context_length":1048576,"cost":{"per_judgement":0.000575,"cost_per_1k":0.575,"daily_cost":1.15,"monthly_cost":34.5},"price_ts":"2026-06-02T19:15:00.541Z","hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"High-throughput judge; budget pick for large eval suites.","pricing_last_verified_at":null},{"slug":"aligneval-api","name":"AlignEval","class":"hosted-judge-api","vendor":"AlignEval","openrouter_id":null,"hf_id":null,"github_repo":null,"context_length":null,"cost":{"per_judgement":0.001,"cost_per_1k":1,"daily_cost":2,"monthly_cost":60},"price_ts":"2026-05-01","hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"Multi-rubric judge with calibration; per-call commercial pricing.","pricing_last_verified_at":"2026-05-01"},{"slug":"deepseek-r1","name":"DeepSeek R1","class":"frontier-generalist","vendor":"DeepSeek","openrouter_id":"deepseek/deepseek-r1","hf_id":null,"github_repo":null,"context_length":163840,"cost":{"per_judgement":0.0011749999999999998,"cost_per_1k":1.1749999999999998,"daily_cost":2.3499999999999996,"monthly_cost":70.49999999999999},"price_ts":"2026-06-02T19:15:00.541Z","hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"Reasoning variant; pricier but resolves ambiguous rubric calls.","pricing_last_verified_at":null},{"slug":"qwen3-max","name":"Qwen 3 Max","class":"frontier-generalist","vendor":"Alibaba","openrouter_id":"qwen/qwen3-max","hf_id":null,"github_repo":null,"context_length":262144,"cost":{"per_judgement":0.0013650000000000001,"cost_per_1k":1.3650000000000002,"daily_cost":2.73,"monthly_cost":81.9},"price_ts":"2026-06-02T19:15:00.541Z","hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"Strong on multilingual judge tasks.","pricing_last_verified_at":null},{"slug":"claude-haiku-4-5","name":"Claude Haiku 4.5","class":"frontier-generalist","vendor":"Anthropic","openrouter_id":"anthropic/claude-haiku-4.5","hf_id":null,"github_repo":null,"context_length":200000,"cost":{"per_judgement":0.00175,"cost_per_1k":1.75,"daily_cost":3.5000000000000004,"monthly_cost":105.00000000000001},"price_ts":"2026-06-02T19:15:00.541Z","hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"Cheap, fast, surprisingly good on rubric-style scoring.","pricing_last_verified_at":null},{"slug":"gpt-5","name":"GPT-5","class":"frontier-generalist","vendor":"OpenAI","openrouter_id":"openai/gpt-5","hf_id":null,"github_repo":null,"context_length":400000,"cost":{"per_judgement":0.0023750000000000004,"cost_per_1k":2.3750000000000004,"daily_cost":4.750000000000001,"monthly_cost":142.50000000000003},"price_ts":"2026-06-02T19:15:00.541Z","hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"Highest-quality frontier judge. Expensive. Use as gold-standard reference judge.","pricing_last_verified_at":null},{"slug":"gemini-2-5-pro","name":"Gemini 2.5 Pro","class":"frontier-generalist","vendor":"Google","openrouter_id":"google/gemini-2.5-pro","hf_id":null,"github_repo":null,"context_length":1048576,"cost":{"per_judgement":0.0023750000000000004,"cost_per_1k":2.3750000000000004,"daily_cost":4.750000000000001,"monthly_cost":142.50000000000003},"price_ts":"2026-06-02T19:15:00.541Z","hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"Long-context judge; strong on document-level grading.","pricing_last_verified_at":null},{"slug":"mistral-large-2411","name":"Mistral Large 2411","class":"frontier-generalist","vendor":"Mistral","openrouter_id":"mistralai/mistral-large-2411","hf_id":null,"github_repo":null,"context_length":131072,"cost":{"per_judgement":0.0033,"cost_per_1k":3.3,"daily_cost":6.6000000000000005,"monthly_cost":198.00000000000003},"price_ts":"2026-05-31T11:47:07.256Z","hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"Strong on European-language grading.","pricing_last_verified_at":null},{"slug":"command-r-plus","name":"Cohere Command R+","class":"frontier-generalist","vendor":"Cohere","openrouter_id":"cohere/command-r-plus-08-2024","hf_id":null,"github_repo":null,"context_length":128000,"cost":{"per_judgement":0.00425,"cost_per_1k":4.25,"daily_cost":8.5,"monthly_cost":255},"price_ts":"2026-06-02T19:15:00.541Z","hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"Tuned for instruction-following; works well as rubric judge.","pricing_last_verified_at":null},{"slug":"claude-sonnet-4-6","name":"Claude Sonnet 4.6","class":"frontier-generalist","vendor":"Anthropic","openrouter_id":"anthropic/claude-sonnet-4.6","hf_id":null,"github_repo":null,"context_length":1000000,"cost":{"per_judgement":0.00525,"cost_per_1k":5.25,"daily_cost":10.500000000000002,"monthly_cost":315.00000000000006},"price_ts":"2026-06-02T19:15:00.541Z","hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"Recommended actor-judge for Claude Code /goal validator role.","pricing_last_verified_at":null},{"slug":"claude-opus-4-7","name":"Claude Opus 4.7","class":"frontier-generalist","vendor":"Anthropic","openrouter_id":"anthropic/claude-opus-4.7","hf_id":null,"github_repo":null,"context_length":1000000,"cost":{"per_judgement":0.00875,"cost_per_1k":8.75,"daily_cost":17.5,"monthly_cost":525},"price_ts":"2026-06-02T19:15:00.541Z","hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"Anthropic flagship; lowest position-bias of frontier judges in JudgeBench 2025-Q4.","pricing_last_verified_at":null},{"slug":"urm-llama-3-1-8b","name":"URM LLaMa-3.1 8B","class":"specialist-oss","vendor":"LxzGordon","openrouter_id":null,"hf_id":"LxzGordon/URM-LLaMa-3.1-8B","github_repo":null,"context_length":null,"cost":null,"price_ts":null,"hf":{"downloads_30d":69509,"downloads_all_time":null,"likes":13},"last_release":null,"price_delta_7d_pct":null,"notes":"Uncertainty-aware reward model.","pricing_last_verified_at":null},{"slug":"prometheus-7b-v2","name":"Prometheus 7B v2.0","class":"specialist-oss","vendor":"Prometheus Eval","openrouter_id":null,"hf_id":"prometheus-eval/prometheus-7b-v2.0","github_repo":"prometheus-eval/prometheus-eval","context_length":null,"cost":null,"price_ts":null,"hf":{"downloads_30d":27196,"downloads_all_time":null,"likes":107},"last_release":null,"price_delta_7d_pct":null,"notes":"Direct-assessment + pairwise judge; trained from Mistral 7B on Feedback-Collection.","pricing_last_verified_at":null},{"slug":"flow-judge-v0-1","name":"Flow-Judge v0.1","class":"specialist-oss","vendor":"Flow AI","openrouter_id":null,"hf_id":"flowaicom/Flow-Judge-v0.1","github_repo":"flowaicom/flow-judge","context_length":null,"cost":null,"price_ts":null,"hf":{"downloads_30d":11839,"downloads_all_time":null,"likes":71},"last_release":null,"price_delta_7d_pct":null,"notes":"3.8B (Phi-3.5-mini) judge with rubric input; aggressive size/quality trade.","pricing_last_verified_at":null},{"slug":"selene-1-mini-llama-3-1-8b","name":"Atla Selene 1 Mini (Llama-3.1 8B)","class":"specialist-oss","vendor":"Atla AI","openrouter_id":null,"hf_id":"AtlaAI/Selene-1-Mini-Llama-3.1-8B","github_repo":"atla-ai/selene-mini","context_length":null,"cost":null,"price_ts":null,"hf":{"downloads_30d":2105,"downloads_all_time":null,"likes":104},"last_release":null,"price_delta_7d_pct":null,"notes":"8B SOTA small judge; outperforms GPT-4o-mini on Atla's RewardBench.","pricing_last_verified_at":null},{"slug":"grm-llama-3-8b","name":"GRM Llama-3 8B (reward-model-ft)","class":"specialist-oss","vendor":"Ray2333","openrouter_id":null,"hf_id":"Ray2333/GRM-Llama3-8B-rewardmodel-ft","github_repo":null,"context_length":null,"cost":null,"price_ts":null,"hf":{"downloads_30d":1939,"downloads_all_time":null,"likes":1},"last_release":null,"price_delta_7d_pct":null,"notes":"Generative reward model; popular Llama-3 critic.","pricing_last_verified_at":null},{"slug":"skywork-critic-llama-3-1-8b","name":"Skywork Critic (Llama-3.1 8B)","class":"specialist-oss","vendor":"Skywork","openrouter_id":null,"hf_id":"Skywork/Skywork-Critic-Llama-3.1-8B","github_repo":null,"context_length":null,"cost":null,"price_ts":null,"hf":{"downloads_30d":1418,"downloads_all_time":null,"likes":14},"last_release":null,"price_delta_7d_pct":null,"notes":"Cheaper Skywork critic; pairwise judgement.","pricing_last_verified_at":null},{"slug":"glider-patronus","name":"GLIDER","class":"specialist-oss","vendor":"Patronus AI","openrouter_id":null,"hf_id":"PatronusAI/glider","github_repo":null,"context_length":null,"cost":null,"price_ts":null,"hf":{"downloads_30d":1147,"downloads_all_time":null,"likes":44},"last_release":null,"price_delta_7d_pct":null,"notes":"3.8B fine-grained judge from Patronus AI; outputs scores+reasoning.","pricing_last_verified_at":null},{"slug":"judgelm-33b-v1","name":"JudgeLM 33B v1.0","class":"specialist-oss","vendor":"BAAI","openrouter_id":null,"hf_id":"BAAI/JudgeLM-33B-v1.0","github_repo":"baaivision/JudgeLM","context_length":null,"cost":null,"price_ts":null,"hf":{"downloads_30d":767,"downloads_all_time":null,"likes":28},"last_release":null,"price_delta_7d_pct":null,"notes":"Original LLM-judge SFT family; pairwise judge.","pricing_last_verified_at":null},{"slug":"autoj-13b","name":"Auto-J 13B","class":"specialist-oss","vendor":"GAIR","openrouter_id":null,"hf_id":"GAIR/autoj-13b","github_repo":"GAIR-NLP/auto-j","context_length":null,"cost":null,"price_ts":null,"hf":{"downloads_30d":743,"downloads_all_time":null,"likes":7},"last_release":null,"price_delta_7d_pct":null,"notes":"Critique + pairwise judge with rubric reasoning.","pricing_last_verified_at":null},{"slug":"skywork-critic-llama-3-1-70b","name":"Skywork Critic (Llama-3.1 70B)","class":"specialist-oss","vendor":"Skywork","openrouter_id":null,"hf_id":"Skywork/Skywork-Critic-Llama-3.1-70B","github_repo":null,"context_length":null,"cost":null,"price_ts":null,"hf":{"downloads_30d":668,"downloads_all_time":null,"likes":12},"last_release":null,"price_delta_7d_pct":null,"notes":"Top JudgeBench score among 70B-class open judges (Aug 2024 leaderboard).","pricing_last_verified_at":null},{"slug":"prometheus-bgb-8x7b-v2","name":"Prometheus BGB 8x7B v2.0","class":"specialist-oss","vendor":"Prometheus Eval","openrouter_id":null,"hf_id":"prometheus-eval/prometheus-bgb-8x7b-v2.0","github_repo":"prometheus-eval/prometheus-eval","context_length":null,"cost":null,"price_ts":null,"hf":{"downloads_30d":605,"downloads_all_time":null,"likes":6},"last_release":null,"price_delta_7d_pct":null,"notes":"Larger Prometheus variant; near-frontier judge quality at self-hosted cost.","pricing_last_verified_at":null},{"slug":"qrm-llama-3-1-8b","name":"QRM Llama-3.1 8B","class":"specialist-oss","vendor":"Nicolinho","openrouter_id":null,"hf_id":"nicolinho/QRM-Llama3.1-8B","github_repo":null,"context_length":null,"cost":null,"price_ts":null,"hf":{"downloads_30d":75,"downloads_all_time":null,"likes":2},"last_release":null,"price_delta_7d_pct":null,"notes":"Quantile reward model; useful when you need distribution, not a point estimate.","pricing_last_verified_at":null},{"slug":"inf-orm-llama-3-1-70b","name":"INF-ORM Llama-3.1 70B","class":"specialist-oss","vendor":"Infly","openrouter_id":null,"hf_id":"infly/INF-ORM-Llama3.1-70B","github_repo":null,"context_length":null,"cost":null,"price_ts":null,"hf":{"downloads_30d":19,"downloads_all_time":null,"likes":27},"last_release":null,"price_delta_7d_pct":null,"notes":"Outcome reward model; pairwise judge with explicit reward head.","pricing_last_verified_at":null},{"slug":"pandalm-7b-v1","name":"PandaLM 7B v1","class":"specialist-oss","vendor":"WeOpenML","openrouter_id":null,"hf_id":"WeOpenML/PandaLM-7B-v1","github_repo":"WeOpenML/PandaLM","context_length":null,"cost":null,"price_ts":null,"hf":{"downloads_30d":17,"downloads_all_time":null,"likes":18},"last_release":null,"price_delta_7d_pct":null,"notes":"Reproducible judge for instruction-tuning ablations.","pricing_last_verified_at":null},{"slug":"self-taught-eval-llama-3-1-70b","name":"Self-Taught Evaluator (Llama-3.1 70B)","class":"specialist-oss","vendor":"Meta FAIR","openrouter_id":null,"hf_id":"facebook/Self-taught-evaluator-llama3.1-70B","github_repo":null,"context_length":null,"cost":null,"price_ts":null,"hf":{"downloads_30d":0,"downloads_all_time":null,"likes":42},"last_release":null,"price_delta_7d_pct":null,"notes":"Meta FAIR's self-improvement judge; outperforms GPT-4 on RewardBench.","pricing_last_verified_at":null},{"slug":"deepseek-v3","name":"DeepSeek V3","class":"frontier-generalist","vendor":"DeepSeek","openrouter_id":"deepseek/deepseek-chat-v3","hf_id":null,"github_repo":null,"context_length":null,"cost":null,"price_ts":null,"hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"Open-weights frontier; competitive judge agreement at <1% of GPT-5 cost.","pricing_last_verified_at":null},{"slug":"glm-4-5","name":"GLM-4.5","class":"frontier-generalist","vendor":"Zhipu AI","openrouter_id":"zhipuai/glm-4.5","hf_id":null,"github_repo":null,"context_length":null,"cost":null,"price_ts":null,"hf":null,"last_release":null,"price_delta_7d_pct":null,"notes":"Cheap frontier judge from Zhipu; useful as a third-vote tie-breaker.","pricing_last_verified_at":null}]}