[{"rank":1,"model_name":"Claude-Opus-4-5-20251101 (FC)","model_norm":"claudeopus4520251101","overall_acc":77.47,"ast_acc":88.58,"exec_acc":79.79,"irrelevance":84.72,"organization":"Anthropic","cost":86.55,"latency":4.38,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":2,"model_name":"Claude-Sonnet-4-5-20250929 (FC)","model_norm":"claudesonnet4520250929","overall_acc":73.24,"ast_acc":88.65,"exec_acc":81.13,"irrelevance":86.61,"organization":"Anthropic","cost":43.73,"latency":4.31,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":3,"model_name":"Gemini-3-Pro-Preview (Prompt)","model_norm":"gemini3propreview","overall_acc":72.51,"ast_acc":90.65,"exec_acc":83.12,"irrelevance":85.59,"organization":"Google","cost":298.47,"latency":12.08,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":4,"model_name":"GLM-4.6 (FC thinking)","model_norm":"glm46","overall_acc":72.38,"ast_acc":87.56,"exec_acc":80.9,"irrelevance":84.96,"organization":"Zhipu AI","cost":4.64,"latency":4.34,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":5,"model_name":"Grok-4-1-fast-reasoning (FC)","model_norm":"grok41fastreasoning","overall_acc":69.57,"ast_acc":88.27,"exec_acc":78.46,"irrelevance":79.43,"organization":"xAI","cost":17.26,"latency":6.74,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":6,"model_name":"Claude-Haiku-4-5-20251001 (FC)","model_norm":"claudehaiku4520251001","overall_acc":68.7,"ast_acc":86.5,"exec_acc":78.68,"irrelevance":85.11,"organization":"Anthropic","cost":14.23,"latency":1.68,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":7,"model_name":"Gemini-3-Pro-Preview (FC)","model_norm":"gemini3propreview","overall_acc":68.14,"ast_acc":85.75,"exec_acc":81.72,"irrelevance":77.85,"organization":"Google","cost":224.69,"latency":15.87,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":8,"model_name":"o3-2025-04-16 (Prompt)","model_norm":"o320250416","overall_acc":63.05,"ast_acc":81.94,"exec_acc":73.21,"irrelevance":83.98,"organization":"OpenAI","cost":234.64,"latency":4.83,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":9,"model_name":"Grok-4-0709 (Prompt)","model_norm":"grok40709","overall_acc":62.97,"ast_acc":82.75,"exec_acc":72.54,"irrelevance":84.3,"organization":"xAI","cost":348.19,"latency":30.38,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":10,"model_name":"Grok-4-0709 (FC)","model_norm":"grok40709","overall_acc":61.38,"ast_acc":85.38,"exec_acc":75.57,"irrelevance":75.4,"organization":"xAI","cost":355.17,"latency":15.49,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":11,"model_name":"Moonshotai-Kimi-K2-Instruct (FC)","model_norm":"moonshotaikimik2instruct","overall_acc":59.06,"ast_acc":81.6,"exec_acc":78.68,"irrelevance":87.34,"organization":"MoonshotAI","cost":6.19,"latency":6.4,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":12,"model_name":"Grok-4-1-fast-non-reasoning (FC)","model_norm":"grok41fastnonreasoning","overall_acc":58.29,"ast_acc":88.13,"exec_acc":77.94,"irrelevance":74.09,"organization":"xAI","cost":16.27,"latency":2.29,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":13,"model_name":"Command A Reasoning (FC)","model_norm":"commandareasoning","overall_acc":57.06,"ast_acc":86.27,"exec_acc":78.61,"irrelevance":86.75,"organization":"Cohere","cost":3.04,"latency":3.44,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":14,"model_name":"DeepSeek-V3.2-Exp (Prompt + Thinking)","model_norm":"deepseekv32exp","overall_acc":56.73,"ast_acc":85.52,"exec_acc":76.02,"irrelevance":67,"organization":"DeepSeek","cost":57.75,"latency":37.89,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":15,"model_name":"Gemini-2.5-Flash (FC)","model_norm":"gemini25flash","overall_acc":56.24,"ast_acc":84.96,"exec_acc":74.39,"irrelevance":93.67,"organization":"Google","cost":26.36,"latency":2.99,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":16,"model_name":"GPT-5.2-2025-12-11 (FC)","model_norm":"gpt5220251211","overall_acc":55.87,"ast_acc":81.85,"exec_acc":70.39,"irrelevance":79.42,"organization":"OpenAI","cost":85.65,"latency":2.23,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":17,"model_name":"GPT-5-mini-2025-08-07 (FC)","model_norm":"gpt5mini20250807","overall_acc":55.46,"ast_acc":69.85,"exec_acc":58.62,"irrelevance":91.01,"organization":"OpenAI","cost":22.18,"latency":8.32,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":18,"model_name":"xLAM-2-32b-fc-r (FC)","model_norm":"xlam232bfcr","overall_acc":54.66,"ast_acc":89.6,"exec_acc":75.5,"irrelevance":80.23,"organization":"Salesforce","cost":6,"latency":6.94,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":19,"model_name":"DeepSeek-V3.2-Exp (FC)","model_norm":"deepseekv32exp","overall_acc":54.12,"ast_acc":34.85,"exec_acc":53.66,"irrelevance":93.18,"organization":"DeepSeek","cost":6.71,"latency":5.83,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":20,"model_name":"GPT-4.1-2025-04-14 (FC)","model_norm":"gpt4120250414","overall_acc":53.96,"ast_acc":82.79,"exec_acc":69.95,"irrelevance":86.52,"organization":"OpenAI","cost":100.75,"latency":1.63,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":21,"model_name":"o4-mini-2025-04-16 (FC)","model_norm":"o4mini20250416","overall_acc":53.24,"ast_acc":37.73,"exec_acc":66.1,"irrelevance":83.91,"organization":"OpenAI","cost":81.91,"latency":3.71,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":22,"model_name":"xLAM-2-70b-fc-r (FC)","model_norm":"xlam270bfcr","overall_acc":53.07,"ast_acc":88.44,"exec_acc":72.17,"irrelevance":79.11,"organization":"Salesforce","cost":25.1,"latency":28.06,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":23,"model_name":"Qwen3-235B-A22B-Instruct-2507 (Prompt)","model_norm":"qwen3235ba22binstruct2507","overall_acc":52.15,"ast_acc":90.33,"exec_acc":78.68,"irrelevance":78.89,"organization":"Qwen","cost":3.12,"latency":2.56,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":24,"model_name":"GPT-5-nano-2025-08-07 (FC)","model_norm":"gpt5nano20250807","overall_acc":51.45,"ast_acc":68,"exec_acc":59.44,"irrelevance":89.1,"organization":"OpenAI","cost":8.79,"latency":10.36,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":25,"model_name":"Nanbeige4-3B-Thinking-2511 (FC)","model_norm":"nanbeige43bthinking2511","overall_acc":51.4,"ast_acc":81.58,"exec_acc":79.42,"irrelevance":83.09,"organization":"Nanbeige","cost":14.14,"latency":13.46,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":26,"model_name":"Gemini-2.5-Flash (Prompt)","model_norm":"gemini25flash","overall_acc":50.9,"ast_acc":88.08,"exec_acc":78.16,"irrelevance":91.09,"organization":"Google","cost":33.45,"latency":3.18,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":27,"model_name":"GPT-4.1-mini-2025-04-14 (FC)","model_norm":"gpt41mini20250414","overall_acc":50.45,"ast_acc":83.83,"exec_acc":68.84,"irrelevance":81.69,"organization":"OpenAI","cost":19.25,"latency":1.32,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":28,"model_name":"o4-mini-2025-04-16 (Prompt)","model_norm":"o4mini20250416","overall_acc":50.26,"ast_acc":81.29,"exec_acc":70.76,"irrelevance":87.16,"organization":"OpenAI","cost":133.63,"latency":4.47,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":29,"model_name":"Qwen3-32B (FC)","model_norm":"qwen332b","overall_acc":48.71,"ast_acc":88.77,"exec_acc":82.01,"irrelevance":76.37,"organization":"Qwen","cost":153.08,"latency":169.87,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":30,"model_name":"o3-2025-04-16 (FC)","model_norm":"o320250416","overall_acc":48.56,"ast_acc":40.38,"exec_acc":66.17,"irrelevance":86.13,"organization":"OpenAI","cost":133.45,"latency":3.5,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":31,"model_name":"Qwen3-235B-A22B-Instruct-2507 (FC)","model_norm":"qwen3235ba22binstruct2507","overall_acc":47.99,"ast_acc":37.4,"exec_acc":68.91,"irrelevance":81.73,"organization":"Qwen","cost":2.5,"latency":2.57,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":32,"model_name":"Nanbeige3.5-Pro-Thinking (FC)","model_norm":"nanbeige35prothinking","overall_acc":47.68,"ast_acc":38.35,"exec_acc":69.95,"irrelevance":74.2,"organization":"Nanbeige","cost":23.46,"latency":21.12,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":33,"model_name":"Qwen3-32B (Prompt)","model_norm":"qwen332b","overall_acc":46.78,"ast_acc":90.27,"exec_acc":82.01,"irrelevance":82.39,"organization":"Qwen","cost":199.47,"latency":167.54,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":34,"model_name":"xLAM-2-8b-fc-r (FC)","model_norm":"xlam28bfcr","overall_acc":46.68,"ast_acc":84.58,"exec_acc":67.95,"irrelevance":63.28,"organization":"Salesforce","cost":20.92,"latency":22.65,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":35,"model_name":"Command A (FC)","model_norm":"commanda","overall_acc":46.49,"ast_acc":87.56,"exec_acc":78.53,"irrelevance":84.19,"organization":"Cohere","cost":91.37,"latency":2.09,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":36,"model_name":"BitAgent-Bounty-8B","model_norm":"bitagentbounty8b","overall_acc":46.23,"ast_acc":81.6,"exec_acc":93.12,"irrelevance":97.48,"organization":"Bittensor","cost":18.02,"latency":16.52,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":37,"model_name":"Arch-Agent-32B","model_norm":"archagent32b","overall_acc":45.37,"ast_acc":88.92,"exec_acc":80.68,"irrelevance":82.15,"organization":"katanemo","cost":8.87,"latency":9.44,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":38,"model_name":"GPT-5.2-2025-12-11 (Prompt)","model_norm":"gpt5220251211","overall_acc":45.27,"ast_acc":78.29,"exec_acc":67.14,"irrelevance":87.26,"organization":"OpenAI","cost":164.58,"latency":4.21,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":39,"model_name":"Qwen3-8B (FC)","model_norm":"qwen38b","overall_acc":42.57,"ast_acc":87.58,"exec_acc":80.53,"irrelevance":79.07,"organization":"Qwen","cost":43.32,"latency":51.36,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":40,"model_name":"ToolACE-2-8B (FC)","model_norm":"toolace28b","overall_acc":42.44,"ast_acc":87.1,"exec_acc":77.42,"irrelevance":90.79,"organization":"Huawei Noah & USTC","cost":24.43,"latency":15.95,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":41,"model_name":"Qwen3-30B-A3B-Instruct-2507 (FC)","model_norm":"qwen330ba3binstruct2507","overall_acc":41.39,"ast_acc":85.77,"exec_acc":77.94,"irrelevance":79.9,"organization":"Qwen","cost":5.62,"latency":5.95,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":42,"model_name":"xLAM-2-3b-fc-r (FC)","model_norm":"xlam23bfcr","overall_acc":41.22,"ast_acc":82.96,"exec_acc":62.92,"irrelevance":63.45,"organization":"Salesforce","cost":3.36,"latency":3.8,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":43,"model_name":"Qwen3-14B (FC)","model_norm":"qwen314b","overall_acc":41.03,"ast_acc":84.94,"exec_acc":80.01,"irrelevance":81.94,"organization":"Qwen","cost":3.38,"latency":4.5,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":44,"model_name":"Qwen3-8B (Prompt)","model_norm":"qwen38b","overall_acc":40.43,"ast_acc":88.56,"exec_acc":80.09,"irrelevance":82.27,"organization":"Qwen","cost":63.95,"latency":54.17,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":45,"model_name":"GPT-4.1-2025-04-14 (Prompt)","model_norm":"gpt4120250414","overall_acc":39.38,"ast_acc":88.69,"exec_acc":78.9,"irrelevance":83.99,"organization":"OpenAI","cost":145.85,"latency":1.2,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":46,"model_name":"mistral-large-2411 (FC)","model_norm":"mistrallarge2411","overall_acc":38.37,"ast_acc":84.65,"exec_acc":81.87,"irrelevance":68.92,"organization":"Mistral AI","cost":115.98,"latency":2.04,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":47,"model_name":"Qwen3-14B (Prompt)","model_norm":"qwen314b","overall_acc":37.77,"ast_acc":89.46,"exec_acc":79.35,"irrelevance":87.18,"organization":"Qwen","cost":1.35,"latency":1.2,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":48,"model_name":"Mistral-Medium-2505","model_norm":"mistralmedium2505","overall_acc":37.69,"ast_acc":85.33,"exec_acc":66.03,"irrelevance":74.49,"organization":"Mistral AI","cost":36.51,"latency":1.21,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":49,"model_name":"Mistral-Medium-2505 (FC)","model_norm":"mistralmedium2505","overall_acc":37.56,"ast_acc":67.44,"exec_acc":67.95,"irrelevance":91.95,"organization":"Mistral AI","cost":18.8,"latency":1.6,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":50,"model_name":"Llama-4-Maverick-17B-128E-Instruct-FP8 (FC)","model_norm":"llama4maverick17b128einstructfp8","overall_acc":37.29,"ast_acc":88.65,"exec_acc":73.65,"irrelevance":55.97,"organization":"Meta","cost":18.25,"latency":18.43,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":51,"model_name":"Mistral-small-2506 (FC)","model_norm":"mistralsmall2506","overall_acc":37.15,"ast_acc":73.6,"exec_acc":77.28,"irrelevance":87.94,"organization":"Mistral AI","cost":5.2,"latency":1.48,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":52,"model_name":"Gemini-2.5-Flash-Lite (FC)","model_norm":"gemini25flashlite","overall_acc":36.87,"ast_acc":86.6,"exec_acc":65.8,"irrelevance":92.5,"organization":"Google","cost":7.55,"latency":1.18,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":53,"model_name":"Qwen3-30B-A3B-Instruct-2507 (Prompt)","model_norm":"qwen330ba3binstruct2507","overall_acc":36.7,"ast_acc":88.92,"exec_acc":78.39,"irrelevance":74.85,"organization":"Qwen","cost":1.56,"latency":1.24,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":54,"model_name":"Qwen3-4B-Instruct-2507 (FC)","model_norm":"qwen34binstruct2507","overall_acc":35.68,"ast_acc":87.88,"exec_acc":76.39,"irrelevance":84.93,"organization":"Qwen","cost":6.37,"latency":7.61,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":55,"model_name":"Qwen3-4B-Instruct-2507 (Prompt)","model_norm":"qwen34binstruct2507","overall_acc":35.52,"ast_acc":86.44,"exec_acc":74.69,"irrelevance":75.87,"organization":"Qwen","cost":53.66,"latency":44.7,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":56,"model_name":"Arch-Agent-3B","model_norm":"archagent3b","overall_acc":35.36,"ast_acc":86.67,"exec_acc":72.91,"irrelevance":74.67,"organization":"katanemo","cost":3.7,"latency":3.56,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":57,"model_name":"Claude-Opus-4-5-20251101 (Prompt)","model_norm":"claudeopus4520251101","overall_acc":33.47,"ast_acc":89.65,"exec_acc":76.02,"irrelevance":90.75,"organization":"Anthropic","cost":88.33,"latency":3.76,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":58,"model_name":"GPT-4.1-nano-2025-04-14 (FC)","model_norm":"gpt41nano20250414","overall_acc":33.05,"ast_acc":72.98,"exec_acc":60.77,"irrelevance":66,"organization":"OpenAI","cost":5.66,"latency":1.44,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":59,"model_name":"Mistral-Small-2506 (Prompt)","model_norm":"mistralsmall2506","overall_acc":32.38,"ast_acc":89.69,"exec_acc":79.05,"irrelevance":65.73,"organization":"Mistral AI","cost":6.91,"latency":0.92,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":60,"model_name":"Arch-Agent-1.5B","model_norm":"archagent15b","overall_acc":32.14,"ast_acc":82.67,"exec_acc":67.73,"irrelevance":74.83,"organization":"katanemo","cost":2.45,"latency":2.38,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":61,"model_name":"Command R7B (FC)","model_norm":"commandr7b","overall_acc":32.07,"ast_acc":80.96,"exec_acc":69.06,"irrelevance":81.65,"organization":"Cohere","cost":1.5,"latency":1.38,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":62,"model_name":"Llama-3.3-70B-Instruct (FC)","model_norm":"llama3370binstruct","overall_acc":31.9,"ast_acc":88.02,"exec_acc":76.61,"irrelevance":53.53,"organization":"Meta","cost":29.54,"latency":26.11,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":63,"model_name":"mistral-large-2411 (Prompt)","model_norm":"mistrallarge2411","overall_acc":31.84,"ast_acc":83,"exec_acc":68.1,"irrelevance":38.77,"organization":"Mistral AI","cost":232.42,"latency":1.82,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":64,"model_name":"Hammer2.1-7b (FC)","model_norm":"hammer217b","overall_acc":31.67,"ast_acc":85.5,"exec_acc":69.5,"irrelevance":90.12,"organization":"MadeAgents","cost":4.99,"latency":5.77,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":65,"model_name":"xLAM-2-1b-fc-r (FC)","model_norm":"xlam21bfcr","overall_acc":30.44,"ast_acc":69.04,"exec_acc":55.14,"irrelevance":64.47,"organization":"Salesforce","cost":2.79,"latency":2.84,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":66,"model_name":"Gemma-3-12b-it (Prompt)","model_norm":"gemma312bit","overall_acc":30.43,"ast_acc":79.44,"exec_acc":74.24,"irrelevance":70.29,"organization":"Google","cost":10.77,"latency":11.1,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":67,"model_name":"GPT-4.1-mini-2025-04-14 (Prompt)","model_norm":"gpt41mini20250414","overall_acc":29.73,"ast_acc":84.6,"exec_acc":74.76,"irrelevance":73.88,"organization":"OpenAI","cost":20.52,"latency":1.36,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":68,"model_name":"Hammer2.1-3b (FC)","model_norm":"hammer213b","overall_acc":29.71,"ast_acc":84.96,"exec_acc":70.54,"irrelevance":86.12,"organization":"MadeAgents","cost":10.89,"latency":11.24,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":69,"model_name":"Gemma-3-27b-it (Prompt)","model_norm":"gemma327bit","overall_acc":29.47,"ast_acc":87.17,"exec_acc":74.54,"irrelevance":73.67,"organization":"Google","cost":11.82,"latency":10.88,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":70,"model_name":"Phi-4 (Prompt)","model_norm":"phi4","overall_acc":28.79,"ast_acc":69.56,"exec_acc":60.7,"irrelevance":87.55,"organization":"Microsoft","cost":8.72,"latency":9.49,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":71,"model_name":"Qwen3-1.7B (FC)","model_norm":"qwen317b","overall_acc":28.41,"ast_acc":82.92,"exec_acc":74.61,"irrelevance":76.54,"organization":"Qwen","cost":4.33,"latency":5.12,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":72,"model_name":"Llama-4-Scout-17B-16E-Instruct (FC)","model_norm":"llama4scout17b16einstruct","overall_acc":28.13,"ast_acc":89.38,"exec_acc":74.69,"irrelevance":44.92,"organization":"Meta","cost":24.68,"latency":17.86,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":73,"model_name":"Gemini-2.5-Flash-Lite (Prompt)","model_norm":"gemini25flashlite","overall_acc":28.03,"ast_acc":83.9,"exec_acc":54.85,"irrelevance":93.33,"organization":"Google","cost":7.05,"latency":1,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":74,"model_name":"CoALM-70B","model_norm":"coalm70b","overall_acc":27.99,"ast_acc":83.44,"exec_acc":67.28,"irrelevance":85.65,"organization":"UIUC + Oumi","cost":19.89,"latency":16.22,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":75,"model_name":"Hammer2.1-1.5b (FC)","model_norm":"hammer2115b","overall_acc":27.88,"ast_acc":82.98,"exec_acc":69.5,"irrelevance":79.4,"organization":"MadeAgents","cost":6.83,"latency":6.28,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":76,"model_name":"palmyra-x-004 (FC)","model_norm":"palmyrax004","overall_acc":27.87,"ast_acc":87.46,"exec_acc":77.87,"irrelevance":80.99,"organization":"Writer","cost":178.15,"latency":3.71,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":77,"model_name":"GPT-5-mini-2025-08-07 (Prompt)","model_norm":"gpt5mini20250807","overall_acc":27.83,"ast_acc":68.04,"exec_acc":62.55,"irrelevance":55.71,"organization":"OpenAI","cost":82.74,"latency":8.89,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":78,"model_name":"Open-Mistral-Nemo-2407 (FC)","model_norm":"openmistralnemo2407","overall_acc":27.63,"ast_acc":82.81,"exec_acc":73.8,"irrelevance":61.77,"organization":"Mistral AI","cost":8.12,"latency":1.07,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":79,"model_name":"GPT-5-nano-2025-08-07 (Prompt)","model_norm":"gpt5nano20250807","overall_acc":27.55,"ast_acc":80.81,"exec_acc":70.69,"irrelevance":45.75,"organization":"OpenAI","cost":21.47,"latency":10.67,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":80,"model_name":"Amazon-Nova-2-Lite-v1:0 (FC)","model_norm":"amazonnova2litev10","overall_acc":27.1,"ast_acc":86.96,"exec_acc":80.83,"irrelevance":82.11,"organization":"Amazon","cost":78.19,"latency":8.55,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":81,"model_name":"Granite-3.1-8B-Instruct (FC)","model_norm":"granite318binstruct","overall_acc":27.1,"ast_acc":78.33,"exec_acc":60.33,"irrelevance":79.98,"organization":"IBM","cost":9.32,"latency":13.23,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":82,"model_name":"Falcon3-10B-Instruct (FC)","model_norm":"falcon310binstruct","overall_acc":27.01,"ast_acc":85,"exec_acc":75.43,"irrelevance":32.09,"organization":"TII UAE","cost":52.59,"latency":69.27,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":83,"model_name":"Granite-3.2-8B-Instruct (FC)","model_norm":"granite328binstruct","overall_acc":26.87,"ast_acc":79.77,"exec_acc":60.33,"irrelevance":80.53,"organization":"IBM","cost":25.02,"latency":36.13,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":84,"model_name":"CoALM-8B","model_norm":"coalm8b","overall_acc":26.81,"ast_acc":84.87,"exec_acc":66.77,"irrelevance":86.9,"organization":"UIUC + Oumi","cost":25.33,"latency":20.36,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":85,"model_name":"Llama-3.1-8B-Instruct (Prompt)","model_norm":"llama318binstruct","overall_acc":25.83,"ast_acc":84,"exec_acc":70.76,"irrelevance":42.7,"organization":"Meta","cost":7.49,"latency":5.6,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":86,"model_name":"MiniCPM3-4B-FC (FC)","model_norm":"minicpm34bfc","overall_acc":25.55,"ast_acc":81.75,"exec_acc":65.21,"irrelevance":72.84,"organization":"openbmb","cost":54.05,"latency":118.62,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":87,"model_name":"Claude-Haiku-4-5-20251001 (Prompt)","model_norm":"claudehaiku4520251001","overall_acc":25.26,"ast_acc":55.42,"exec_acc":52.48,"irrelevance":95.29,"organization":"Anthropic","cost":45.13,"latency":3.75,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":88,"model_name":"Amazon-Nova-Pro-v1:0 (FC)","model_norm":"amazonnovaprov10","overall_acc":24.97,"ast_acc":86.58,"exec_acc":78.53,"irrelevance":70.06,"organization":"Amazon","cost":48.44,"latency":2.25,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":89,"model_name":"Claude-Sonnet-4-5-20250929 (Prompt)","model_norm":"claudesonnet4520250929","overall_acc":24.9,"ast_acc":59.81,"exec_acc":46.56,"irrelevance":95.03,"organization":"Anthropic","cost":47.82,"latency":3.84,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":90,"model_name":"GPT-4.1-nano-2025-04-14 (Prompt)","model_norm":"gpt41nano20250414","overall_acc":24.88,"ast_acc":72.44,"exec_acc":50.33,"irrelevance":83.44,"organization":"OpenAI","cost":7.42,"latency":1.02,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":91,"model_name":"Falcon3-7B-Instruct (FC)","model_norm":"falcon37binstruct","overall_acc":24.03,"ast_acc":82.69,"exec_acc":68.32,"irrelevance":31.99,"organization":"TII UAE","cost":73.61,"latency":93.11,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":92,"model_name":"Qwen3-0.6B (FC)","model_norm":"qwen306b","overall_acc":23.93,"ast_acc":71.79,"exec_acc":56.62,"irrelevance":80.84,"organization":"Qwen","cost":0.46,"latency":0.68,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":93,"model_name":"Granite-20b-FunctionCalling (FC)","model_norm":"granite20bfunctioncalling","overall_acc":23.23,"ast_acc":82.35,"exec_acc":58.7,"irrelevance":75.13,"organization":"IBM","cost":5.23,"latency":3.2,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":94,"model_name":"Qwen3-0.6B (Prompt)","model_norm":"qwen306b","overall_acc":22.38,"ast_acc":70,"exec_acc":49.37,"irrelevance":82.5,"organization":"Qwen","cost":3.65,"latency":3.1,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":95,"model_name":"Amazon-Nova-Micro-v1:0 (FC)","model_norm":"amazonnovamicrov10","overall_acc":22.29,"ast_acc":74.1,"exec_acc":66.32,"irrelevance":70.65,"organization":"Amazon","cost":1.81,"latency":1.12,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":96,"model_name":"RZN-T (Prompt)","model_norm":"rznt","overall_acc":22.25,"ast_acc":67.94,"exec_acc":49.74,"irrelevance":82.41,"organization":"Phronetic AI","cost":12.31,"latency":12.32,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":97,"model_name":"MiniCPM3-4B (Prompt)","model_norm":"minicpm34b","overall_acc":22.08,"ast_acc":70.54,"exec_acc":43.15,"irrelevance":73.71,"organization":"openbmb","cost":29.83,"latency":31.18,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":98,"model_name":"Llama-3.2-3B-Instruct (FC)","model_norm":"llama323binstruct","overall_acc":21.95,"ast_acc":82.67,"exec_acc":58.33,"irrelevance":52.06,"organization":"Meta","cost":6.2,"latency":6.1,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":99,"model_name":"Bielik-11B-v2.3-Instruct (Prompt)","model_norm":"bielik11bv23instruct","overall_acc":21.9,"ast_acc":81.5,"exec_acc":67.8,"irrelevance":36.01,"organization":"SpeakLeash & ACK Cyfronet AGH","cost":22.44,"latency":23.75,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":100,"model_name":"Hammer2.1-0.5b (FC)","model_norm":"hammer2105b","overall_acc":21.22,"ast_acc":65.98,"exec_acc":54.63,"irrelevance":80.79,"organization":"MadeAgents","cost":2.82,"latency":2.79,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":101,"model_name":"Gemma-3-4b-it (Prompt)","model_norm":"gemma34bit","overall_acc":19.62,"ast_acc":61.12,"exec_acc":60.84,"irrelevance":53.94,"organization":"Google","cost":4.14,"latency":4.69,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":102,"model_name":"Open-Mistral-Nemo-2407 (Prompt)","model_norm":"openmistralnemo2407","overall_acc":19.31,"ast_acc":88.46,"exec_acc":73.95,"irrelevance":6.28,"organization":"Mistral AI","cost":13.8,"latency":0.84,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":103,"model_name":"Granite-4.0-350m (FC)","model_norm":"granite40350m","overall_acc":18.98,"ast_acc":67.92,"exec_acc":46.11,"irrelevance":60.84,"organization":"IBM","cost":1.44,"latency":1.74,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":104,"model_name":"Falcon3-3B-Instruct (FC)","model_norm":"falcon33binstruct","overall_acc":16.25,"ast_acc":54.62,"exec_acc":54.48,"irrelevance":32.92,"organization":"TII UAE","cost":36.7,"latency":38.52,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":105,"model_name":"Ministral-8B-Instruct-2410 (FC)","model_norm":"ministral8binstruct2410","overall_acc":11.1,"ast_acc":0,"exec_acc":0,"irrelevance":100,"organization":"Mistral AI","cost":70.01,"latency":82.07,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":106,"model_name":"Falcon3-1B-Instruct (FC)","model_norm":"falcon31binstruct","overall_acc":11.08,"ast_acc":9.02,"exec_acc":2.89,"irrelevance":87.3,"organization":"TII UAE","cost":1.72,"latency":5.23,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":107,"model_name":"Llama-3.2-1B-Instruct (FC)","model_norm":"llama321binstruct","overall_acc":10.82,"ast_acc":38.38,"exec_acc":11.77,"irrelevance":51.57,"organization":"Meta","cost":1.64,"latency":3.21,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":108,"model_name":"Llama-3.1-Nemotron-Ultra-253B-v1 (FC)","model_norm":"llama31nemotronultra253bv1","overall_acc":10,"ast_acc":0,"exec_acc":0,"irrelevance":100,"organization":"NVIDIA","cost":0.72,"latency":1.42,"fetched_at":"2026-05-21T03:00:00.396Z"},{"rank":109,"model_name":"Gemma-3-1b-it (Prompt)","model_norm":"gemma31bit","overall_acc":7.17,"ast_acc":20.21,"exec_acc":11.84,"irrelevance":33.18,"organization":"Google","cost":3.4,"latency":3.98,"fetched_at":"2026-05-21T03:00:00.396Z"}]