{"data":[{"created":1776355031,"id":"claude-opus-4-7","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":47500,"output_token_price_per_m":237500,"title":"claude-opus-4-7","description":"Most intelligent model for building agents and coding","tags":[],"context_size":1000000,"status":1,"display_name":"claude-opus-4-7","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1756965597,"id":"google/gemma-3-12b-it","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":500,"output_token_price_per_m":1000,"title":"google/gemma-3-12b-it","description":"","tags":[],"context_size":131072,"status":1,"display_name":"Gemma3 12B","model_type":"chat","max_output_tokens":8192,"features":["structured-outputs","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1774861905,"id":"claude-sonnet-4-6-dd","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":16500,"output_token_price_per_m":82500,"title":"claude-sonnet-4-6-dd","description":"","tags":[],"context_size":1000000,"status":1,"display_name":"claude-sonnet-4-6-dd","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1771374729,"id":"claude-sonnet-4-6","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":28500,"output_token_price_per_m":142500,"title":"claude-sonnet-4-6","description":"Our smartest model. Best for complex agents, coding, and most advanced tasks","tags":[],"context_size":1000000,"status":1,"display_name":"claude-sonnet-4-6","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1774862417,"id":"claude-opus-4-6-dd","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":27500,"output_token_price_per_m":137500,"title":"claude-opus-4-6-dd","description":"","tags":[],"context_size":1000000,"status":1,"display_name":"claude-opus-4-6-dd","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1770344809,"id":"claude-opus-4-6","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":47500,"output_token_price_per_m":237500,"title":"claude-opus-4-6","description":"Most intelligent model for building agents and coding","tags":[],"context_size":1000000,"status":1,"display_name":"claude-opus-4-6","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1766719516,"id":"claude-opus-4-5-20251101-dd","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":27500,"output_token_price_per_m":137500,"title":"claude-opus-4-5-20251101-dd","description":"","tags":[],"context_size":200000,"status":1,"display_name":"claude-opus-4-5-20251101-dd","model_type":"chat","max_output_tokens":65536,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1764038687,"id":"claude-opus-4-5-20251101","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":47500,"output_token_price_per_m":237500,"title":"claude-opus-4-5-20251101","description":"","tags":[],"context_size":200000,"status":1,"display_name":"claude-opus-4-5-20251101","model_type":"chat","max_output_tokens":65536,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1774862587,"id":"claude-haiku-4-5-20251001-dd","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":5500,"output_token_price_per_m":27500,"title":"claude-haiku-4-5-20251001-dd","description":"","tags":[],"context_size":200000,"status":1,"display_name":"claude-haiku-4-5-20251001-dd","model_type":"chat","max_output_tokens":64000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1760579710,"id":"claude-haiku-4-5-20251001","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":10000,"output_token_price_per_m":50000,"title":"claude-haiku-4-5-20251001","description":"","tags":[],"context_size":20000,"status":1,"display_name":"claude-haiku-4-5-20251001","model_type":"chat","max_output_tokens":64000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1766719508,"id":"claude-sonnet-4-5-20250929-dd","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":16500,"output_token_price_per_m":82500,"title":"claude-sonnet-4-5-20250929-dd","description":"","tags":[],"context_size":200000,"status":1,"display_name":"claude-sonnet-4-5-20250929-dd","model_type":"chat","max_output_tokens":64000,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1759195000,"id":"claude-sonnet-4-5-20250929","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":28500,"output_token_price_per_m":142500,"title":"claude-sonnet-4-5-20250929","description":"","tags":[],"context_size":200000,"status":1,"display_name":"claude-sonnet-4-5-20250929","model_type":"chat","max_output_tokens":64000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1753165215,"id":"claude-sonnet-4-20250514","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":28500,"output_token_price_per_m":142500,"title":"claude-sonnet-4-20250514","description":"支持prompt cache的模型","tags":[],"context_size":200000,"status":1,"display_name":"claude-sonnet-4-20250514","model_type":"chat","max_output_tokens":64000,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1753257166,"id":"claude-opus-4-20250514","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":142500,"output_token_price_per_m":712500,"title":"claude-opus-4-20250514","description":"支持prompt cache的模型","tags":[],"context_size":200000,"status":4,"display_name":"claude-opus-4-20250514","model_type":"chat","max_output_tokens":32000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1753165621,"id":"claude-3-7-sonnet-20250219","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":28500,"output_token_price_per_m":142500,"title":"claude-3-7-sonnet-20250219","description":"支持prompt cache的模型","tags":[],"context_size":200000,"status":4,"display_name":"claude-3-7-sonnet-20250219","model_type":"chat","max_output_tokens":64000,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1753165576,"id":"claude-3-5-sonnet-20241022","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":28500,"output_token_price_per_m":142500,"title":"claude-3-5-sonnet-20241022","description":"支持prompt cache的模型","tags":[],"context_size":200000,"status":4,"display_name":"claude-3-5-sonnet-20241022","model_type":"chat","max_output_tokens":8192,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1747644168,"id":"claude-3-haiku-20240307","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":2375,"output_token_price_per_m":11875,"title":"claude-3-haiku-20240307","description":"claude-3-haiku-20240307","tags":[],"context_size":200000,"status":4,"display_name":"claude-3-haiku-20240307","model_type":"chat","max_output_tokens":4096,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1747643866,"id":"claude-3-5-haiku-20241022","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":7600,"output_token_price_per_m":38000,"title":"claude-3-5-haiku-20241022","description":"claude-3-5-haiku-20241022","tags":[],"context_size":200000,"status":4,"display_name":"claude-3-5-haiku-20241022","model_type":"chat","max_output_tokens":8192,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1772590258,"id":"gemini-3.1-flash-lite-preview","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":2375,"output_token_price_per_m":14250,"title":"gemini-3.1-flash-lite-preview","description":"","tags":[],"context_size":1048576,"status":1,"display_name":"gemini-3.1-flash-lite-preview","model_type":"chat","max_output_tokens":65536,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","gemini"],"input_modalities":["text","image","video","audio"],"output_modalities":["text"]},{"created":1771556845,"id":"gemini-3.1-pro-preview","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":19000,"output_token_price_per_m":114000,"title":"gemini-3.1-pro-preview","description":"","tags":[],"context_size":1048576,"status":1,"display_name":"gemini-3.1-pro-preview","model_type":"chat","max_output_tokens":65536,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","gemini"],"input_modalities":["text","image","video","audio"],"output_modalities":["text"]},{"created":1766022389,"id":"gemini-3-flash-preview","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":4750,"output_token_price_per_m":28500,"title":"gemini-3-flash-preview","description":"","tags":[],"context_size":1048576,"status":1,"display_name":"gemini-3-flash-preview","model_type":"chat","max_output_tokens":65536,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","gemini"],"input_modalities":["text","image","video","audio"],"output_modalities":["text"]},{"created":1763518106,"id":"gemini-3-pro-preview","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":19000,"output_token_price_per_m":114000,"title":"gemini-3-pro-preview","description":"","tags":[],"context_size":1048576,"status":4,"display_name":"gemini-3-pro-preview","model_type":"chat","max_output_tokens":65536,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","gemini"],"input_modalities":["text","image","video","audio"],"output_modalities":["text"]},{"created":1759997753,"id":"gemini-2.5-flash-lite-preview-09-2025","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":950,"output_token_price_per_m":3800,"title":"gemini-2.5-flash-lite-preview-09-2025","description":"","tags":[],"context_size":1048576,"status":1,"display_name":"gemini-2.5-flash-lite-preview-09-2025","model_type":"chat","max_output_tokens":65536,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","gemini"],"input_modalities":["text","image","video","audio"],"output_modalities":["text"]},{"created":1753697639,"id":"gemini-2.5-flash-lite","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":950,"output_token_price_per_m":3800,"title":"gemini-2.5-flash-lite","description":"","tags":[],"context_size":1048576,"status":1,"display_name":"gemini-2.5-flash-lite","model_type":"chat","max_output_tokens":65535,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","gemini"],"input_modalities":["text","image","video","audio"],"output_modalities":["text"]},{"created":1753696065,"id":"gemini-2.5-pro","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":11875,"output_token_price_per_m":95000,"title":"gemini-2.5-pro","description":"","tags":[],"context_size":1048576,"status":1,"display_name":"gemini-2.5-pro","model_type":"chat","max_output_tokens":65535,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","gemini"],"input_modalities":["text","image","video","audio"],"output_modalities":["text"]},{"created":1753695995,"id":"gemini-2.5-flash","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":2850,"output_token_price_per_m":23750,"title":"gemini-2.5-flash","description":"","tags":[],"context_size":1048576,"status":1,"display_name":"gemini-2.5-flash","model_type":"chat","max_output_tokens":65535,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","gemini"],"input_modalities":["text","image","video","audio"],"output_modalities":["text"]},{"created":1750397784,"id":"gemini-2.5-flash-lite-preview-06-17","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":950,"output_token_price_per_m":3800,"title":"gemini-2.5-flash-lite-preview-06-17","description":"","tags":[],"context_size":1048576,"status":1,"display_name":"gemini-2.5-flash-lite-preview-06-17","model_type":"chat","max_output_tokens":65535,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","gemini"],"input_modalities":["text","video","image","audio"],"output_modalities":["text"]},{"created":1749607866,"id":"gemini-2.5-flash-preview-05-20","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1425,"output_token_price_per_m":33250,"title":"gemini-2.5-flash-preview-05-20","description":"","tags":[],"context_size":1048576,"status":1,"display_name":"gemini-2.5-flash-preview-05-20","model_type":"chat","max_output_tokens":200000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","gemini"],"input_modalities":["text","image","video","audio"],"output_modalities":["text"]},{"created":1749607804,"id":"gemini-2.5-pro-preview-06-05","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":11875,"output_token_price_per_m":95000,"title":"gemini-2.5-pro-preview-06-05","description":"","tags":[],"context_size":1048576,"status":1,"display_name":"gemini-2.5-pro-preview-06-05","model_type":"chat","max_output_tokens":200000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","gemini"],"input_modalities":["text","image","video","audio"],"output_modalities":["text"]},{"created":1754029714,"id":"gemini-2.0-flash-lite","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":712,"output_token_price_per_m":2850,"title":"gemini-2.0-flash-lite","description":"","tags":[],"context_size":1048576,"status":1,"display_name":"gemini-2.0-flash-lite","model_type":"chat","max_output_tokens":8192,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","gemini"],"input_modalities":["text","image","video","audio"],"output_modalities":["text"]},{"created":1749527597,"id":"gemini-2.0-flash-20250609","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1425,"output_token_price_per_m":5700,"title":"gemini-2.0-flash-20250609","description":"","tags":[],"context_size":1048576,"status":1,"display_name":"gemini-2.0-flash-20250609","model_type":"chat","max_output_tokens":200000,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","gemini"],"input_modalities":["text","image","video","audio"],"output_modalities":["text"]},{"created":1777096956,"id":"gpt-5.5-pro","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":300000,"output_token_price_per_m":1800000,"title":"gpt-5.5-pro","description":"","tags":[],"context_size":1050000,"status":1,"display_name":"gpt-5.5-pro","model_type":"chat","max_output_tokens":128000,"features":["function-calling","reasoning","serverless"],"endpoints":["responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1777096775,"id":"gpt-5.5","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":50000,"output_token_price_per_m":300000,"title":"gpt-5.5","description":"","tags":[],"context_size":1050000,"status":1,"display_name":"gpt-5.5","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["responses","chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1773818708,"id":"gpt-5.4-nano","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1900,"output_token_price_per_m":11875,"title":"gpt-5.4-nano","description":"Our cheapest GPT-5.4-class model for simple high-volume tasks","tags":[],"context_size":400000,"status":1,"display_name":"gpt-5.4-nano","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["responses","chat/completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1773818628,"id":"gpt-5.4-mini","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":7125,"output_token_price_per_m":42750,"title":"gpt-5.4-mini","description":"Our strongest mini model yet for coding, computer use, and subagents","tags":[],"context_size":400000,"status":1,"display_name":"gpt-5.4-mini","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["responses","chat/completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1772765601,"id":"gpt-5.4-pro","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":285000,"output_token_price_per_m":1710000,"title":"gpt-5.4-pro","description":"","tags":[],"context_size":1050000,"status":1,"display_name":"gpt-5.4-pro","model_type":"chat","max_output_tokens":128000,"features":["function-calling","reasoning","serverless"],"endpoints":["responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1772763480,"id":"gpt-5.4","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":23750,"output_token_price_per_m":142500,"title":"gpt-5.4","description":"","tags":[],"context_size":1050000,"status":1,"display_name":"gpt-5.4","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["responses","chat/completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1772676315,"id":"gpt-5.3-chat-latest","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":16625,"output_token_price_per_m":133000,"title":"gpt-5.3-chat-latest","description":"","tags":[],"context_size":128000,"status":1,"display_name":"gpt-5.3-chat-latest","model_type":"chat","max_output_tokens":16000,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1772073970,"id":"gpt-5.3-codex","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":16625,"output_token_price_per_m":133000,"title":"gpt-5.3-codex","description":"","tags":[],"context_size":400000,"status":1,"display_name":"gpt-5.3-codex","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1768791542,"id":"gpt-5.2-codex","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":17500,"output_token_price_per_m":140000,"title":"gpt-5.2-codex","description":"","tags":[],"context_size":400000,"status":1,"display_name":"gpt-5.2-codex","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1765504819,"id":"gpt-5.2","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":16625,"output_token_price_per_m":133000,"title":"gpt-5.2","description":"","tags":[],"context_size":400000,"status":1,"display_name":"gpt-5.2","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1765504807,"id":"gpt-5.2-pro","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":199500,"output_token_price_per_m":1596000,"title":"gpt-5.2-pro","description":"","tags":[],"context_size":400000,"status":1,"display_name":"gpt-5.2-pro","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1765504791,"id":"gpt-5.2-chat-latest","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":16625,"output_token_price_per_m":133000,"title":"gpt-5.2-chat-latest","description":"","tags":[],"context_size":128000,"status":1,"display_name":"gpt-5.2-chat-latest","model_type":"chat","max_output_tokens":16000,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1765458748,"id":"gpt-5.1-codex-max","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":11875,"output_token_price_per_m":95000,"title":"gpt-5.1-codex-max","description":"","tags":[],"context_size":400000,"status":1,"display_name":"gpt-5.1-codex-max","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1765458656,"id":"gpt-5.1-codex-mini","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":2375,"output_token_price_per_m":19000,"title":"gpt-5.1-codex-mini","description":"","tags":[],"context_size":400000,"status":1,"display_name":"gpt-5.1-codex-mini","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1763088915,"id":"gpt-5.1-codex","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":11875,"output_token_price_per_m":95000,"title":"gpt-5.1-codex","description":"","tags":[],"context_size":400000,"status":1,"display_name":"gpt-5.1-codex","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1763088905,"id":"gpt-5.1-chat-latest","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":11875,"output_token_price_per_m":95000,"title":"gpt-5.1-chat-latest","description":"","tags":[],"context_size":128000,"status":1,"display_name":"gpt-5.1-chat-latest","model_type":"chat","max_output_tokens":16000,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1763088883,"id":"gpt-5.1","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":11875,"output_token_price_per_m":95000,"title":"gpt-5.1","description":"","tags":[],"context_size":400000,"status":1,"display_name":"gpt-5.1","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1760003495,"id":"gpt-5-pro","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":142500,"output_token_price_per_m":1140000,"title":"gpt-5-pro","description":"","tags":[],"context_size":400000,"status":1,"display_name":"gpt-5-pro","model_type":"chat","max_output_tokens":272000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["response"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1759997547,"id":"gpt-5-codex","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":11875,"output_token_price_per_m":95000,"title":"gpt-5-codex","description":"","tags":[],"context_size":400000,"status":1,"display_name":"gpt-5-codex","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1754620840,"id":"o4-mini","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":10450,"output_token_price_per_m":41800,"title":"o4-mini","description":"","tags":[],"context_size":200000,"status":4,"display_name":"o4-mini","model_type":"chat","max_output_tokens":100000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1754618746,"id":"gpt-5-chat-latest","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":11875,"output_token_price_per_m":95000,"title":"gpt-5-chat-latest","description":"","tags":[],"context_size":400000,"status":1,"display_name":"gpt-5-chat-latest","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1754618624,"id":"gpt-5-nano","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":475,"output_token_price_per_m":3800,"title":"gpt-5-nano","description":"","tags":[],"context_size":400000,"status":1,"display_name":"gpt-5-nano","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1754618452,"id":"gpt-5-mini","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":2375,"output_token_price_per_m":19000,"title":"gpt-5-mini","description":"","tags":[],"context_size":400000,"status":1,"display_name":"gpt-5-mini","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1754618298,"id":"gpt-5","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":11875,"output_token_price_per_m":95000,"title":"gpt-5","description":"","tags":[],"context_size":400000,"status":1,"display_name":"gpt-5","model_type":"chat","max_output_tokens":128000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1754438961,"id":"openai/gpt-oss-20b","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":500,"output_token_price_per_m":2000,"title":"openai/gpt-oss-20b","description":"gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.","tags":[],"context_size":131072,"status":1,"display_name":"OpenAI: GPT OSS 20B","model_type":"chat","max_output_tokens":32768,"features":["structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1754438873,"id":"openai/gpt-oss-120b","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1000,"output_token_price_per_m":5000,"title":"openai/gpt-oss-120b","description":"gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.","tags":[],"context_size":131072,"status":1,"display_name":"OpenAI GPT OSS 120B","model_type":"chat","max_output_tokens":131072,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1749695120,"id":"gpt-4.1-mini","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":4000,"output_token_price_per_m":16000,"title":"gpt-4.1-mini","description":"","tags":[],"context_size":1047576,"status":1,"display_name":"gpt-4.1-mini","model_type":"chat","max_output_tokens":32768,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1749695075,"id":"gpt-4.1-nano","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1000,"output_token_price_per_m":4000,"title":"gpt-4.1-nano","description":"","tags":[],"context_size":1047576,"status":1,"display_name":"gpt-4.1-nano","model_type":"chat","max_output_tokens":32768,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1749695027,"id":"gpt-4.1","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":20000,"output_token_price_per_m":80000,"title":"gpt-4.1","description":"","tags":[],"context_size":1047576,"status":1,"display_name":"gpt-4.1","model_type":"chat","max_output_tokens":32768,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1747120799,"id":"o3","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":95000,"output_token_price_per_m":380000,"title":"o3","description":"chatgpt-o3","tags":[],"context_size":200000,"status":1,"display_name":"o3","model_type":"chat","max_output_tokens":100000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1747120692,"id":"o3-mini","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":10450,"output_token_price_per_m":41800,"title":"o3-mini","description":"chatgpt-o3-mini","tags":[],"context_size":200000,"status":1,"display_name":"o3-mini","model_type":"chat","max_output_tokens":100000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1747120527,"id":"o1-mini","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":10450,"output_token_price_per_m":41800,"title":"o1-mini","description":"chatgpt-o1-mini","tags":[],"context_size":128000,"status":1,"display_name":"o1-mini","model_type":"chat","max_output_tokens":65536,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1747120473,"id":"o1","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":142500,"output_token_price_per_m":570000,"title":"o1","description":"chatgpt-o1","tags":[],"context_size":200000,"status":1,"display_name":"o1","model_type":"chat","max_output_tokens":100000,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1746682248,"id":"gpt-4o-mini","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1425,"output_token_price_per_m":5700,"title":"gpt-4o-mini","description":"chatgpt-4o-mini","tags":[],"context_size":128000,"status":1,"display_name":"gpt-4o-mini","model_type":"chat","max_output_tokens":16384,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","responses"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1746682226,"id":"gpt-4o","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":23750,"output_token_price_per_m":95000,"title":"gpt-4o","description":"chatgpt-4o","tags":[],"context_size":131072,"status":1,"display_name":"gpt-4o","model_type":"chat","max_output_tokens":131072,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1774425026,"id":"grok-4.20-0309-reasoning","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":19000,"output_token_price_per_m":57000,"title":"grok-4.20-0309-reasoning","description":"Grok 4.20 is our newest flagship model with industry-leading speed and agentic tool calling capabilities. It combines the lowest hallucination rate on the market with strict prompt adherance, delivering consistently precise and truthful responses.","tags":[],"context_size":2000000,"status":1,"display_name":"grok-4.20-0309-reasoning","model_type":"chat","max_output_tokens":2000000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1774424773,"id":"grok-4.20-0309-non-reasoning","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":19000,"output_token_price_per_m":57000,"title":"grok-4.20-0309-non-reasoning","description":"Grok 4.20 is our newest flagship model with industry-leading speed and agentic tool calling capabilities. It combines the lowest hallucination rate on the market with strict prompt adherance, delivering consistently precise and truthful responses.","tags":[],"context_size":2000000,"status":1,"display_name":"grok-4.20-0309-non-reasoning","model_type":"chat","max_output_tokens":2000000,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1764249941,"id":"grok-4-1-fast-reasoning","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1900,"output_token_price_per_m":4750,"title":"grok-4-1-fast-reasoning","description":"","tags":[],"context_size":2000000,"status":1,"display_name":"grok-4-1-fast-reasoning","model_type":"chat","max_output_tokens":2000000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1764249920,"id":"grok-4-1-fast-non-reasoning","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1900,"output_token_price_per_m":4750,"title":"grok-4-1-fast-non-reasoning","description":"","tags":[],"context_size":2000000,"status":1,"display_name":"grok-4-1-fast-non-reasoning","model_type":"chat","max_output_tokens":2000000,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1761102665,"id":"grok-code-fast-1","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1900,"output_token_price_per_m":14250,"title":"grok-code-fast-1","description":"","tags":[],"context_size":256000,"status":1,"display_name":"grok-code-fast-1","model_type":"chat","max_output_tokens":256000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1760620992,"id":"grok-4-fast-reasoning","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1900,"output_token_price_per_m":4750,"title":"grok-4-fast-reasoning","description":"","tags":[],"context_size":2000000,"status":1,"display_name":"grok-4-fast-reasoning","model_type":"chat","max_output_tokens":2000000,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1760620921,"id":"grok-4-fast-non-reasoning","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1900,"output_token_price_per_m":4750,"title":"grok-4-fast-non-reasoning","description":"","tags":[],"context_size":2000000,"status":1,"display_name":"grok-4-fast-non-reasoning","model_type":"chat","max_output_tokens":2000000,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1753697174,"id":"grok-4-0709","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":28500,"output_token_price_per_m":142500,"title":"grok-4-0709","description":"","tags":[],"context_size":256000,"status":1,"display_name":"grok-4-0709","model_type":"chat","max_output_tokens":256000,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1754359807,"id":"grok-3","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":28500,"output_token_price_per_m":142500,"title":"grok-3","description":"","tags":[],"context_size":131072,"status":1,"display_name":"grok-3","model_type":"chat","max_output_tokens":32000,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1760621096,"id":"grok-3-mini","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":2850,"output_token_price_per_m":4750,"title":"grok-3-mini","description":"","tags":[],"context_size":131072,"status":1,"display_name":"grok-3-mini","model_type":"chat","max_output_tokens":131072,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1769500051,"id":"deepseek/deepseek-ocr-2","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":300,"output_token_price_per_m":300,"title":"deepseek/deepseek-ocr-2","description":"DeepSeek-OCR 2 is a multimodal document recognition model released by DeepSeek AI, serving as an upgrade to DeepSeek-OCR. By introducing the DeepEncoder V2 architecture, it achieves a paradigm shift in visual encoding from \"fixed scanning\" to \"semantic reasoning.\" The model replaces the original CLIP encoder with a lightweight language model (Qwen2-0.5B) and incorporates a causal flow query mechanism, while retaining the DeepSeek-3B-MoE decoder. The model requires only 256 to 1120 visual tokens to cover complex document pages. On the OmniDocBench v1.5 benchmark, it achieves an overall score of 91.09%, a 3.73% improvement over its predecessor, with reading order recognition edit distance reduced from 0.085 to 0.057.","tags":[],"context_size":8192,"status":1,"display_name":"DeepSeek-OCR 2","model_type":"chat","max_output_tokens":8192,"features":["serverless"],"endpoints":["chat/completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1755759094,"id":"deepseek/deepseek-v3.1","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":2700,"output_token_price_per_m":10000,"title":"deepseek/deepseek-v3.1","description":"DeepSeek-V3.1 is a hybrid model that supports both thinking mode and non-thinking mode.DeepSeek-V3.1 is post-trained on the top of DeepSeek-V3.1-Base, which is built upon the original V3 base checkpoint through a two-phase long context extension approach, following the methodology outlined in the original DeepSeek-V3 report. We have expanded our dataset by collecting additional long documents and substantially extending both training phases. The 32K extension phase has been increased 10-fold to 630B tokens, while the 128K extension phase has been extended by 3.3x to 209B tokens.","tags":[],"context_size":163840,"status":1,"display_name":"DeepSeek V3.1","model_type":"chat","max_output_tokens":32768,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","completions","anthropic"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1748457624,"id":"deepseek/deepseek-r1-0528","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":7000,"output_token_price_per_m":25000,"title":"deepseek/deepseek-r1-0528","description":"DeepSeek R1 0528 is the latest open-source model released by the DeepSeek team, featuring impressive reasoning capabilities, particularly achieving performance comparable to OpenAI's o1 model in mathematics, coding, and reasoning tasks.","tags":[],"context_size":163840,"status":1,"display_name":"DeepSeek R1 0528","model_type":"chat","max_output_tokens":32768,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","completions","batch-api"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1742909352,"id":"deepseek/deepseek-v3-0324","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":2800,"output_token_price_per_m":11400,"title":"deepseek/deepseek-v3-0324","description":"DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.","tags":[],"context_size":163840,"status":1,"display_name":"DeepSeek V3 0324","model_type":"chat","max_output_tokens":163840,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","completions","anthropic"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1770905461,"id":"minimax/minimax-m2.5","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":3000,"output_token_price_per_m":12000,"title":"minimax/minimax-m2.5","description":"MiniMax M2.1 is a cutting-edge AI model designed to revolutionize how developers build software. With enhanced multi-language programming support, it excels in generating high-quality code across popular languages like Rust, Java, Golang, C++, Kotlin, Objective-C, TypeScript, and JavaScript.\n\nKey improvements include:\n\n22% faster response times and 30% lower token consumption for efficient workflows.\nSeamless integration with leading development frameworks (Claude Code, Droid Factory AI, BlackBox, etc.).\nFull-stack development capabilities, from mobile (Android/iOS) to web and 3D interactive prototyping.\nOptimized performance-to-cost ratio, making AI-assisted development more accessible.\nWhether you're a software engineer, app developer, or tech innovator, M2.1 empowers smarter coding with industry-leading AI.","tags":[],"context_size":204800,"status":1,"display_name":"MiniMax M2.5","model_type":"chat","max_output_tokens":131072,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1766490593,"id":"minimax/minimax-m2.1","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":3000,"output_token_price_per_m":12000,"title":"minimax/minimax-m2.1","description":"MiniMax M2.1 is a cutting-edge AI model designed to revolutionize how developers build software. With enhanced multi-language programming support, it excels in generating high-quality code across popular languages like Rust, Java, Golang, C++, Kotlin, Objective-C, TypeScript, and JavaScript.\n\nKey improvements include:\n\n22% faster response times and 30% lower token consumption for efficient workflows.\nSeamless integration with leading development frameworks (Claude Code, Droid Factory AI, BlackBox, etc.).\nFull-stack development capabilities, from mobile (Android/iOS) to web and 3D interactive prototyping.\nOptimized performance-to-cost ratio, making AI-assisted development more accessible.\nWhether you're a software engineer, app developer, or tech innovator, M2.1 empowers smarter coding with industry-leading AI.","tags":[],"context_size":204800,"status":1,"display_name":"Minimax M2.1","model_type":"chat","max_output_tokens":131072,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1750139830,"id":"minimaxai/minimax-m1-80k","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":5500,"output_token_price_per_m":22000,"title":"minimaxai/minimax-m1-80k","description":"MiniMax-M1: The World's First Open-Weight, Large-Scale Hybrid Attention Inference Model  MiniMax-M1 adopts a Mixture of Experts (MoE) architecture and integrates the Flash Attention mechanism. The model contains a total of 456 billion parameters, with 45.9 billion parameters activated per token.  Natively, the M1 model supports a context length of 1 million tokens—8 times that of DeepSeek R1. Additionally, by combining the CISPO algorithm with an efficient hybrid attention design for reinforcement learning training, MiniMax-M1 achieves industry-leading performance in long-context reasoning and real-world software engineering scenarios.","tags":[],"context_size":1000000,"status":1,"display_name":"MiniMax M1","model_type":"chat","max_output_tokens":40000,"features":["function-calling","reasoning","serverless"],"endpoints":["chat/completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1770829181,"id":"zai-org/glm-5","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":10000,"output_token_price_per_m":32000,"title":"zai-org/glm-5","description":"GLM-5 is an open-source foundation model engineered for complex system engineering and long-horizon Agent tasks, delivering reliable productivity for top-tier programmers. Transcending the boundary from \"writing code\" to \"building systems,\" it moves beyond traditional snippet generation to offer senior-architect-level planning and execution capabilities. By rejecting the \"frontend-heavy, logic-light\" approach, GLM-5 demonstrates exceptional reasoning and self-healing abilities in backend refactoring, complex algorithm implementation, and deep debugging—autonomously analyzing logs and iteratively fixing persistent bugs until the system runs. As the first open-source model featuring Opus-class style and system engineering depth, GLM-5 provides extreme logic density alongside the freedom of local deployment and high cost-effectiveness, making it the ideal choice for large-scale backend development and automated Agent construction.","tags":[],"context_size":204800,"status":1,"display_name":"GLM-5","model_type":"chat","max_output_tokens":131072,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","anthropic","completion"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1770173383,"id":"zai-org/glm-ocr","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":300,"output_token_price_per_m":300,"title":"zai-org/glm-ocr","description":"GLM-OCR is a lightweight professional OCR model with only 0.9B parameters, achieving SOTA performance with a score of 94.62 on OmniDocBench V1.5. Optimized for real-world business scenarios, it delivers high-precision recognition for handwritten text, stamps, and code documentation. The model supports directly rendering complex tables into HTML code and extracting structured data from IDs and receipts into standard JSON format, enabling high-accuracy document parsing with minimal resource consumption.","tags":[],"context_size":32000,"status":1,"display_name":"GLM-OCR","model_type":"chat","max_output_tokens":32000,"features":["serverless","structured-outputs"],"endpoints":["chat/completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1768835606,"id":"zai-org/glm-4.7-flash","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":700,"output_token_price_per_m":4000,"title":"zai-org/glm-4.7-flash","description":"GLM-4.7-Flash, a state-of-the-art model in the 30B class, delivers a compelling balance of high performance and efficiency. Tailored for Agentic Coding, it strengthens coding proficiency, long-horizon planning, and tool synergy, securing top-tier results on public benchmarks among similarly sized open-source models. It excels in complex agent tasks with superior instruction following for tool use, while significantly elevating the frontend aesthetics and completion efficiency of long-range workflows in Artifacts and Agentic Coding.","tags":[],"context_size":200000,"status":1,"display_name":"GLM-4.7-Flash","model_type":"chat","max_output_tokens":128000,"features":["serverless","function-calling","structured-outputs","reasoning"],"endpoints":["chat/completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1766425351,"id":"zai-org/glm-4.7","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":6000,"output_token_price_per_m":22000,"title":"zai-org/glm-4.7","description":"GLM-4.7 is Z.AI's latest flagship model, with major upgrades focused on advanced coding capabilities and more reliable multi-step reasoning and execution. It shows clear gains in complex agent workflows, while delivering a more natural conversational experience and stronger front-end design sensibility.","tags":[],"context_size":204800,"status":1,"display_name":"GLM-4.7","model_type":"chat","max_output_tokens":131072,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","anthropic","completion"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1754914926,"id":"zai-org/glm-4.5v","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":6000,"output_token_price_per_m":18000,"title":"zai-org/glm-4.5v","description":"Z.ai's GLM-4.5V sets a new standard in visual reasoning, achieving SOTA performance across 42 benchmarks among open-source models. Beyond benchmarks, it excels in real-world applications through hybrid training, enabling comprehensive visual understanding—from image/video analysis and GUI interaction to complex document processing and precise visual grounding.  In China's GeoGuessr challenge, GLM-4.5V surpassed 99% of 21,000 human players within 16 hours, reaching 66th place in a week. Built on the GLM-4.5-Air foundation and inheriting GLM-4.1V-Thinking's approach, it leverages a 106B-parameter MoE architecture for scalable, efficient performance. This model bridges advanced AI research with practical deployment, delivering unmatched visual intelligence","tags":[],"context_size":65536,"status":1,"display_name":"GLM 4.5V","model_type":"chat","max_output_tokens":16384,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text","image","video"],"output_modalities":["text"]},{"created":1753709673,"id":"zai-org/glm-4.5","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":6000,"output_token_price_per_m":22000,"title":"zai-org/glm-4.5","description":"GLM-4.5 Series Models are foundation models specifically engineered for intelligent agents. The flagship GLM-4.5 integrates 355 billion total parameters (32 billion active), unifying reasoning, coding, and agent capabilities to address complex application demands. As a hybrid reasoning system, it offers dual operational modes: - Thinking Mode: Enables complex reasoning, tool invocation, and strategic planning - Non-Thinking Mode: Delivers low-latency responses for real-time interactions This architecture bridges high-performance AI with adaptive functionality for dynamic agent environments.","tags":[],"context_size":131072,"status":1,"display_name":"GLM-4.5","model_type":"chat","max_output_tokens":98304,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1769498625,"id":"moonshotai/kimi-k2.5","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":6000,"output_token_price_per_m":30000,"title":"moonshotai/kimi-k2.5","description":"Kimi K2.5 is the latest flagship iteration of Moonshot AI's large language model series, representing a significant leap in multimodal and agentic capabilities. It features a native multimodal architecture supporting both visual and text inputs, alongside versatile thinking and non-thinking modes. This model maintains the substantial 256k token context window found in the K2 series but achieves new open-source state-of-the-art (SoTA) performance across general intelligence, coding, and visual understanding benchmarks. Kimi K2.5 delivers a breakthrough in frontend development, enabling the generation of fully functional, aesthetically polished interactive interfaces with complex dynamic layouts directly from natural language. Optimized for complex problem-solving, it excels in multi-step tool invocation, logical reasoning, and full-stack code synthesis.","tags":[],"context_size":262144,"status":1,"display_name":"Kimi K2.5","model_type":"chat","max_output_tokens":262144,"features":["serverless","reasoning","structured-outputs","function-calling"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image","video"],"output_modalities":["text"]},{"created":1757052991,"id":"moonshotai/kimi-k2-0905","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":6000,"output_token_price_per_m":25000,"title":"moonshotai/kimi-k2-0905","description":"Kimi K2 0905 is the September update of Kimi K2 0711. It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k. This update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.","tags":[],"context_size":262144,"status":1,"display_name":"Kimi K2 0905","model_type":"chat","max_output_tokens":262144,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","completions","anthropic"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1752263515,"id":"moonshotai/kimi-k2-instruct","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":5700,"output_token_price_per_m":23000,"title":"moonshotai/kimi-k2-instruct","description":"Kimi K2 is a state-of-the-art mixture-of-experts (MoE) language model with 32 billion activated parameters and 1 trillion total parameters. Trained with the Muon optimizer, Kimi K2 achieves exceptional performance across frontier knowledge, reasoning, and coding tasks while being meticulously optimized for agentic capabilities.Specifically designed for tool use, reasoning, and autonomous problem-solving.","tags":[],"context_size":131072,"status":1,"display_name":"Kimi K2 Instruct","model_type":"chat","max_output_tokens":131072,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1767753130,"id":"doubao-seed-1-8-251228","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1103,"output_token_price_per_m":11034,"title":"doubao-seed-1-8-251228","description":"","tags":[],"context_size":256000,"status":1,"display_name":"doubao-seed-1-8-251228","model_type":"chat","max_output_tokens":64000,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions"],"input_modalities":["text","image","video"],"output_modalities":["text"]},{"created":1762496471,"id":"doubao-1-5-pro-32k-250115","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1100,"output_token_price_per_m":2750,"title":"doubao-1-5-pro-32k-250115","description":"","tags":[],"context_size":128000,"status":1,"display_name":"doubao-1-5-pro-32k-250115","model_type":"chat","max_output_tokens":12000,"features":["function-calling","serverless"],"endpoints":["chat/completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1761812776,"id":"doubao-1.5-pro-32k-character-250715","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1100,"output_token_price_per_m":2750,"title":"doubao-1.5-pro-32k-character-250715","description":"","tags":[],"context_size":200000,"status":1,"display_name":"doubao-1.5-pro-32k-character-250715","model_type":"chat","max_output_tokens":64000,"features":["function-calling","serverless"],"endpoints":["chat/completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1770156911,"id":"qwen/qwen3-coder-next","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":2000,"output_token_price_per_m":15000,"title":"qwen/qwen3-coder-next","description":"Qwen3-Coder-Next is an open-weight language model specifically engineered for coding agents and local development environments. This highly efficient model delivers exceptional performance with only 3B activated parameters out of 80B total parameters, achieving results comparable to models with 10-20x more active parameters while maintaining remarkable cost-effectiveness for agent deployment. Through its sophisticated training methodology, Qwen3-Coder-Next excels in advanced agentic capabilities including long-horizon reasoning, complex tool usage, and robust recovery from execution failures, ensuring reliable performance across dynamic coding tasks. The model's versatility is further enhanced by its 256k context length and adaptability to various scaffold templates, enabling seamless integration with diverse CLI/IDE platforms such as Claude Code, Qwen Code, Qoder, Kilo, Trae, and Cline, making it an ideal solution for comprehensive development environments.","tags":[],"context_size":262144,"status":1,"display_name":"Qwen3 Coder Next FP8","model_type":"chat","max_output_tokens":65536,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","completion","anthropic"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1757534155,"id":"qwen/qwen3-next-80b-a3b-instruct","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1500,"output_token_price_per_m":15000,"title":"qwen/qwen3-next-80b-a3b-instruct","description":"Qwen3-Next uses a highly sparse MoE design: 80B total parameters, but only ~3B activated per inference step. Experiments show that, with global load balancing, increasing total expert parameters while keeping activated experts fixed steadily reduces training loss.Compared to Qwen3’s MoE (128 total experts, 8 routed), Qwen3-Next expands to 512 total experts, combining 10 routed experts + 1 shared expert — maximizing resource usage without hurting performance. The Qwen3-Next-80B-A3B-Instruct performs comparably to our flagship model Qwen3-235B-A22B-Instruct-2507, and shows clear advantages in tasks requiring ultra-long context (up to 256K tokens). ","tags":[],"context_size":65536,"status":1,"display_name":"Qwen3 Next 80B A3B Instruct","model_type":"chat","max_output_tokens":65536,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","completions","anthropic"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1757533943,"id":"qwen/qwen3-next-80b-a3b-thinking","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1500,"output_token_price_per_m":15000,"title":"qwen/qwen3-next-80b-a3b-thinking","description":"Qwen3-Next uses a highly sparse MoE design: 80B total parameters, but only ~3B activated per inference step. Experiments show that, with global load balancing, increasing total expert parameters while keeping activated experts fixed steadily reduces training loss.Compared to Qwen3’s MoE (128 total experts, 8 routed), Qwen3-Next expands to 512 total experts, combining 10 routed experts + 1 shared expert — maximizing resource usage without hurting performance. The Qwen3-Next-80B-A3B-Thinking excels at complex reasoning tasks — outperforming higher-cost models like Qwen3-30B-A3B-Thinking-2507 and Qwen3-32B-Thinking, outpeforming the closed-source Gemini-2.5-Flash-Thinking on multiple benchmarks, and approaching the performance of our top-tier model Qwen3-235B-A22B-Thinking-2507.","tags":[],"context_size":65536,"status":1,"display_name":"Qwen3 Next 80B A3B Thinking","model_type":"chat","max_output_tokens":65536,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","completions","anthropic"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1756888269,"id":"qwen/qwen-mt-plus","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":2500,"output_token_price_per_m":7500,"title":"qwen/qwen-mt-plus","description":"Qwen-MT is a large language model optimized for machine translation, built upon the foundation of the Tongyi Qianwen model. It supports translation across 92 languages — including Chinese, English, Japanese, Korean, French, Spanish, German, Thai, Indonesian, Vietnamese, Arabic, and more — enabling seamless multilingual communication.","tags":[],"context_size":4096,"status":1,"display_name":"Qwen MT Plus","model_type":"chat","max_output_tokens":2048,"features":["serverless"],"endpoints":["chat/completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1753443150,"id":"qwen/qwen3-235b-a22b-thinking-2507","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":3000,"output_token_price_per_m":30000,"title":"qwen/qwen3-235b-a22b-thinking-2507","description":"The Qwen3-235B-A22B-Thinking-2507 represents the newest thinking-enabled model in the Qwen3 series, delivering groundbreaking improvements in reasoning capabilities. This advanced AI demonstrates significantly enhanced performance across logical reasoning, mathematics, scientific analysis, coding tasks, and academic benchmarks - matching or even surpassing human-expert level performance to achieve state-of-the-art results among open-source thinking models. Beyond its exceptional reasoning skills, the model shows markedly better general capabilities including more precise instruction following, sophisticated tool usage, highly natural text generation, and improved alignment with human preferences. It also features enhanced 256K long-context understanding, allowing it to maintain coherence and depth across extended documents and complex discussions.","tags":[],"context_size":131072,"status":1,"display_name":"Qwen3 235B A22b Thinking 2507","model_type":"chat","max_output_tokens":131072,"features":["function-calling","structured-outputs","reasoning","serverless"],"endpoints":["chat/completions","completions","anthropic"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1753233789,"id":"qwen/qwen3-coder-480b-a35b-instruct","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":2900,"output_token_price_per_m":12000,"title":"qwen/qwen3-coder-480b-a35b-instruct","description":"Qwen3-Coder-480B-A35B-Instruct is a cutting-edge open coding model from Qwen, matching Claude Sonnet’s performance in agentic programming, browser automation, and core development tasks. With native 256K context (extendable to 1M tokens via YaRN), it excels at repository-scale analysis and features specialized function-call support for platforms like Qwen Code and CLINE—making it ideal for complex, real-world development workflows.","tags":[],"context_size":262144,"status":1,"display_name":"Qwen3 Coder 480B A35B Instruct","model_type":"chat","max_output_tokens":65536,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","completions","anthropic"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1753176794,"id":"qwen/qwen3-235b-a22b-instruct-2507","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1500,"output_token_price_per_m":8000,"title":"qwen/qwen3-235b-a22b-instruct-2507","description":"Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" (\u003cthink\u003e blocks). Compared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.","tags":[],"context_size":131072,"status":1,"display_name":"Qwen3 235B A22B Instruct 2507","model_type":"chat","max_output_tokens":16384,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1745897181,"id":"qwen/qwen3-30b-a3b-fp8","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":900,"output_token_price_per_m":4500,"title":"qwen/qwen3-30b-a3b-fp8","description":"Achieves effective integration of inference and non-inference modes, allowing seamless switching between modes during conversations. Its inference capability matches that of QwQ-32B with a smaller parameter size, and its general capabilities significantly surpass those of Qwen2.5-14B, reaching the state-of-the-art (SOTA) level among models of the same scale.","tags":[],"context_size":40960,"status":1,"display_name":"Qwen3 30B A3B","model_type":"chat","max_output_tokens":20000,"features":["reasoning","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1745897287,"id":"qwen/qwen3-32b-fp8","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1000,"output_token_price_per_m":4500,"title":"qwen/qwen3-32b-fp8","description":"Achieves effective integration of inference and non-inference modes, allowing seamless switching between modes during conversations. Its inference capability matches that of QwQ-32B with a smaller parameter size, and its general capabilities significantly surpass those of Qwen2.5-14B, reaching the state-of-the-art (SOTA) level among models of the same scale.","tags":[],"context_size":40960,"status":1,"display_name":"Qwen3 32B","model_type":"chat","max_output_tokens":20000,"features":["reasoning","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1745897024,"id":"qwen/qwen3-235b-a22b-fp8","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":2000,"output_token_price_per_m":8000,"title":"qwen/qwen3-235b-a22b-fp8","description":"Achieves effective integration of inference and non-inference modes, enabling seamless switching between modes during conversations. The model's inference capability significantly surpasses that of QwQ, and its general capabilities exceed those of Qwen2.5-72B-Instruct, reaching the state-of-the-art (SOTA) level among models of the same scale.","tags":[],"context_size":40960,"status":1,"display_name":"Qwen3 235B A22B","model_type":"chat","max_output_tokens":20000,"features":["reasoning","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1744797581,"id":"qwen/qwen2.5-7b-instruct","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":700,"output_token_price_per_m":700,"title":"qwen/qwen2.5-7b-instruct","description":"Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters. Qwen2.5 brings the following improvements upon Qwen2: - Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains. - Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots. - Long-context Support up to 128K tokens and can generate up to 8K tokens. - Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.","tags":[],"context_size":32000,"status":1,"display_name":"Qwen2.5 7B Instruct","model_type":"chat","max_output_tokens":32000,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1742888969,"id":"qwen/qwen2.5-vl-72b-instruct","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":8000,"output_token_price_per_m":8000,"title":"qwen/qwen2.5-vl-72b-instruct","description":"Qwen2.5-VL, the latest vision-language model in the Qwen2.5 series, delivers enhanced multimodal capabilities including advanced visual comprehension for object/text recognition, chart/layout analysis, and agent-based dynamic tool orchestration. It processes long-form videos (\u003e1 hour) with key event detection while enabling precise spatial annotation through bounding boxes or coordinate points. The model specializes in structured data extraction from scanned documents (invoices, tables, etc.) and achieves state-of-the-art performance across multimodal benchmarks encompassing image understanding, temporal video analysis, and agent task evaluations.","tags":[],"context_size":32768,"status":1,"display_name":"Qwen2.5 VL 72B Instruct","model_type":"chat","max_output_tokens":32768,"features":["serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text","image","video"],"output_modalities":["text"]},{"created":1728962258,"id":"qwen/qwen-2.5-72b-instruct","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":3800,"output_token_price_per_m":4000,"title":"qwen/qwen-2.5-72b-instruct","description":"Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.","tags":[],"context_size":32000,"status":1,"display_name":"Qwen 2.5 72B Instruct","model_type":"chat","max_output_tokens":8192,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1751258620,"id":"baidu/ernie-4.5-vl-424b-a47b","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":4200,"output_token_price_per_m":12500,"title":"baidu/ernie-4.5-vl-424b-a47b","description":"The ERNIE 4.5 series of open-source models adopts a Mixture-of-Experts (MoE) architecture, representing an innovative multimodal heterogeneous model structure. It achieves cross-modal knowledge fusion through a parameter-sharing mechanism while retaining dedicated parameter spaces for individual modalities. This architecture is particularly well-suited for the continuous pre-training paradigm from large language models to multimodal models, significantly enhancing multimodal understanding capabilities while maintaining or even improving performance in text-based tasks. The models are efficiently trained, inferred, and deployed using the PaddlePaddle deep learning framework. During the pre-training of large language models, the Model FLOPs Utilization (MFU) reaches 47%. Experimental results demonstrate that this series of models achieves state-of-the-art (SOTA) performance across multiple text and multimodal benchmarks, with particularly outstanding results in instruction following, world knowledge memorizatio","tags":[],"context_size":123000,"status":1,"display_name":"ERNIE 4.5 VL 424B A47B","model_type":"chat","max_output_tokens":16000,"features":["function-calling","reasoning","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1751243379,"id":"baidu/ernie-4.5-300b-a47b-paddle","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":2800,"output_token_price_per_m":11000,"title":"baidu/ernie-4.5-300b-a47b-paddle","description":"The ERNIE 4.5 series of open-source models adopts a Mixture-of-Experts (MoE) architecture, representing an innovative multimodal heterogeneous model structure. It achieves cross-modal knowledge fusion through a parameter-sharing mechanism while retaining dedicated parameter spaces for individual modalities. This architecture is particularly well-suited for the continuous pre-training paradigm from large language models to multimodal models, significantly enhancing multimodal understanding capabilities while maintaining or even improving performance in text-based tasks. The models are efficiently trained, inferred, and deployed using the PaddlePaddle deep learning framework. During the pre-training of large language models, the Model FLOPs Utilization (MFU) reaches 47%. Experimental results demonstrate that this series of models achieves state-of-the-art (SOTA) performance across multiple text and multimodal benchmarks, with particularly outstanding results in instruction following, world knowledge memorizatio","tags":[],"context_size":123000,"status":1,"display_name":"ERNIE 4.5 300B A47B","model_type":"chat","max_output_tokens":12000,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1743906990,"id":"meta-llama/llama-4-maverick-17b-128e-instruct-fp8","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1700,"output_token_price_per_m":8500,"title":"meta-llama/llama-4-maverick-17b-128e-instruct-fp8","description":"Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.  Maverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.","tags":[],"context_size":1048576,"status":1,"display_name":"Llama 4 Maverick Instruct","model_type":"chat","max_output_tokens":1048576,"features":["function-calling","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1743906925,"id":"meta-llama/llama-4-scout-17b-16e-instruct","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1000,"output_token_price_per_m":5000,"title":"meta-llama/llama-4-scout-17b-16e-instruct","description":"Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.  Built for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.","tags":[],"context_size":131072,"status":1,"display_name":"Llama 4 Scout Instruct","model_type":"chat","max_output_tokens":131072,"features":["function-calling","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1733560109,"id":"meta-llama/llama-3.3-70b-instruct","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1300,"output_token_price_per_m":3900,"title":"meta-llama/llama-3.3-70b-instruct","description":"The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.  Supported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.","tags":[],"context_size":131072,"status":1,"display_name":"Llama 3.3 70B Instruct","model_type":"chat","max_output_tokens":120000,"features":["function-calling","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1732607748,"id":"meta-llama/llama-3.2-3b-instruct","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":300,"output_token_price_per_m":500,"title":"meta-llama/llama-3.2-3b-instruct","description":"The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 1B and 3B sizes (text in/text out)","tags":[],"context_size":32768,"status":1,"display_name":"Llama 3.2 3B Instruct","model_type":"chat","max_output_tokens":32000,"features":["function-calling","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1721801867,"id":"meta-llama/llama-3.1-8b-instruct","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":200,"output_token_price_per_m":500,"title":"meta-llama/llama-3.1-8b-instruct","description":"Meta's latest class of models, Llama 3.1, launched with a variety of sizes and configurations. The 8B instruct-tuned version is particularly fast and efficient. It has demonstrated strong performance in human evaluations, outperforming several leading closed-source models.","tags":[],"context_size":16384,"status":1,"display_name":"Llama 3.1 8B Instruct","model_type":"chat","max_output_tokens":16384,"features":["serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1722337858,"id":"mistralai/mistral-nemo","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":400,"output_token_price_per_m":1700,"title":"mistralai/mistral-nemo","description":"A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA. The model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi. It supports function calling and is released under the Apache 2.0 license.","tags":[],"context_size":60288,"status":1,"display_name":"Mistral Nemo","model_type":"chat","max_output_tokens":32000,"features":["structured-outputs","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1719492913,"id":"mistralai/mistral-7b-instruct","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":290,"output_token_price_per_m":590,"title":"mistralai/mistral-7b-instruct","description":"A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.","tags":[],"context_size":32768,"status":1,"display_name":"Mistral 7B Instruct","model_type":"chat","max_output_tokens":8192,"features":["serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1764832218,"id":"nova-2-Lite","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":3000,"output_token_price_per_m":25000,"title":"nova-2-Lite","description":"","tags":[],"context_size":1000000,"status":1,"display_name":"nova-2-Lite","model_type":"chat","max_output_tokens":1000000,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","anthropic"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1766142382,"id":"xiaomimimo/mimo-v2-flash","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1000,"output_token_price_per_m":3000,"title":"xiaomimimo/mimo-v2-flash","description":"","tags":[],"context_size":262144,"status":1,"display_name":"XiaomiMiMo/MiMo-V2-Flash","model_type":"chat","max_output_tokens":131072,"features":["serverless","function-calling","structured-outputs","reasoning"],"endpoints":["chat/completions","completion","anthropic"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1742889385,"id":"google/gemma-3-27b-it","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":1190,"output_token_price_per_m":2000,"title":"google/gemma-3-27b-it","description":"Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 32k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs. Gemma 3 27B is Google's latest open source model, successor to Gemma.","tags":[],"context_size":32768,"status":1,"display_name":"Gemma 3 27B","model_type":"chat","max_output_tokens":32768,"features":["structured-outputs","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text","image"],"output_modalities":["text"]},{"created":1732875917,"id":"Sao10K/L3-8B-Stheno-v3.2","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":500,"output_token_price_per_m":500,"title":"Sao10K/L3-8B-Stheno-v3.2","description":"Sao10K/L3-8B-Stheno-v3.2 is a highly skilled actor that excels at fully immersing itself in any role assigned.","tags":[],"context_size":8192,"status":1,"display_name":"L3 8B Stheno V3.2","model_type":"chat","max_output_tokens":32000,"features":["function-calling","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1732791714,"id":"sao10k/l3-8b-lunaris","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":500,"output_token_price_per_m":500,"title":"sao10k/l3-8b-lunaris","description":"A generalist / roleplaying model merge based on Llama 3.","tags":[],"context_size":8192,"status":1,"display_name":"Sao10k L3 8B Lunaris\t","model_type":"chat","max_output_tokens":8192,"features":["function-calling","structured-outputs","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1726742141,"id":"sao10k/l31-70b-euryale-v2.2","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":14800,"output_token_price_per_m":14800,"title":"sao10k/l31-70b-euryale-v2.2","description":"Euryale L3.1 70B v2.2 is a model focused on creative roleplay from Sao10k. It is the successor of Euryale L3 70B v2.1.","tags":[],"context_size":8192,"status":1,"display_name":"L31 70B Euryale V2.2","model_type":"chat","max_output_tokens":8192,"features":["function-calling","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1718699128,"id":"sao10k/l3-70b-euryale-v2.1","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":14800,"output_token_price_per_m":14800,"title":"sao10k/l3-70b-euryale-v2.1","description":"The uncensored llama3 model is a powerhouse of creativity, excelling in both roleplay and story writing. It offers a liberating experience during roleplays, free from any restrictions. This model stands out for its immense creativity, boasting a vast array of unique ideas and plots, truly a treasure trove for those seeking originality. Its unrestricted nature during roleplays allows for the full breadth of imagination to unfold, akin to an enhanced, big-brained version of Stheno. Perfect for creative minds seeking a boundless platform for their imaginative expressions, the uncensored llama3 model is an ideal choice","tags":[],"context_size":8192,"status":1,"display_name":"L3 70B Euryale V2.1\t","model_type":"chat","max_output_tokens":8192,"features":["function-calling","serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text"],"output_modalities":["text"]},{"created":1714024873,"id":"gryphe/mythomax-l2-13b","object":"model","owned_by":"unknown","permission":null,"root":"","parent":"","input_token_price_per_m":900,"output_token_price_per_m":900,"title":"gryphe/mythomax-l2-13b","description":"The idea behind this merge is that each layer is composed of several tensors, which are in turn responsible for specific functions. Using MythoLogic-L2's robust understanding as its input and Huginn's extensive writing capability as its output seems to have resulted in a model that exceeds at both, confirming my theory. (More details to be released at a later time).","tags":[],"context_size":4096,"status":1,"display_name":"Mythomax L2 13B","model_type":"chat","max_output_tokens":32000,"features":["serverless"],"endpoints":["chat/completions","completions"],"input_modalities":["text"],"output_modalities":["text"]}]}
