{"object":"list","data":[{"id":"alibaba/qwen-3-14b","object":"model","created":1755815280,"released":1745798400,"owned_by":"alibaba","name":"Qwen3-14B","description":"Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support","context_window":40960,"max_tokens":16384,"type":"language","tags":["reasoning","tool-use"],"pricing":{"input":"0.00000012","output":"0.00000024"}},{"id":"alibaba/qwen-3-235b","object":"model","created":1755815280,"released":1745798400,"owned_by":"alibaba","name":"Qwen3 235B A22B","description":"","context_window":262144,"max_tokens":16384,"type":"language","tags":["tool-use","reasoning"],"pricing":{"input":"0.00000022","output":"0.00000088"}},{"id":"alibaba/qwen-3-30b","object":"model","created":1755815280,"released":1745798400,"owned_by":"alibaba","name":"Qwen3-30B-A3B","description":"Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support","context_window":40960,"max_tokens":16384,"type":"language","tags":["reasoning","tool-use"],"pricing":{"input":"0.00000012","output":"0.0000005"}},{"id":"alibaba/qwen-3-32b","object":"model","created":1755815280,"released":1745798400,"owned_by":"alibaba","name":"Qwen 3 32B","description":"Qwen3-32B is a world-class model with comparable quality to DeepSeek R1 while outperforming GPT-4.1 and Claude Sonnet 3.7. It excels in code-gen, tool-calling, and advanced reasoning, making it an exceptional model for a wide range of production use cases.","context_window":128000,"max_tokens":8192,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.00000016","output":"0.00000064"}},{"id":"alibaba/qwen-3.6-max-preview","object":"model","created":1755815280,"released":1776643200,"owned_by":"alibaba","name":"Qwen 3.6 Max Preview","description":"Compared with the previously released Qwen3-Max and Qwen3.6-Plus, this model features enhanced vibe coding abilities, more efficient coding agent execution, and significantly improved front-end development skills. Additionally, its long-tail knowledge retention has been further upgraded.","context_window":240000,"max_tokens":64000,"type":"language","tags":["reasoning","tool-use"],"pricing":{"input":"0.0000013","input_tiers":[{"cost":"0.0000013","min":0,"max":128000},{"cost":"0.000002","min":128000}],"output":"0.0000078","output_tiers":[{"cost":"0.0000078","min":0,"max":128000},{"cost":"0.000012","min":128000}],"input_cache_read":"0.00000026","input_cache_read_tiers":[{"cost":"0.00000026","min":0,"max":128000},{"cost":"0.0000002","min":128000}],"input_cache_write":"0.000001625","input_cache_write_tiers":[{"cost":"0.000001625","min":0,"max":128000},{"cost":"0.0000025","min":128000}]}},{"id":"alibaba/qwen3-235b-a22b-thinking","object":"model","created":1755815280,"released":1758585600,"owned_by":"alibaba","name":"Qwen3 VL 235B A22B Thinking","description":"Qwen3 series VL models feature significantly enhanced multimodal reasoning capabilities, with a particular focus on optimizing the model for STEM and mathematical reasoning. Visual perception and recognition abilities have been comprehensively improved, and OCR capabilities have undergone a major upgrade.","context_window":131072,"max_tokens":32768,"type":"language","tags":["vision","reasoning","tool-use","file-input"],"pricing":{"input":"0.0000004","output":"0.000004"}},{"id":"alibaba/qwen3-coder","object":"model","created":1755815280,"released":1753142400,"owned_by":"alibaba","name":"Qwen3 Coder 480B A35B Instruct","description":"Qwen3-Coder-480B-A35B-Instruct is a cutting-edge open coding model from Qwen, matching Claude Sonnet’s performance in agentic programming, browser automation, and core development tasks.","context_window":262144,"max_tokens":65536,"type":"language","tags":["tool-use","implicit-caching"],"pricing":{"input":"0.0000015","input_tiers":[{"cost":"0.0000015","min":0,"max":32001},{"cost":"0.0000027","min":32001,"max":128001},{"cost":"0.0000045","min":128001}],"output":"0.0000075","output_tiers":[{"cost":"0.0000075","min":0,"max":32001},{"cost":"0.0000135","min":32001,"max":128001},{"cost":"0.0000225","min":128001}],"input_cache_read":"0.0000003","input_cache_read_tiers":[{"cost":"0.0000003","min":0,"max":32001},{"cost":"0.00000054","min":32001,"max":128001},{"cost":"0.0000009","min":128001}]}},{"id":"alibaba/qwen3-coder-30b-a3b","object":"model","created":1755815280,"released":1753920000,"owned_by":"alibaba","name":"Qwen 3 Coder 30B A3B Instruct","description":"Efficient coding specialist balancing performance with cost-effectiveness for daily development tasks while maintaining strong tool integration capabilities.","context_window":262144,"max_tokens":8192,"type":"language","tags":["tool-use"],"pricing":{"input":"0.00000015","output":"0.0000006"}},{"id":"alibaba/qwen3-coder-next","object":"model","created":1755815280,"released":1753142400,"owned_by":"alibaba","name":"Qwen3 Coder Next","description":"Qwen3-Coder-Next is an open-weight language model built specifically for coding, with strong performance on large-scale software engineering and agentic coding benchmarks. It uses a hybrid Mixture-of-Experts architecture to offer high capability at relatively modest active parameter counts, improving efficiency for real-world deployments. The model is trained on diverse code and natural language data so it can handle tasks like code generation, refactoring, debugging, repository-level reasoning, and technical explanation across multiple programming languages. It is also optimized for tool use and function calling, making it suitable as the core of coding agents that interact with shells, editors, issue trackers, and other developer tools.","context_window":256000,"max_tokens":256000,"type":"language","tags":["tool-use"],"pricing":{"input":"0.0000005","output":"0.0000012"}},{"id":"alibaba/qwen3-coder-plus","object":"model","created":1755815280,"released":1753228800,"owned_by":"alibaba","name":"Qwen3 Coder Plus","description":"Powered by Qwen3 this is a powerful Coding Agent that excels in tool calling and environment interaction to achieve autonomous programming. It combines outstanding coding proficiency with versatile general-purpose abilities.","context_window":1000000,"max_tokens":65536,"type":"language","tags":["implicit-caching","tool-use"],"pricing":{"input":"0.000001","input_tiers":[{"cost":"0.000001","min":0,"max":32001},{"cost":"0.0000018","min":32001,"max":128001},{"cost":"0.000003","min":128001,"max":256001},{"cost":"0.000006","min":256001}],"output":"0.000005","output_tiers":[{"cost":"0.000005","min":0,"max":32001},{"cost":"0.000009","min":32001,"max":128001},{"cost":"0.000015","min":128001,"max":256001},{"cost":"0.00006","min":256001}],"input_cache_read":"0.0000002","input_cache_read_tiers":[{"cost":"0.0000002","min":0,"max":32001},{"cost":"0.00000036","min":32001,"max":128001},{"cost":"0.0000006","min":128001,"max":256001},{"cost":"0.0000012","min":256001}]}},{"id":"alibaba/qwen3-embedding-0.6b","object":"model","created":1755815280,"released":1763078400,"owned_by":"alibaba","name":"Qwen3 Embedding 0.6B","description":"The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. Building upon the dense foundational models of the Qwen3 series, it provides a comprehensive range of text embeddings and reranking models in various sizes (0.6B, 4B, and 8B).","context_window":32768,"max_tokens":32768,"type":"embedding","pricing":{"input":"0.00000001"}},{"id":"alibaba/qwen3-embedding-4b","object":"model","created":1755815280,"released":1749081600,"owned_by":"alibaba","name":"Qwen3 Embedding 4B","description":"The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. Building upon the dense foundational models of the Qwen3 series, it provides a comprehensive range of text embeddings and reranking models in various sizes (0.6B, 4B, and 8B).","context_window":32768,"max_tokens":32768,"type":"embedding","pricing":{"input":"0.00000002"}},{"id":"alibaba/qwen3-embedding-8b","object":"model","created":1755815280,"released":1749081600,"owned_by":"alibaba","name":"Qwen3 Embedding 8B","description":"The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. Building upon the dense foundational models of the Qwen3 series, it provides a comprehensive range of text embeddings and reranking models in various sizes (0.6B, 4B, and 8B).","context_window":32768,"max_tokens":32768,"type":"embedding","pricing":{"input":"0.00000005"}},{"id":"alibaba/qwen3-max","object":"model","created":1755815280,"released":1758585600,"owned_by":"alibaba","name":"Qwen3 Max","description":"The Qwen 3 series Max model has undergone specialized upgrades in agent programming and tool invocation compared to the preview version. The officially released model this time has achieved state-of-the-art (SOTA) performance in its field and is better suited to meet the demands of agents operating in more complex scenarios.","context_window":262144,"max_tokens":32768,"type":"language","tags":["tool-use","implicit-caching"],"pricing":{"input":"0.0000012","input_tiers":[{"cost":"0.0000012","min":0,"max":32001},{"cost":"0.0000024","min":32001,"max":128001},{"cost":"0.000003","min":128001}],"output":"0.000006","output_tiers":[{"cost":"0.000006","min":0,"max":32001},{"cost":"0.000012","min":32001,"max":128001},{"cost":"0.000015","min":128001}],"input_cache_read":"0.00000024","input_cache_read_tiers":[{"cost":"0.00000024","min":0,"max":32001},{"cost":"0.00000048","min":32001,"max":128001},{"cost":"0.0000006","min":128001}]}},{"id":"alibaba/qwen3-max-preview","object":"model","created":1755815280,"released":1757030400,"owned_by":"alibaba","name":"Qwen3 Max Preview","description":"Qwen3-Max-Preview shows substantial gains over the 2.5 series in overall capability, with significant enhancements in Chinese-English text understanding, complex instruction following, handling of subjective open-ended tasks, multilingual ability, and tool invocation; model knowledge hallucinations are reduced.","context_window":262144,"max_tokens":32768,"type":"language","tags":["tool-use","implicit-caching"],"pricing":{"input":"0.0000012","input_tiers":[{"cost":"0.0000012","min":0,"max":32001},{"cost":"0.0000024","min":32001,"max":128001},{"cost":"0.000003","min":128001}],"output":"0.000006","output_tiers":[{"cost":"0.000006","min":0,"max":32001},{"cost":"0.000012","min":32001,"max":128001},{"cost":"0.000015","min":128001}],"input_cache_read":"0.00000024","input_cache_read_tiers":[{"cost":"0.00000024","min":0,"max":32001},{"cost":"0.00000048","min":32001,"max":128001},{"cost":"0.0000006","min":128001}]}},{"id":"alibaba/qwen3-max-thinking","object":"model","created":1755815280,"released":1769126400,"owned_by":"alibaba","name":"Qwen 3 Max Thinking","description":"Compared with the snapshot as of September 23, 2025, the Qwen-3 series Max model in this release achieves an effective integration of thinking and non-thinking modes, resulting in a comprehensive and substantial improvement in the model’s overall performance. In thinking mode, the model simultaneously supports web search, web information extraction, and a code interpreter tool, enabling it to tackle more complex and challenging problems with greater accuracy by leveraging external tools while engaging in slow, deliberative reasoning. This version is based on a snapshot taken on January 23, 2026.","context_window":256000,"max_tokens":65536,"type":"language","tags":["reasoning","tool-use"],"pricing":{"input":"0.0000012","input_tiers":[{"cost":"0.0000012","min":0,"max":32001},{"cost":"0.0000024","min":32001,"max":128001},{"cost":"0.000003","min":128001}],"output":"0.000006","output_tiers":[{"cost":"0.000006","min":0,"max":32001},{"cost":"0.000012","min":32001,"max":128001},{"cost":"0.000015","min":128001}],"input_cache_read":"0.00000024","input_cache_read_tiers":[{"cost":"0.00000024","min":0,"max":32001},{"cost":"0.00000048","min":32001,"max":128001},{"cost":"0.0000006","min":128001}]}},{"id":"alibaba/qwen3-next-80b-a3b-instruct","object":"model","created":1755815280,"released":1757548800,"owned_by":"alibaba","name":"Qwen3 Next 80B A3B Instruct","description":"A new generation of open-source, non-thinking mode model powered by Qwen3. This version demonstrates superior Chinese text understanding, augmented logical reasoning, and enhanced capabilities in text generation tasks over the previous iteration (Qwen3-235B-A22B-Instruct-2507).","context_window":131072,"max_tokens":32768,"type":"language","tags":["tool-use"],"pricing":{"input":"0.00000015","output":"0.0000012"}},{"id":"alibaba/qwen3-next-80b-a3b-thinking","object":"model","created":1755815280,"released":1757548800,"owned_by":"alibaba","name":"Qwen3 Next 80B A3B Thinking","description":"A new generation of Qwen3-based open-source thinking mode models. This version offers improved instruction following and streamlined summary responses over the previous iteration (Qwen3-235B-A22B-Thinking-2507).","context_window":131072,"max_tokens":32768,"type":"language","tags":["reasoning","tool-use"],"pricing":{"input":"0.00000015","output":"0.0000012"}},{"id":"alibaba/qwen3-vl-235b-a22b-instruct","object":"model","created":1755815280,"released":1758585600,"owned_by":"alibaba","name":"Qwen3 VL 235B A22B Instruct","description":"The Qwen3 series VL models has been comprehensively upgraded in areas such as visual coding and spatial perception. Its visual perception and recognition capabilities have significantly improved, supporting the understanding of ultra-long videos, and its OCR functionality has undergone a major enhancement.","context_window":131072,"max_tokens":129024,"type":"language","tags":["tool-use","vision","implicit-caching"],"pricing":{"input":"0.0000004","output":"0.0000016"}},{"id":"alibaba/qwen3-vl-instruct","object":"model","created":1755815280,"released":1758585600,"owned_by":"alibaba","name":"Qwen3 VL 235B A22B Instruct","description":"The Qwen3 series VL models has been comprehensively upgraded in areas such as visual coding and spatial perception. Its visual perception and recognition capabilities have significantly improved, supporting the understanding of ultra-long videos, and its OCR functionality has undergone a major enhancement.","context_window":131072,"max_tokens":129024,"type":"language","tags":["tool-use","vision","implicit-caching"],"pricing":{"input":"0.0000004","output":"0.0000016"}},{"id":"alibaba/qwen3-vl-thinking","object":"model","created":1755815280,"released":1758585600,"owned_by":"alibaba","name":"Qwen3 VL 235B A22B Thinking","description":"Qwen3 series VL models feature significantly enhanced multimodal reasoning capabilities, with a particular focus on optimizing the model for STEM and mathematical reasoning. Visual perception and recognition abilities have been comprehensively improved, and OCR capabilities have undergone a major upgrade.","context_window":131072,"max_tokens":32768,"type":"language","tags":["vision","reasoning","tool-use","file-input"],"pricing":{"input":"0.0000004","output":"0.000004"}},{"id":"alibaba/qwen3.5-flash","object":"model","created":1755815280,"released":1771891200,"owned_by":"alibaba","name":"Qwen 3.5 Flash","description":"The Qwen3.5 native vision-language Flash models are built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. Compared to the 3 series, these models deliver a leap forward in performance for both pure text and multimodal tasks, offering fast response times while balancing inference speed and overall performance.","context_window":1000000,"max_tokens":64000,"type":"language","tags":["vision","file-input","reasoning","tool-use"],"pricing":{"input":"0.0000001","output":"0.0000004","input_cache_read":"0.000000001","input_cache_write":"0.000000125"}},{"id":"alibaba/qwen3.5-plus","object":"model","created":1755815280,"released":1771200000,"owned_by":"alibaba","name":"Qwen 3.5 Plus","description":"The Qwen3.5 native vision-language series Plus models are built on a hybrid architecture that integrates linear attention mechanisms with sparse mixture-of-experts models, achieving higher inference efficiency. In a variety of task evaluations, the 3.5 series consistently demonstrates performance on par with state-of-the-art leading models. Compared to the 3 series, these models show a leap forward in both pure-text and multimodal capabilities.","context_window":1000000,"max_tokens":64000,"type":"language","tags":["vision","file-input","reasoning","tool-use"],"pricing":{"input":"0.0000004","input_tiers":[{"cost":"0.0000004","min":0,"max":256001},{"cost":"0.0000012","min":256001}],"output":"0.0000024","output_tiers":[{"cost":"0.0000024","min":0,"max":256001},{"cost":"0.0000072","min":256001}],"input_cache_read":"0.00000004","input_cache_read_tiers":[{"cost":"0.00000004","min":0,"max":256001},{"cost":"0.00000012","min":256001}],"input_cache_write":"0.0000005","input_cache_write_tiers":[{"cost":"0.0000005","min":0,"max":256001},{"cost":"0.0000015","min":256001}]}},{"id":"alibaba/qwen3.6-27b","object":"model","created":1755815280,"released":1776816000,"owned_by":"alibaba","name":"Qwen 3.6 27B","description":"The Qwen3.6 35B-A3B native vision-language model is built on a hybrid architecture that integrates linear attention mechanisms with a sparse mixture-of-experts framework, achieving higher inference efficiency. Compared with the 3.5-35B-A3B, this model demonstrates significantly improved agentic coding capabilities, mathematical and code reasoning abilities, spatial intelligence, as well as object localization and object detection performance.","context_window":256000,"max_tokens":256000,"type":"language","tags":["reasoning","tool-use","file-input","vision"],"pricing":{"input":"0.0000006","output":"0.0000036"}},{"id":"alibaba/qwen3.6-plus","object":"model","created":1755815280,"released":1775088000,"owned_by":"alibaba","name":"Qwen 3.6 Plus","description":"The Qwen3.6 native vision-language Plus series models demonstrate exceptional performance on par with the current state-of-the-art models, with a significant improvement in overall results compared to the 3.5 series. The models have been markedly enhanced in code-related capabilities such as agentic coding, front-end programming, and Vibe coding, as well as in multi-modal general object recognition, OCR, and object localization.","context_window":1000000,"max_tokens":64000,"type":"language","tags":["reasoning","tool-use","vision","file-input"],"pricing":{"input":"0.0000005","input_tiers":[{"cost":"0.0000005","min":0,"max":256000},{"cost":"0.000002","min":256000}],"output":"0.000003","output_tiers":[{"cost":"0.000003","min":0,"max":256000},{"cost":"0.000006","min":256000}],"input_cache_read":"0.0000001","input_cache_read_tiers":[{"cost":"0.0000001","min":0,"max":256000},{"cost":"0.0000002","min":256000}],"input_cache_write":"0.000000625","input_cache_write_tiers":[{"cost":"0.000000625","min":0,"max":256000},{"cost":"0.0000025","min":256000}]}},{"id":"alibaba/qwen3.7-max","object":"model","created":1755815280,"released":1779321600,"owned_by":"alibaba","name":"Qwen 3.7 Max","description":"Qwen3.7 is a next‑generation flagship model designed for the agent‑centric era, with its core strengths lying in the breadth and depth of its agent‑level capabilities: it excels at programming, office and productivity tasks, and long‑term autonomous execution.","context_window":991000,"max_tokens":64000,"type":"language","tags":["implicit-caching","reasoning","tool-use"],"pricing":{"input":"0.00000125","output":"0.00000375","input_cache_read":"0.00000025","input_cache_write":"0.0000015625"}},{"id":"alibaba/qwen3.7-plus","object":"model","created":1755815280,"released":1780358400,"owned_by":"alibaba","name":"Qwen 3.7 Plus","description":"Among the Qwen3.7 series, the cost-effective Plus model builds on its robust text capabilities while delivering a comprehensive upgrade to its vision‑language abilities, all while preserving its full‑stack agent‑level intelligence for coding, tool use, and productivity workflows.","context_window":1000000,"max_tokens":64000,"type":"language","tags":["reasoning","tool-use","implicit-caching","file-input","vision"],"pricing":{"input":"0.0000004","input_tiers":[{"cost":"0.0000004","min":0,"max":256000},{"cost":"0.0000012","min":256000}],"output":"0.0000016","output_tiers":[{"cost":"0.0000016","min":0,"max":256000},{"cost":"0.0000048","min":256000}],"input_cache_read":"0.00000008","input_cache_read_tiers":[{"cost":"0.00000008","min":0,"max":256000},{"cost":"0.00000024","min":256000}],"input_cache_write":"0.0000005","input_cache_write_tiers":[{"cost":"0.0000005","min":0,"max":256000},{"cost":"0.0000015","min":256000}]}},{"id":"alibaba/wan-v2.5-t2v-preview","object":"model","created":1755815280,"released":1758672000,"owned_by":"alibaba","name":"Wan v2.5 Text-to-Video Preview","description":"","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["text-to-video"],"supported_resolutions":["480p","720p","1080p"],"supported_aspect_ratios":["16:9","9:16","1:1","4:3","3:4"],"supported_durations_seconds":[5,10],"generate_audio":true,"supported_fps":[24],"input_limits":{"text":{"max_chars":1500},"audio":{"max_file_size_mb":15,"supported_formats":["wav","mp3"],"supported_sources":["url"],"min_duration_seconds":3,"max_duration_seconds":30}}},"pricing":{"video_duration_pricing":[{"resolution":"480p","cost_per_second":"0.05"},{"resolution":"720p","cost_per_second":"0.1"},{"resolution":"1080p","cost_per_second":"0.15"}]}},{"id":"alibaba/wan-v2.6-i2v","object":"model","created":1755815280,"released":1765843200,"owned_by":"alibaba","name":"Wan v2.6 Image-to-Video","description":"","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["image-to-video"],"supported_resolutions":["720p","1080p"],"supported_aspect_ratios":["16:9","9:16","1:1","4:3","3:4"],"supported_durations_seconds":[2,3,4,5,6,7,8,9,10,11,12,13,14,15],"generate_audio":true,"supported_fps":[24],"input_limits":{"text":{"max_chars":1500},"image":{"max_count":1,"max_file_size_mb":20,"supported_formats":["jpeg","jpg","png","bmp","webp"],"supported_sources":["url"],"min_dimension_pixels":240,"max_dimension_pixels":8000},"audio":{"max_file_size_mb":15,"supported_formats":["wav","mp3"],"supported_sources":["url"],"min_duration_seconds":3,"max_duration_seconds":30}}},"pricing":{"video_duration_pricing":[{"resolution":"720p","cost_per_second":"0.1"},{"resolution":"1080p","cost_per_second":"0.15"}]}},{"id":"alibaba/wan-v2.6-i2v-flash","object":"model","created":1755815280,"released":1765843200,"owned_by":"alibaba","name":"Wan v2.6 Image-to-Video Flash","description":"","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["image-to-video"],"supported_resolutions":["720p","1080p"],"supported_aspect_ratios":["16:9","9:16","1:1","4:3","3:4"],"supported_durations_seconds":[2,3,4,5,6,7,8,9,10,11,12,13,14,15],"generate_audio":true,"supported_fps":[24],"input_limits":{"text":{"max_chars":1500},"image":{"max_count":1,"max_file_size_mb":20,"supported_formats":["jpeg","jpg","png","bmp","webp"],"supported_sources":["url"],"min_dimension_pixels":240,"max_dimension_pixels":8000},"audio":{"max_file_size_mb":15,"supported_formats":["wav","mp3"],"supported_sources":["url"],"min_duration_seconds":3,"max_duration_seconds":30}}},"pricing":{"video_duration_pricing":[{"resolution":"720p","cost_per_second":"0.05"},{"resolution":"1080p","cost_per_second":"0.075"}]}},{"id":"alibaba/wan-v2.6-r2v","object":"model","created":1755815280,"released":1765843200,"owned_by":"alibaba","name":"Wan v2.6 Reference-to-Video","description":"","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["reference-to-video"],"supported_resolutions":["720p","1080p"],"supported_aspect_ratios":["16:9","9:16","1:1","4:3","3:4"],"supported_durations_seconds":[2,3,4,5,6,7,8,9,10],"generate_audio":true,"supported_fps":[24],"input_limits":{"text":{"max_chars":1500},"image":{"max_count":5,"max_file_size_mb":20,"supported_formats":["jpeg","jpg","png","bmp","webp"],"supported_sources":["url"],"min_dimension_pixels":240,"max_dimension_pixels":8000},"video":{"max_count":3,"max_file_size_mb":100,"supported_formats":["mp4","mov"],"supported_sources":["url"],"min_duration_seconds":1,"max_duration_seconds":30},"max_total_inputs":5}},"pricing":{"video_duration_pricing":[{"resolution":"720p","cost_per_second":"0.1"},{"resolution":"1080p","cost_per_second":"0.15"}]}},{"id":"alibaba/wan-v2.6-r2v-flash","object":"model","created":1755815280,"released":1765843200,"owned_by":"alibaba","name":"Wan v2.6 Reference-to-Video Flash","description":"","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["reference-to-video"],"supported_resolutions":["720p","1080p"],"supported_aspect_ratios":["16:9","9:16","1:1","4:3","3:4"],"supported_durations_seconds":[2,3,4,5,6,7,8,9,10],"generate_audio":true,"supported_fps":[24],"input_limits":{"text":{"max_chars":1500},"image":{"max_count":5,"max_file_size_mb":20,"supported_formats":["jpeg","jpg","png","bmp","webp"],"supported_sources":["url"],"min_dimension_pixels":240,"max_dimension_pixels":8000},"video":{"max_count":3,"max_file_size_mb":100,"supported_formats":["mp4","mov"],"supported_sources":["url"],"min_duration_seconds":1,"max_duration_seconds":30},"max_total_inputs":5}},"pricing":{"video_duration_pricing":[{"resolution":"720p","cost_per_second":"0.05"},{"resolution":"1080p","cost_per_second":"0.075"}]}},{"id":"alibaba/wan-v2.6-t2v","object":"model","created":1755815280,"released":1765843200,"owned_by":"alibaba","name":"Wan v2.6 Text-to-Video","description":"","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["text-to-video"],"supported_resolutions":["720p","1080p"],"supported_aspect_ratios":["16:9","9:16","1:1","4:3","3:4"],"supported_durations_seconds":[2,3,4,5,6,7,8,9,10,11,12,13,14,15],"generate_audio":true,"supported_fps":[24],"input_limits":{"text":{"max_chars":1500},"audio":{"max_file_size_mb":15,"supported_formats":["wav","mp3"],"supported_sources":["url"],"min_duration_seconds":3,"max_duration_seconds":30}}},"pricing":{"video_duration_pricing":[{"resolution":"720p","cost_per_second":"0.1"},{"resolution":"1080p","cost_per_second":"0.15"}]}},{"id":"alibaba/wan-v2.7-r2v","object":"model","created":1755815280,"released":1775520000,"owned_by":"alibaba","name":"Wan v2.7 Reference-to-Video","description":"","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["reference-to-video"],"supported_resolutions":["720p","1080p"],"supported_aspect_ratios":["16:9","9:16","1:1","4:3","3:4"],"supported_durations_seconds":[2,3,4,5,6,7,8,9,10],"generate_audio":true,"supported_fps":[24],"input_limits":{"text":{"max_chars":5000},"image":{"max_count":5,"max_file_size_mb":20,"supported_formats":["jpeg","jpg","png","bmp","webp"],"supported_sources":["url","base64"],"min_dimension_pixels":240,"max_dimension_pixels":8000,"min_aspect_ratio":"1:8","max_aspect_ratio":"8:1"},"video":{"max_count":3,"max_file_size_mb":100,"supported_formats":["mp4","mov"],"supported_sources":["url"],"min_duration_seconds":1,"max_duration_seconds":30,"min_dimension_pixels":240,"max_dimension_pixels":4096},"audio":{"max_file_size_mb":15,"supported_formats":["wav","mp3"],"supported_sources":["url"],"min_duration_seconds":1,"max_duration_seconds":10},"max_total_inputs":5}},"pricing":{"video_duration_pricing":[{"resolution":"720p","cost_per_second":"0.1"},{"resolution":"1080p","cost_per_second":"0.15"}]}},{"id":"alibaba/wan-v2.7-t2v","object":"model","created":1755815280,"released":1775520000,"owned_by":"alibaba","name":"Wan v2.7 Text-to-Video","description":"","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["text-to-video"],"supported_resolutions":["720p","1080p"],"supported_aspect_ratios":["16:9","9:16","1:1","4:3","3:4"],"supported_durations_seconds":[2,3,4,5,6,7,8,9,10,11,12,13,14,15],"generate_audio":true,"supported_fps":[24],"input_limits":{"text":{"max_chars":5000},"audio":{"max_file_size_mb":15,"supported_formats":["wav","mp3"],"supported_sources":["url"],"min_duration_seconds":2,"max_duration_seconds":30}}},"pricing":{"video_duration_pricing":[{"resolution":"720p","cost_per_second":"0.1"},{"resolution":"1080p","cost_per_second":"0.15"}]}},{"id":"amazon/nova-2-lite","object":"model","created":1755815280,"released":1764633600,"owned_by":"amazon","name":"Nova 2 Lite","description":"Nova 2 Lite is a fast, cost-effective reasoning model for everyday workloads that can process text, images, and videos to generate text.","context_window":1000000,"max_tokens":1000000,"type":"language","tags":["file-input","reasoning","tool-use","vision"],"regions":["eu","us"],"pricing":{"input":"0.0000003","output":"0.0000025","input_cache_read":"0.000000075"}},{"id":"amazon/nova-lite","object":"model","created":1755815280,"released":1733184000,"owned_by":"amazon","name":"Nova Lite","description":"A very low cost multimodal model that is lightning fast for processing image, video, and text inputs.","context_window":300000,"max_tokens":8192,"type":"language","tags":["file-input","tool-use","vision"],"regions":["eu"],"pricing":{"input":"0.00000006","output":"0.00000024"}},{"id":"amazon/nova-micro","object":"model","created":1755815280,"released":1733184000,"owned_by":"amazon","name":"Nova Micro","description":"A text-only model that delivers the lowest latency responses at very low cost.","context_window":128000,"max_tokens":8192,"type":"language","tags":["tool-use"],"regions":["eu"],"pricing":{"input":"0.000000035","output":"0.00000014"}},{"id":"amazon/nova-pro","object":"model","created":1755815280,"released":1733184000,"owned_by":"amazon","name":"Nova Pro","description":"A highly capable multimodal model with the best combination of accuracy, speed, and cost for a wide range of tasks.","context_window":300000,"max_tokens":8192,"type":"language","tags":["file-input","tool-use","vision"],"regions":["eu"],"pricing":{"input":"0.0000008","output":"0.0000032"}},{"id":"amazon/titan-embed-text-v2","object":"model","created":1755815280,"released":1714435200,"owned_by":"amazon","name":"Titan Text Embeddings V2","description":"Amazon Titan Text Embeddings V2 is a light weight, efficient multilingual embedding model supporting 1024, 512, and 256 dimensions.","context_window":0,"max_tokens":0,"type":"embedding","pricing":{"input":"0.00000002"}},{"id":"anthropic/claude-3-haiku","object":"model","created":1755815280,"released":1710288000,"owned_by":"anthropic","name":"Claude 3 Haiku","description":"Claude 3 Haiku is Anthropic's fastest, most compact model for near-instant responsiveness. It answers simple queries and requests with speed. Customers will be able to build seamless AI experiences that mimic human interactions. Claude 3 Haiku can process images and return text outputs, and features a 200K context window.","context_window":200000,"max_tokens":4096,"type":"language","tags":["tool-use","vision","explicit-caching"],"pricing":{"input":"0.00000025","output":"0.00000125","input_cache_read":"0.00000003","input_cache_write":"0.0000003"}},{"id":"anthropic/claude-fable-5","object":"model","created":1755815280,"released":1782864000,"owned_by":"anthropic","name":"Claude Fable 5","description":"Claude Fable 5 is a Mythos-class model with robust safeguards. It can handle long-running, complex, and asynchronous tasks where previous models would have needed more frequent check-ins.","context_window":1000000,"max_tokens":128000,"type":"language","tags":["reasoning","tool-use","explicit-caching","file-input","vision"],"regions":["us"],"pricing":{"input":"0.00001","output":"0.00005","input_cache_read":"0.000001","input_cache_write":"0.0000125","web_search":"10"}},{"id":"anthropic/claude-haiku-4.5","object":"model","created":1755815280,"released":1760486400,"owned_by":"anthropic","name":"Claude Haiku 4.5","description":"Claude Haiku 4.5 matches Sonnet 4's performance on coding, computer use, and agent tasks at substantially lower cost and faster speeds. It delivers near-frontier performance and Claude’s unique character at a price point that works for scaled sub-agent deployments, free tier products, and intelligence-sensitive applications with budget constraints.","context_window":200000,"max_tokens":64000,"type":"language","tags":["explicit-caching","file-input","reasoning","tool-use","vision","web-search"],"regions":["eu","us"],"pricing":{"input":"0.000001","output":"0.000005","input_cache_read":"0.0000001","input_cache_write":"0.00000125","web_search":"10"}},{"id":"anthropic/claude-opus-4","object":"model","created":1755815280,"released":1747872000,"owned_by":"anthropic","name":"Claude Opus 4","description":"Claude Opus 4 is Anthropic's most powerful model yet and the state-of-the-art coding model. It delivers sustained performance on long-running tasks that require focused effort and thousands of steps, significantly expanding what AI agents can solve. Claude Opus 4 is ideal for powering frontier agent products and features.","context_window":200000,"max_tokens":8192,"type":"language","tags":["file-input","reasoning","tool-use","vision","explicit-caching"],"pricing":{"input":"0.000015","output":"0.000075","input_cache_read":"0.0000015","input_cache_write":"0.00001875","web_search":"10"}},{"id":"anthropic/claude-opus-4.1","object":"model","created":1755815280,"released":1754352000,"owned_by":"anthropic","name":"Claude Opus 4.1","description":"Claude Opus 4.1 is a drop-in replacement for Opus 4 that delivers superior performance and precision for real-world coding and agentic tasks. Opus 4.1 advances state-of-the-art coding performance to 74.5% on SWE-bench Verified, and handles complex, multi-step problems with more rigor and attention to detail.","context_window":200000,"max_tokens":32000,"type":"language","tags":["file-input","reasoning","tool-use","vision","explicit-caching"],"pricing":{"input":"0.000015","output":"0.000075","input_cache_read":"0.0000015","input_cache_write":"0.00001875","web_search":"10"}},{"id":"anthropic/claude-opus-4.5","object":"model","created":1755815280,"released":1763942400,"owned_by":"anthropic","name":"Claude Opus 4.5","description":"Claude Opus 4.5 is Anthropic’s latest model in the Opus series, meant for demanding reasoning tasks and complex problem solving. This model has improvements in general intelligence and vision compared to previous iterations. In addition, it is suited for difficult coding tasks and agentic workflows, especially those with computer use and tool use, and can effectively handle context usage and external memory files.","context_window":200000,"max_tokens":64000,"type":"language","tags":["explicit-caching","file-input","reasoning","tool-use","vision","web-search"],"regions":["eu","us"],"pricing":{"input":"0.000005","output":"0.000025","input_cache_read":"0.0000005","input_cache_write":"0.00000625","web_search":"10"}},{"id":"anthropic/claude-opus-4.6","object":"model","created":1755815280,"released":1770249600,"owned_by":"anthropic","name":"Claude Opus 4.6","description":"Opus 4.6 is the world’s best model for coding and professional work, built to power agents that take on whole categories of real-world work. It excels across the entire SDLC, breaking through on hard problems, identifying complex bugs, and demonstrating deeper codebase understanding. It also delivers a step-change in knowledge work, with near-production-ready documents, presentations, and spreadsheets on the first pass.","context_window":1000000,"max_tokens":128000,"type":"language","tags":["tool-use","reasoning","vision","file-input","explicit-caching","web-search"],"regions":["eu","us"],"pricing":{"input":"0.000005","output":"0.000025","input_cache_read":"0.0000005","input_cache_write":"0.00000625","web_search":"10"}},{"id":"anthropic/claude-opus-4.7","object":"model","created":1755815280,"released":1776297600,"owned_by":"anthropic","name":"Claude Opus 4.7","description":"Opus 4.7 builds on the coding and agentic strengths of Opus 4.6 with stronger performance on complex, multi-step tasks and more reliable agentic execution. It also brings improved performance on knowledge work, from drafting documents to building presentations and analyzing data.","context_window":1000000,"max_tokens":128000,"type":"language","tags":["tool-use","reasoning","vision","file-input","explicit-caching","web-search","fast"],"regions":["eu","us"],"pricing":{"input":"0.000005","output":"0.000025","input_cache_read":"0.0000005","input_cache_write":"0.00000625","web_search":"10"}},{"id":"anthropic/claude-opus-4.7-fast","object":"model","created":1755815280,"released":1776297600,"owned_by":"anthropic","name":"Claude Opus 4.7 (Fast)","description":"Opus 4.7 builds on the coding and agentic strengths of Opus 4.6 with stronger performance on complex, multi-step tasks and more reliable agentic execution. It also brings improved performance on knowledge work, from drafting documents to building presentations and analyzing data.","context_window":1000000,"max_tokens":128000,"type":"language","tags":["tool-use","reasoning","vision","file-input","explicit-caching","web-search","fast"],"regions":["eu","us"],"pricing":{"input":"0.00003","output":"0.00015","input_cache_read":"0.000003","input_cache_write":"0.0000375","web_search":"10"}},{"id":"anthropic/claude-opus-4.8","object":"model","created":1755815280,"released":1779926400,"owned_by":"anthropic","name":"Claude Opus 4.8","description":"Opus 4.8 is a focused upgrade to Opus 4.7 and is Anthropic's best generally available model for coding, agentic tasks, and enterprise workflows. It builds on the strengths of previous Opus models with stronger performance on complex, multi-step coding tasks. Anthropic recommends using it on long-horizon coding and agentic tasks. It is also stronger on professional work, including document drafting, data analysis, and presentations.","context_window":1000000,"max_tokens":128000,"type":"language","tags":["tool-use","reasoning","vision","file-input","explicit-caching","web-search","fast"],"regions":["eu","us"],"pricing":{"input":"0.000005","output":"0.000025","input_cache_read":"0.0000005","input_cache_write":"0.00000625","web_search":"10"}},{"id":"anthropic/claude-opus-4.8-fast","object":"model","created":1755815280,"released":1779926400,"owned_by":"anthropic","name":"Claude Opus 4.8 (Fast)","description":"Opus 4.8 is a focused upgrade to Opus 4.7 and is Anthropic's best generally available model for coding, agentic tasks, and enterprise workflows. It builds on the strengths of previous Opus models with stronger performance on complex, multi-step coding tasks. Anthropic recommends using it on long-horizon coding and agentic tasks. It is also stronger on professional work, including document drafting, data analysis, and presentations.","context_window":1000000,"max_tokens":128000,"type":"language","tags":["tool-use","reasoning","vision","file-input","explicit-caching","web-search","fast"],"regions":["eu","us"],"pricing":{"input":"0.00001","output":"0.00005","input_cache_read":"0.000001","input_cache_write":"0.0000125","web_search":"10"}},{"id":"anthropic/claude-sonnet-4","object":"model","created":1755815280,"released":1747872000,"owned_by":"anthropic","name":"Claude Sonnet 4","description":"Claude Sonnet 4 balances impressive performance for coding with the right speed and cost for high-volume use cases: Coding: Handle everyday development tasks with enhanced performance-power code reviews, bug fixes, API integrations, and feature development with immediate feedback loops.","context_window":1000000,"max_tokens":8192,"type":"language","tags":["file-input","reasoning","tool-use","vision","explicit-caching"],"pricing":{"input":"0.000003","input_tiers":[{"cost":"0.000003","min":0,"max":200001},{"cost":"0.000006","min":200001}],"output":"0.000015","output_tiers":[{"cost":"0.000015","min":0,"max":200001},{"cost":"0.0000225","min":200001}],"input_cache_read":"0.0000003","input_cache_read_tiers":[{"cost":"0.0000003","min":0,"max":200001},{"cost":"0.0000006","min":200001}],"input_cache_write":"0.00000375","input_cache_write_tiers":[{"cost":"0.00000375","min":0,"max":200001},{"cost":"0.0000075","min":200001}]}},{"id":"anthropic/claude-sonnet-4.5","object":"model","created":1755815280,"released":1759104000,"owned_by":"anthropic","name":"Claude Sonnet 4.5","description":"Claude Sonnet 4.5 is the newest model in the Sonnet series, offering improvements and updates over Sonnet 4.","context_window":1000000,"max_tokens":64000,"type":"language","tags":["explicit-caching","file-input","reasoning","tool-use","vision","web-search"],"regions":["eu","us"],"pricing":{"input":"0.000003","input_tiers":[{"cost":"0.000003","min":0,"max":200001},{"cost":"0.000006","min":200001}],"output":"0.000015","output_tiers":[{"cost":"0.000015","min":0,"max":200001},{"cost":"0.0000225","min":200001}],"input_cache_read":"0.0000003","input_cache_read_tiers":[{"cost":"0.0000003","min":0,"max":200001},{"cost":"0.0000006","min":200001}],"input_cache_write":"0.00000375","input_cache_write_tiers":[{"cost":"0.00000375","min":0,"max":200001},{"cost":"0.0000075","min":200001}],"web_search":"10"}},{"id":"anthropic/claude-sonnet-4.6","object":"model","created":1755815280,"released":1771286400,"owned_by":"anthropic","name":"Claude Sonnet 4.6","description":"Claude Sonnet 4.6 is the most capable Sonnet-class model yet, with frontier performance across coding, agents, and professional work. It excels at iterative development, complex codebase navigation, end-to-end project management with memory, polished document creation, and confident computer use for web QA and workflow automation.","context_window":1000000,"max_tokens":128000,"type":"language","tags":["file-input","reasoning","tool-use","vision","explicit-caching","web-search"],"regions":["eu","us"],"pricing":{"input":"0.000003","output":"0.000015","input_cache_read":"0.0000003","input_cache_write":"0.00000375","web_search":"10"}},{"id":"anthropic/claude-sonnet-5","object":"model","created":1755815280,"released":1782691200,"owned_by":"anthropic","name":"Claude Sonnet 5","description":"Sonnet 5 is an upgrade to Sonnet 4.6, with gains across agentic coding and professional work. It builds on the strengths of previous Sonnet models, bringing top-tier intelligence at Sonnet pricing for coding, agents, and everyday professional work at scale.","context_window":1000000,"max_tokens":128000,"type":"language","tags":["reasoning","tool-use","explicit-caching","file-input","vision","web-search"],"regions":["eu","us"],"pricing":{"input":"0.000002","output":"0.00001","input_cache_read":"0.0000002","input_cache_write":"0.0000025","web_search":"10"}},{"id":"arcee-ai/trinity-large-thinking","object":"model","created":1755815280,"released":1775001600,"owned_by":"arcee-ai","name":"Trinity Large Thinking","description":"Trinity-Large-Thinking is a reasoning-optimized variant of Arcee AI's Trinity-Large family — a 398B-parameter sparse Mixture-of-Experts (MoE) model with approximately 13B active parameters per token. Built on Trinity-Large-Base and post-trained with extended chain-of-thought reasoning and agentic RL, Trinity-Large-Thinking delivers state-of-the-art performance on agentic benchmarks while maintaining strong general capabilities.","context_window":262100,"max_tokens":80000,"type":"language","tags":["reasoning","tool-use"],"pricing":{"input":"0.00000025","output":"0.0000009"}},{"id":"arcee-ai/trinity-mini","object":"model","created":1755815280,"released":1764547200,"owned_by":"arcee-ai","name":"Trinity Mini","description":"Trinity Mini is a 26B-parameter (3B active) sparse mixture-of-experts language model, engineered for efficient inference over long contexts with robust function calling and multi-step agent workflows.","context_window":131072,"max_tokens":131072,"type":"language","tags":["tool-use"],"pricing":{"input":"0.000000045","output":"0.00000015"}},{"id":"bfl/flux-2-flex","object":"model","created":1755815280,"released":1764028800,"owned_by":"bfl","name":"FLUX.2 [flex]","description":"FLUX.2 is a completely new base model trained for visual intelligence, not just pixel generation, setting a new standard for both image generation and image editing. With FLUX.2 models you can expect the highest quality, higher resolutions (up to 4MP), and new capabilities like multi-ref images. FLUX.2 [flex] supports customizable image generation and editing with adjustable steps and guidance. It's better at typography and text rendering. It supports up to 10 reference images (up to 14 MP total input).\nThis provider gives the option to change the moderation level for inputs and outputs. The control is under safety tolerance and is by default 2 on a range from 0 (more strict) through 6 (more permissive).","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{}},{"id":"bfl/flux-2-klein-4b","object":"model","created":1755815280,"released":1768435200,"owned_by":"bfl","name":"FLUX.2 [klein] 4B","description":"FLUX.2 [klein] is Black Forest Labs' fastest image model yet - it unifies image generation and editing in a single, compact model. Delivering state-of-the-art quality with end-to-end inference in less than a second. Enabling interactive workflows, real-time previews, and latency-critical applications.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{}},{"id":"bfl/flux-2-klein-9b","object":"model","created":1755815280,"released":1768435200,"owned_by":"bfl","name":"FLUX.2 [klein] 9B","description":"FLUX.2 [klein] is Black Forest Labs' fastest image model yet - it unifies image generation and editing in a single, compact model. Delivering state-of-the-art quality with end-to-end inference in less than a second. Enabling interactive workflows, real-time previews, and latency-critical applications.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{}},{"id":"bfl/flux-2-max","object":"model","created":1755815280,"released":1765843200,"owned_by":"bfl","name":"FLUX.2 [max]","description":"FLUX.2 [max] offers image generation and image editing with the highest quality available. It delivers state-of-the-art image generation and advanced image editing with exceptional realism, precision, and consistency. Built for professional use, FLUX.2 [max] produces production-ready outputs for marketing teams, creatives, filmmakers, and creators around the world.","context_window":67300,"max_tokens":67300,"type":"image","tags":["image-generation"],"pricing":{}},{"id":"bfl/flux-2-pro","object":"model","created":1755815280,"released":1764028800,"owned_by":"bfl","name":"FLUX.2 [pro]","description":"FLUX.2 is a completely new base model trained for visual intelligence, not just pixel generation, setting a new standard for both image generation and image editing. With FLUX.2 models you can expect the highest quality, higher resolutions (up to 4MP), and new capabilities like multi-ref images. FLUX.2 [pro] supports generation, editing, and multiple reference images (up to 9 MP total input).\nThis provider gives the option to change the moderation level for inputs and outputs. The control is under safety tolerance and is by default 2 on a range from 0 (more strict) through 6 (more permissive).","context_window":67300,"max_tokens":67300,"type":"image","tags":["image-generation"],"pricing":{}},{"id":"bfl/flux-kontext-max","object":"model","created":1755815280,"released":1748476800,"owned_by":"bfl","name":"FLUX.1 Kontext Max","description":"FLUX.1 Kontext creates images from text prompts with unique capabilities for character consistency and advanced editing. It also edits images using simple text prompts. No complex workflows or fine-tuning needed.\nThis provider gives the option to change the moderation level for inputs and outputs. The control is under safety tolerance and is by default 2 on a range from 0 (more strict) through 6 (more permissive).","context_window":512,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.08"}},{"id":"bfl/flux-kontext-pro","object":"model","created":1755815280,"released":1748476800,"owned_by":"bfl","name":"FLUX.1 Kontext Pro","description":"FLUX.1 Kontext creates images from text prompts with unique capabilities for character consistency and advanced editing. It also edits images using simple text prompts. No complex workflows or fine-tuning needed.\nThis provider gives the option to change the moderation level for inputs and outputs. The control is under safety tolerance and is by default 2 on a range from 0 (more strict) through 6 (more permissive).","context_window":512,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.04"}},{"id":"bfl/flux-pro-1.0-fill","object":"model","created":1755815280,"released":1727740800,"owned_by":"bfl","name":"FLUX.1 Fill [pro]","description":"A state-of-the-art inpainting model, enabling editing and expansion of real and generated images given a text description and a binary mask.\nThis provider gives the option to change the moderation level for inputs and outputs. The control is under safety tolerance and is by default 2 on a range from 0 (more strict) through 6 (more permissive).","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.05"}},{"id":"bfl/flux-pro-1.1","object":"model","created":1755815280,"released":1727827200,"owned_by":"bfl","name":"FLUX1.1 [pro]","description":"FLUX1.1 [pro] is the standard for text-to-image generation with fast, reliable and consistently stunning results.\nThis provider gives the option to change the moderation level for inputs and outputs. The control is under safety tolerance and is by default 2 on a range from 0 (more strict) through 6 (more permissive).","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.04"}},{"id":"bfl/flux-pro-1.1-ultra","object":"model","created":1755815280,"released":1730419200,"owned_by":"bfl","name":"FLUX1.1 [pro] Ultra","description":"FLUX1.1 [pro] Ultra delivers ultra-fast, ultra high-resolution image creation - with more pixels in every picture. Generate varying aspect ratios from text, at 4MP resolution fast.\nThis provider gives the option to change the moderation level for inputs and outputs. The control is under safety tolerance and is by default 2 on a range from 0 (more strict) through 6 (more permissive).","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.06"}},{"id":"bytedance/seed-1.6","object":"model","created":1755815280,"released":1756684800,"owned_by":"bytedance","name":"Seed 1.6","description":"ByteDance's new multimodal deep-thinking model, supporting both text and visual inputs with enhanced reasoning capabilities.","context_window":256000,"max_tokens":32000,"type":"language","tags":["reasoning","tool-use","vision"],"pricing":{"input":"0.00000025","input_tiers":[{"cost":"0.00000025","min":0,"max":128001},{"cost":"0.0000005","min":128001}],"output":"0.000002","output_tiers":[{"cost":"0.000002","min":0,"max":128001},{"cost":"0.000004","min":128001}],"input_cache_read":"0.00000005"}},{"id":"bytedance/seed-1.8","object":"model","created":1755815280,"released":1756684800,"owned_by":"bytedance","name":"Bytedance Seed 1.8","description":"Bytedance Seed 1.8 features stronger multimodal understanding and agent capabilities. The model delivers superior performance across a wide range of complex real-world tasks, helping enterprises create greater value.","context_window":256000,"max_tokens":64000,"type":"language","tags":["reasoning","tool-use","vision"],"pricing":{"input":"0.00000025","input_tiers":[{"cost":"0.00000025","min":0,"max":128001},{"cost":"0.0000005","min":128001}],"output":"0.000002","output_tiers":[{"cost":"0.000002","min":0,"max":128001},{"cost":"0.000004","min":128001}],"input_cache_read":"0.00000005"}},{"id":"bytedance/seedance-2.0","object":"model","created":1755815280,"released":1776124800,"owned_by":"bytedance","name":"Seedance 2.0","description":"Built with a unified multimodal audio-video joint generation architecture, Seedance 2.0 supports four input modalities: text, image, audio, and video. Compared with Version 1.5, Seedance 2.0 delivers a substantial leap in generation quality. It achieves a higher usability rate for complex interaction and motion scenes, with significant improvements in physical accuracy, visual realism, and controllability, making it well-suited for high-quality creation scenarios.","context_window":0,"max_tokens":0,"type":"video","tags":["video-generation","vision"],"video_capabilities":{"supported_operations":["text-to-video","image-to-video","first-last-frame","reference-to-video"],"supported_resolutions":["480p","720p","1080p"],"supported_aspect_ratios":["16:9","4:3","1:1","3:4","9:16","21:9"],"supported_durations_seconds":[4,5,6,7,8,9,10,11,12,13,14,15],"generate_audio":true,"supported_fps":[24],"input_limits":{"image":{"max_count":9,"max_file_size_mb":30,"supported_formats":["jpeg","png","webp","bmp","tiff","gif"],"supported_sources":["url"],"min_dimension_pixels":300,"max_dimension_pixels":6000,"min_aspect_ratio":"2:5","max_aspect_ratio":"5:2"},"video":{"max_count":3,"max_file_size_mb":50,"supported_formats":["mp4","mov"],"supported_sources":["url"],"min_duration_seconds":2,"max_duration_seconds":15,"min_dimension_pixels":300,"max_dimension_pixels":6000},"audio":{"max_file_size_mb":15,"supported_formats":["wav","mp3"],"supported_sources":["url"],"min_duration_seconds":2,"max_duration_seconds":15}}},"pricing":{"video_token_pricing":{"no_video_input":{"cost_per_million_tokens":"7"},"with_video_input":{"cost_per_million_tokens":"4.3"},"notes":"Pricing varies based on whether the input contains video. When video is included in the input, all tokens are billed at the reduced rate. Token count includes both input and output video and is subject to minimum token floors based on output duration."}}},{"id":"bytedance/seedance-2.0-fast","object":"model","created":1755815280,"released":1776124800,"owned_by":"bytedance","name":"Seedance 2.0 Fast","description":"Seedance 2.0 Fast is a new-generation multimodal video creation model, inheriting the core functions and advantages of Seedance 2.0, with faster speed.","context_window":0,"max_tokens":0,"type":"video","tags":["vision","video-generation"],"video_capabilities":{"supported_operations":["text-to-video","image-to-video","first-last-frame","reference-to-video"],"supported_resolutions":["480p","720p"],"supported_aspect_ratios":["16:9","4:3","1:1","3:4","9:16","21:9"],"supported_durations_seconds":[4,5,6,7,8,9,10,11,12,13,14,15],"generate_audio":true,"supported_fps":[24],"input_limits":{"image":{"max_count":9,"max_file_size_mb":30,"supported_formats":["jpeg","png","webp","bmp","tiff","gif"],"supported_sources":["url"],"min_dimension_pixels":300,"max_dimension_pixels":6000,"min_aspect_ratio":"2:5","max_aspect_ratio":"5:2"},"video":{"max_count":3,"max_file_size_mb":50,"supported_formats":["mp4","mov"],"supported_sources":["url"],"min_duration_seconds":2,"max_duration_seconds":15,"min_dimension_pixels":300,"max_dimension_pixels":6000},"audio":{"max_file_size_mb":15,"supported_formats":["wav","mp3"],"supported_sources":["url"],"min_duration_seconds":2,"max_duration_seconds":15}}},"pricing":{"video_token_pricing":{"no_video_input":{"cost_per_million_tokens":"5.6"},"with_video_input":{"cost_per_million_tokens":"3.3"},"notes":"Pricing varies based on whether the input contains video. When video is included in the input, all tokens are billed at the reduced rate. Token count includes both input and output video and is subject to minimum token floors based on output duration."}}},{"id":"bytedance/seedance-v1.0-pro","object":"model","created":1755815280,"released":1749600000,"owned_by":"bytedance","name":"Seedance v1.0 Pro","description":"A video generation model that supports multi-shot storytelling. It excels in semantic understanding and instruction following, producing smooth, detailed, and cinematic 1080P HD videos.","context_window":0,"max_tokens":0,"type":"video","tags":["video-generation"],"video_capabilities":{"supported_operations":["text-to-video","image-to-video","first-last-frame"],"supported_resolutions":["480p","720p","1080p"],"supported_aspect_ratios":["16:9","4:3","1:1","3:4","9:16","21:9"],"supported_durations_seconds":[2,3,4,5,6,7,8,9,10,11,12],"generate_audio":false,"supported_fps":[24],"input_limits":{"image":{"max_count":2,"max_file_size_mb":30,"supported_formats":["jpeg","png","webp","bmp","tiff","gif"],"supported_sources":["url"],"min_dimension_pixels":300,"max_dimension_pixels":6000,"min_aspect_ratio":"2:5","max_aspect_ratio":"5:2"}}},"pricing":{"video_duration_pricing":[{"resolution":"480p","cost_per_second":"0.0243"},{"resolution":"720p","cost_per_second":"0.0515"},{"resolution":"1080p","cost_per_second":"0.1224"}]}},{"id":"bytedance/seedance-v1.0-pro-fast","object":"model","created":1755815280,"released":1761264000,"owned_by":"bytedance","name":"Seedance v1.0 Pro Fast","description":"Seedance 1.0 Pro Fast delivers top performance at an unbeatable price, balancing quality, speed, and cost. Built on Seedance 1.0 Pro’s core strengths, it’s faster and more cost-efficient for creators.","context_window":0,"max_tokens":0,"type":"video","tags":["video-generation"],"video_capabilities":{"supported_operations":["text-to-video","image-to-video"],"supported_resolutions":["480p","720p","1080p"],"supported_aspect_ratios":["16:9","4:3","1:1","3:4","9:16","21:9"],"supported_durations_seconds":[2,3,4,5,6,7,8,9,10,11,12],"generate_audio":false,"supported_fps":[24],"input_limits":{"image":{"max_count":1,"max_file_size_mb":30,"supported_formats":["jpeg","png","webp","bmp","tiff","gif"],"supported_sources":["url"],"min_dimension_pixels":300,"max_dimension_pixels":6000,"min_aspect_ratio":"2:5","max_aspect_ratio":"5:2"}}},"pricing":{"video_duration_pricing":[{"resolution":"480p","cost_per_second":"0.0097"},{"resolution":"720p","cost_per_second":"0.0206"},{"resolution":"1080p","cost_per_second":"0.049"}]}},{"id":"bytedance/seedance-v1.5-pro","object":"model","created":1755815280,"released":1765843200,"owned_by":"bytedance","name":"Seedance v1.5 Pro","description":"ByteDance's Seedance 1.5 Pro is a professional video model using V2A native generation for integrated, synced audio-visual output, enhancing efficiency of professional video creation.","context_window":0,"max_tokens":0,"type":"video","tags":["video-generation"],"video_capabilities":{"supported_operations":["text-to-video","image-to-video","first-last-frame"],"supported_resolutions":["480p","720p","1080p"],"supported_aspect_ratios":["16:9","4:3","1:1","3:4","9:16","21:9"],"supported_durations_seconds":[4,5,6,7,8,9,10,11,12],"generate_audio":true,"supported_fps":[24],"input_limits":{"image":{"max_count":2,"max_file_size_mb":30,"supported_formats":["jpeg","png","webp","bmp","tiff","gif","heic","heif"],"supported_sources":["url"],"min_dimension_pixels":300,"max_dimension_pixels":6000,"min_aspect_ratio":"2:5","max_aspect_ratio":"5:2"}}},"pricing":{"video_duration_pricing":[{"resolution":"480p","audio":false,"cost_per_second":"0.0121"},{"resolution":"480p","audio":true,"cost_per_second":"0.0241"},{"resolution":"720p","audio":false,"cost_per_second":"0.0259"},{"resolution":"720p","audio":true,"cost_per_second":"0.0518"},{"resolution":"1080p","audio":false,"cost_per_second":"0.0583"},{"resolution":"1080p","audio":true,"cost_per_second":"0.1166"}]}},{"id":"bytedance/seedream-4.0","object":"model","created":1755815280,"released":1757376000,"owned_by":"bytedance","name":"Seedream 4.0","description":"Seedream 4.0 is a SOTA multimodal image creation model built on leading architecture. It breaks through the boundaries of traditional text-to-image models by natively supporting text, single-image, and multi-image inputs. Users can freely combine text and images to achieve diverse creative modes within a single model—such as multi-image blending, image editing, and sequentially batch image generation, featuring subject consistency, making image creation more free and controllable.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.03"}},{"id":"bytedance/seedream-4.5","object":"model","created":1755815280,"released":1764720000,"owned_by":"bytedance","name":"Seedream 4.5","description":"Seedream 4.5 is the latest in-house image generation model developed by ByteDance. Compared with Seedream 4.0, it delivers comprehensive improvements—especially in editing consistency, including better preservation of subject details, lighting, and color tone. It also enhances portrait refinement and small-text rendering. The model’s multi-image composition capabilities have been significantly strengthened, and both reasoning performance and visual aesthetics continue to advance, enabling more accurate and artistically expressive image generation.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.04"}},{"id":"bytedance/seedream-5.0-lite","object":"model","created":1755815280,"released":1770940800,"owned_by":"bytedance","name":"Seedream 5.0 Lite","description":"ByteDance-Seedream-5.0-lite is the latest image generation model released by BytePlus. For the first time, it introduces web-connected retrieval, enabling the model to fuse real-time online information to significantly improve the timeliness and relevance of generated images. The model’s reasoning and comprehension capabilities are further upgraded, allowing it to accurately interpret complex prompts and visual inputs. In addition, ByteDance-Seedream-5.0-lite delivers notable improvements in global knowledge coverage, reference consistency, and professional-grade scene generation, making it well suited for enterprise-level visual creation workflows.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.035"}},{"id":"bytedance/seedream-5.0-pro","object":"model","created":1755815280,"released":1783728000,"owned_by":"bytedance","name":"Seedream 5.0 Pro","description":"Seedream-5.0-Pro, ByteDance's newest image generation model, delivers comprehensive upgrades for complex, lifelike image creation and editing, ushering in a new phase of controllable visual production. It stands out with precise editing control, robust commercial applicability and natural rendering results.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"input":"0.000000003","image":"0.035"}},{"id":"cohere/command-a","object":"model","created":1755815280,"released":1741824000,"owned_by":"cohere","name":"Command A","description":"Command A is Cohere's most performant model to date, excelling at tool use, agents, retrieval augmented generation (RAG), and multilingual use cases. Command A has a context length of 256K, only requires two GPUs to run, and has 150% higher throughput compared to Command R+ 08-2024.","context_window":256000,"max_tokens":8000,"type":"language","tags":["tool-use"],"pricing":{"input":"0.0000025","output":"0.00001"}},{"id":"cohere/embed-v4.0","object":"model","created":1755815280,"released":1744675200,"owned_by":"cohere","name":"Embed v4.0","description":"A model that allows for text, images, or mixed content to be classified or turned into embeddings.","context_window":128000,"max_tokens":0,"type":"embedding","regions":["us"],"pricing":{"input":"0.00000012"}},{"id":"cohere/rerank-v3.5","object":"model","created":1755815280,"released":1733097600,"owned_by":"cohere","name":"Cohere Rerank 3.5","description":"A model that allows for re-ranking English Language documents and semi-structured data (JSON).","context_window":4096,"max_tokens":4096,"type":"reranking","regions":["us"],"pricing":{}},{"id":"cohere/rerank-v4-fast","object":"model","created":1755815280,"released":1765411200,"owned_by":"cohere","name":"Cohere Rerank 4 Fast","description":"A light version of Rerank 4 Pro, this is a multilingual model that allows for re-ranking English and non-english documents and semi-structured data (JSON). This model is better suited for low latency and high throughput use-cases than its pro variant.","context_window":32000,"max_tokens":32000,"type":"reranking","pricing":{}},{"id":"cohere/rerank-v4-pro","object":"model","created":1755815280,"released":1765411200,"owned_by":"cohere","name":"Cohere Rerank 4 Pro","description":"A multilingual model that allows for re-ranking English and non-english documents and semi-structured data (JSON). This model is better suited for state-of-the-art quality and complex use-cases than its fast variant.","context_window":32000,"max_tokens":32000,"type":"reranking","pricing":{}},{"id":"deepseek/deepseek-r1","object":"model","created":1755815280,"released":1737331200,"owned_by":"deepseek","name":"DeepSeek-R1","description":"DeepSeek-R1 provides customers a state-of-the-art reasoning model, optimized for general reasoning tasks, math, science, and code generation.","context_window":128000,"max_tokens":8192,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.00000135","output":"0.0000054"}},{"id":"deepseek/deepseek-v3","object":"model","created":1755815280,"released":1735171200,"owned_by":"deepseek","name":"DeepSeek V3 0324","description":"DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.","context_window":163840,"max_tokens":163840,"type":"language","tags":["tool-use"],"pricing":{"input":"0.00000027","output":"0.00000112","input_cache_read":"0.000000135"}},{"id":"deepseek/deepseek-v3.1","object":"model","created":1755815280,"released":1755734400,"owned_by":"deepseek","name":"DeepSeek V3.1","description":"DeepSeek-V3.1 is post-trained on the top of DeepSeek-V3.1-Base, which is built upon the original V3 base checkpoint through a two-phase long context extension approach, following the methodology outlined in the original DeepSeek-V3 report. We have expanded our dataset by collecting additional long documents and substantially extending both training phases. The 32K extension phase has been increased 10-fold to 630B tokens, while the 128K extension phase has been extended by 3.3x to 209B tokens. Additionally, DeepSeek-V3.1 is trained using the UE8M0 FP8 scale data format to ensure compatibility with microscaling data formats.","context_window":163840,"max_tokens":128000,"type":"language","tags":["implicit-caching","reasoning","tool-use"],"pricing":{"input":"0.00000025","output":"0.00000095","input_cache_read":"0.00000013"}},{"id":"deepseek/deepseek-v3.1-terminus","object":"model","created":1755815280,"released":1758499200,"owned_by":"deepseek","name":"DeepSeek V3.1 Terminus","description":"DeepSeek-V3.1-Terminus delivers more stable & reliable outputs across benchmarks compared to the previous version and addresses user feedback (i.e. language consistency and agent upgrades).","context_window":131072,"max_tokens":65536,"type":"language","tags":["implicit-caching","reasoning","tool-use"],"pricing":{"input":"0.00000027","output":"0.000001","input_cache_read":"0.000000135"}},{"id":"deepseek/deepseek-v3.2","object":"model","created":1755815280,"released":1764547200,"owned_by":"deepseek","name":"DeepSeek V3.2","description":"DeepSeek-V3.2: Official successor to V3.2-Exp.","context_window":128000,"max_tokens":8000,"type":"language","tags":["tool-use","implicit-caching"],"pricing":{"input":"0.00000028","output":"0.00000042","input_cache_read":"0.000000028"}},{"id":"deepseek/deepseek-v3.2-thinking","object":"model","created":1755815280,"released":1764547200,"owned_by":"deepseek","name":"DeepSeek V3.2 Thinking","description":"DeepSeek‑V3.2 from DeepSeek harmonizes high computational efficiency with superior reasoning and agent performance. It builds on three main techniques: DeepSeek Sparse Attention for long‑context efficiency, a scalable reinforcement learning framework, and a large‑scale agentic task synthesis pipeline. This model excels at long-context reasoning and agentic tasks, efficiently handling extended inputs while maintaining strong accuracy. Its sparse attention design enables it to process complex, multi-step workflows without excessive compute costs. Overall, DeepSeek‑V3.2 targets long‑context reasoning, tool‑using agents, and efficient deployment in production environments.","context_window":128000,"max_tokens":8000,"type":"language","tags":["tool-use","implicit-caching","reasoning"],"pricing":{"input":"0.00000062","output":"0.00000185"}},{"id":"deepseek/deepseek-v4-flash","object":"model","created":1755815280,"released":1776902400,"owned_by":"deepseek","name":"DeepSeek V4 Flash","description":"DeepSeek-V4 series incorporate several key upgrades in architecture and optimization: (1) a hybrid attention architecture that combines Compressed Sparse Attention (CSA)\nand Heavily Compressed Attention (HCA) to improve long-context efficiency; (2) ManifoldConstrained Hyper-Connections (mHC) that enhance conventional residual connections; (3)\nand the Muon optimizer for faster convergence and greater training stability","context_window":1000000,"max_tokens":384000,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.00000014","output":"0.00000028","input_cache_read":"0.000000028"}},{"id":"deepseek/deepseek-v4-pro","object":"model","created":1755815280,"released":1776902400,"owned_by":"deepseek","name":"DeepSeek V4 Pro","description":"DeepSeek-V4 series incorporate several key upgrades in architecture and optimization: (1) a hybrid attention architecture that combines Compressed Sparse Attention (CSA)\nand Heavily Compressed Attention (HCA) to improve long-context efficiency; (2) ManifoldConstrained Hyper-Connections (mHC) that enhance conventional residual connections; (3)\nand the Muon optimizer for faster convergence and greater training stability","context_window":1000000,"max_tokens":384000,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"regions":["us"],"pricing":{"input":"0.000000435","output":"0.00000087","input_cache_read":"0.0000000036"}},{"id":"google/gemini-2.5-flash","object":"model","created":1755815280,"released":1742428800,"owned_by":"google","name":"Gemini 2.5 Flash","description":"Gemini 2.5 Flash is a thinking model that offers great, well-rounded capabilities. It is designed to offer a balance between price and performance with multimodal support and a 1M token context window.","context_window":1000000,"max_tokens":65536,"type":"language","tags":["file-input","reasoning","tool-use","vision","web-search","implicit-caching"],"regions":["eu","us"],"pricing":{"input":"0.0000003","output":"0.0000025","input_cache_read":"0.00000003","web_search":"35","maps_search":"25","service_tiers":{"priority":{"input":"0.00000054","output":"0.0000045","input_cache_read":"0.000000054"},"flex":{"input":"0.00000015","output":"0.00000125"}}}},{"id":"google/gemini-2.5-flash-image","object":"model","created":1755815280,"released":1756166400,"owned_by":"google","name":"Nano Banana (Gemini 2.5 Flash Image)","description":"Nano Banana (Gemini 2.5 Flash Image) is Google's first fully hybrid reasoning model, letting developers turn thinking on or off and set thinking budgets to balance quality, cost, and latency. Upgraded for rapid creative workflows, it can generate interleaved text and images and supports conversational, multi‑turn image editing in natural language. It’s also locale‑aware, enabling culturally and linguistically appropriate image generation for audiences worldwide.","context_window":32768,"max_tokens":65536,"type":"language","tags":["image-generation","web-search"],"regions":["eu","us"],"pricing":{"input":"0.0000003","output":"0.0000025","input_cache_read":"0.00000003","image_dimension_quality_pricing":[{"size":"default","cost":"0.039"}],"maps_search":"25","service_tiers":{"flex":{"input":"0.00000015","output":"0.00000125"}}}},{"id":"google/gemini-2.5-flash-lite","object":"model","created":1755815280,"released":1750118400,"owned_by":"google","name":"Gemini 2.5 Flash Lite","description":"Gemini 2.5 Flash-Lite is a balanced, low-latency model with configurable thinking budgets and tool connectivity (e.g., Google Search grounding and code execution). It supports multimodal input and offers a 1M-token context window.","context_window":1048576,"max_tokens":65536,"type":"language","tags":["file-input","reasoning","tool-use","vision","web-search","implicit-caching"],"regions":["eu","us"],"pricing":{"input":"0.0000001","output":"0.0000004","input_cache_read":"0.00000001","web_search":"35","maps_search":"25","service_tiers":{"priority":{"input":"0.00000018","output":"0.00000072","input_cache_read":"0.000000018"},"flex":{"input":"0.00000005","output":"0.0000002"}}}},{"id":"google/gemini-2.5-pro","object":"model","created":1755815280,"released":1742428800,"owned_by":"google","name":"Gemini 2.5 Pro","description":"Gemini 2.5 Pro is our most advanced reasoning Gemini model, capable of solving complex problems. Gemini 2.5 Pro can comprehend vast datasets and challenging problems from different information sources, including text, audio, images, video, and even entire code repositories.","context_window":1048576,"max_tokens":65536,"type":"language","tags":["file-input","reasoning","tool-use","vision","web-search","implicit-caching"],"regions":["eu","us"],"pricing":{"input":"0.00000125","input_tiers":[{"cost":"0.00000125","min":0,"max":200001},{"cost":"0.0000025","min":200001}],"output":"0.00001","output_tiers":[{"cost":"0.00001","min":0,"max":200001},{"cost":"0.000015","min":200001}],"input_cache_read":"0.000000125","input_cache_read_tiers":[{"cost":"0.000000125","min":0,"max":200001},{"cost":"0.00000025","min":200001}],"web_search":"35","maps_search":"25","service_tiers":{"priority":{"input":"0.00000225","output":"0.000018","input_cache_read":"0.000000225","long_context":{"threshold":200000,"input":"0.0000045","output":"0.000027","input_cache_read":"0.00000045"}},"flex":{"input":"0.000000625","output":"0.000005","long_context":{"threshold":200000,"input":"0.00000125","output":"0.0000075"}}}}},{"id":"google/gemini-3-flash","object":"model","created":1755815280,"released":1765929600,"owned_by":"google","name":"Gemini 3 Flash","description":"Google's most intelligent model built for speed, combining frontier intelligence with superior search and grounding.","context_window":1000000,"max_tokens":65000,"type":"language","tags":["reasoning","file-input","vision","tool-use","web-search","implicit-caching"],"pricing":{"input":"0.0000005","input_tiers":[{"cost":"0.0000005","min":0,"max":200001},{"cost":"0.0000005","min":200001}],"output":"0.000003","output_tiers":[{"cost":"0.000003","min":0,"max":200001},{"cost":"0.000003","min":200001}],"input_cache_read":"0.00000005","input_cache_read_tiers":[{"cost":"0.00000005","min":0,"max":200001},{"cost":"0.00000005","min":200001}],"web_search":"14","maps_search":"14","service_tiers":{"priority":{"input":"0.0000009","output":"0.0000054","input_cache_read":"0.00000009"},"flex":{"input":"0.00000025","output":"0.0000015"}}}},{"id":"google/gemini-3-pro-image","object":"model","created":1755815280,"released":1756684800,"owned_by":"google","name":"Nano Banana Pro (Gemini 3 Pro Image)","description":"Nano Banana Pro (Gemini 3 Pro Image) builds on Nano Banana's generation capabilities into a new era of studio-quality, functional design to help you create and edit high-fidelity, production-ready visuals with unparalleled precision and control. Improvements include enhanced world knowledge and reasoning, dynamic text and translation, and studio level controls.","context_window":65536,"max_tokens":32768,"type":"language","tags":["image-generation","web-search","implicit-caching"],"pricing":{"input":"0.000002","output":"0.000012","input_cache_read":"0.0000002","image_dimension_quality_pricing":[{"size":"1K","cost":"0.1344"},{"size":"2K","cost":"0.1344"},{"size":"4K","cost":"0.24"},{"size":"default","cost":"0.1344"}],"web_search":"14","maps_search":"14","service_tiers":{"flex":{"input":"0.000001","output":"0.000006"}}}},{"id":"google/gemini-3-pro-preview","object":"model","created":1755815280,"released":1763424000,"owned_by":"google","name":"Gemini 3 Pro Preview","description":"This model improves upon Gemini 2.5 Pro and is catered towards challenging tasks, especially those involving complex reasoning or agentic workflows. Improvements highlighted include use cases for coding, multi-step function calling, planning, reasoning, deep knowledge tasks, and instruction following.","context_window":1000000,"max_tokens":64000,"type":"language","tags":["file-input","tool-use","reasoning","vision","web-search","implicit-caching"],"pricing":{"input":"0.000002","input_tiers":[{"cost":"0.000002","min":0,"max":200001},{"cost":"0.000004","min":200001}],"output":"0.000012","output_tiers":[{"cost":"0.000012","min":0,"max":200001},{"cost":"0.000018","min":200001}],"input_cache_read":"0.0000002","input_cache_read_tiers":[{"cost":"0.0000002","min":0,"max":200001},{"cost":"0.0000004","min":200001}],"web_search":"14","maps_search":"14","service_tiers":{"priority":{"input":"0.0000036","output":"0.0000216","input_cache_read":"0.00000036","long_context":{"threshold":200000,"input":"0.0000072","output":"0.0000324","input_cache_read":"0.00000072"}},"flex":{"input":"0.000001","output":"0.000006","long_context":{"threshold":200000,"input":"0.000002","output":"0.000009"}}}}},{"id":"google/gemini-3.1-flash-image","object":"model","created":1755815280,"released":1779926400,"owned_by":"google","name":"Gemini 3.1 Flash Image (Nano Banana 2)","description":"Gemini 3.1 Flash Image is optimized for image understanding and generation and offers a balance of price and performance.","context_window":131072,"max_tokens":32768,"type":"language","tags":["image-generation","implicit-caching","reasoning","vision","web-search"],"pricing":{"input":"0.0000005","output":"0.000003","input_cache_read":"0.00000005","image_dimension_quality_pricing":[{"size":"512","cost":"0.045"},{"size":"1K","cost":"0.067"},{"size":"2K","cost":"0.101"},{"size":"4K","cost":"0.151"},{"size":"default","cost":"0.067"}],"web_search":"14","maps_search":"14"}},{"id":"google/gemini-3.1-flash-image-preview","object":"model","created":1755815280,"released":1772064000,"owned_by":"google","name":"Gemini 3.1 Flash Image Preview (Nano Banana 2)","description":"Gemini 3.1 Flash Image is optimized for image understanding and generation and offers a balance of price and performance.","context_window":131072,"max_tokens":32768,"type":"language","tags":["image-generation","implicit-caching","reasoning","vision","web-search"],"pricing":{"input":"0.0000005","output":"0.000003","input_cache_read":"0.00000005","image_dimension_quality_pricing":[{"size":"512","cost":"0.045"},{"size":"1K","cost":"0.067"},{"size":"2K","cost":"0.101"},{"size":"4K","cost":"0.151"},{"size":"default","cost":"0.067"}],"web_search":"14","maps_search":"14","service_tiers":{"flex":{"input":"0.00000025","output":"0.0000015"}}}},{"id":"google/gemini-3.1-flash-lite","object":"model","created":1755815280,"released":1778112000,"owned_by":"google","name":"Gemini 3.1 Flash Lite","description":"Gemini 3.1 Flash Lite outperforms 2.5 Flash Lite on overall quality and lands close to 2.5 Flash performance across key capability areas. It is a workhorse model for high-volume use cases, with improvements across audio input/ASR, RAG snippet ranking, translation, data extraction, and code completion.","context_window":1000000,"max_tokens":65000,"type":"language","tags":["reasoning","tool-use","implicit-caching","file-input","vision","web-search"],"regions":["eu","us"],"pricing":{"input":"0.00000025","output":"0.0000015","input_cache_read":"0.00000003","web_search":"14","maps_search":"14","service_tiers":{"priority":{"input":"0.00000045","output":"0.0000027","input_cache_read":"0.000000045"},"flex":{"input":"0.000000125","output":"0.00000075","input_cache_read":"0.0000000125"}}}},{"id":"google/gemini-3.1-flash-lite-image","object":"model","created":1755815280,"released":1782777600,"owned_by":"google","name":"Gemini 3.1 Flash Lite Image (Nano Banana 2 Lite)","description":"Gemini 3.1 Flash-Lite Image (Nano Banana 2 Lite) is Google's fastest image generation model enabling rapid creation and iteration.","context_window":65536,"max_tokens":4096,"type":"language","tags":["image-generation","implicit-caching","reasoning","vision","web-search"],"pricing":{"input":"0.00000025","output":"0.0000015","input_cache_read":"0.00000003","image_dimension_quality_pricing":[{"size":"default","cost":"0.034"},{"size":"1K","cost":"0.034"}],"web_search":"14","maps_search":"14"}},{"id":"google/gemini-3.1-flash-lite-preview","object":"model","created":1755815280,"released":1772496000,"owned_by":"google","name":"Gemini 3.1 Flash Lite Preview","description":"Gemini 3.1 Flash Lite Preview outperforms 2.5 Flash Lite on overall quality and lands close to 2.5 Flash performance across key capability areas. It is a workhorse model for high-volume use cases, with improvements across audio input/ASR, RAG snippet ranking, translation, data extraction, and code completion.","context_window":1000000,"max_tokens":65000,"type":"language","tags":["reasoning","tool-use","implicit-caching","file-input","vision","web-search"],"pricing":{"input":"0.00000025","output":"0.0000015","input_cache_read":"0.00000003","web_search":"14","maps_search":"14","service_tiers":{"priority":{"input":"0.00000045","output":"0.0000027","input_cache_read":"0.000000045"},"flex":{"input":"0.000000125","output":"0.00000075","input_cache_read":"0.0000000125"}}}},{"id":"google/gemini-3.1-pro-preview","object":"model","created":1755815280,"released":1763424000,"owned_by":"google","name":"Gemini 3.1 Pro Preview","description":"This model improves upon Gemini 2.5 Pro and is catered towards challenging tasks, especially those involving complex reasoning or agentic workflows. Improvements highlighted include use cases for coding, multi-step function calling, planning, reasoning, deep knowledge tasks, and instruction following.","context_window":1000000,"max_tokens":64000,"type":"language","tags":["file-input","tool-use","reasoning","vision","web-search","implicit-caching"],"pricing":{"input":"0.000002","input_tiers":[{"cost":"0.000002","min":0,"max":200001},{"cost":"0.000004","min":200001}],"output":"0.000012","output_tiers":[{"cost":"0.000012","min":0,"max":200001},{"cost":"0.000018","min":200001}],"input_cache_read":"0.0000002","input_cache_read_tiers":[{"cost":"0.0000002","min":0,"max":200001},{"cost":"0.0000004","min":200001}],"web_search":"14","maps_search":"14","service_tiers":{"priority":{"input":"0.0000036","output":"0.0000216","input_cache_read":"0.00000036","long_context":{"threshold":200000,"input":"0.0000072","output":"0.0000324","input_cache_read":"0.00000072"}},"flex":{"input":"0.000001","output":"0.000006","long_context":{"threshold":200000,"input":"0.000002","output":"0.000009"}}}}},{"id":"google/gemini-3.5-flash","object":"model","created":1755815280,"released":1779148800,"owned_by":"google","name":"Gemini 3.5 Flash","description":"Google's latest model, highly optimized for coding proficiency and parallel agentic execution loops. Defaults to medium thinking effort for faster and more cost-efficient responses.","context_window":1000000,"max_tokens":64000,"type":"language","tags":["reasoning","file-input","vision","tool-use","web-search","implicit-caching"],"regions":["eu","us"],"pricing":{"input":"0.0000015","output":"0.000009","input_cache_read":"0.00000015","web_search":"14","maps_search":"14","service_tiers":{"priority":{"input":"0.0000027","output":"0.0000162","input_cache_read":"0.00000027"},"flex":{"input":"0.00000075","output":"0.0000045","input_cache_read":"0.00000008"}}}},{"id":"google/gemini-embedding-001","object":"model","created":1755815280,"released":1747699200,"owned_by":"google","name":"Gemini Embedding 001","description":"State-of-the-art embedding model with excellent performance across English, multilingual and code tasks.","context_window":0,"max_tokens":0,"type":"embedding","regions":["us"],"pricing":{"input":"0.00000015"}},{"id":"google/gemini-embedding-2","object":"model","created":1755815280,"released":1773100800,"owned_by":"google","name":"Gemini Embedding 2","description":"Google’s first fully multimodal Embedding model that is capable of mapping text, image, video, audio, and PDFs and their interleaved combinations thereof into a single, unified vector space. Built on the Gemini architecture, it supports 100+ languages.","context_window":0,"max_tokens":0,"type":"embedding","pricing":{"input":"0.0000002"}},{"id":"google/gemini-omni-flash-preview","object":"model","created":1755815280,"released":1782777600,"owned_by":"google","name":"Gemini Omni Flash Preview","description":"Gemini Omni Flash (Preview) is a multimodal model designed for video, image, and text tasks. It is optimized for video generation, offering video output alongside text responses in a single model.","context_window":1000000,"max_tokens":57920,"type":"language","tags":["file-input","reasoning","vision","video-generation"],"pricing":{"input":"0.0000015","output":"0.000009"}},{"id":"google/gemma-4-26b-a4b-it","object":"model","created":1755815280,"released":1775088000,"owned_by":"google","name":"Gemma 4 26B A4B IT","description":"Gemma is a family of open models built by Google DeepMind. Gemma 4 models are multimodal, handling text and image input (with audio supported on small models) and generating text output. This release includes open-weights models in both pre-trained and instruction-tuned variants. Gemma 4 features a context window of up to 256K tokens and maintains multilingual support in over 140 languages.","context_window":262144,"max_tokens":131072,"type":"language","tags":["file-input","reasoning","tool-use","vision","implicit-caching"],"pricing":{"input":"0.00000015","output":"0.0000006","input_cache_read":"0.000000015"}},{"id":"google/gemma-4-31b-it","object":"model","created":1755815280,"released":1775088000,"owned_by":"google","name":"Gemma 4 31B IT","description":"Gemma 4 31B is engineered to tackle the most demanding enterprise workloads and complex reasoning tasks. With an expansive 256K-token context window, the 31B model can effortlessly ingest entire codebases, and massive sets of images in a single prompt.","context_window":262144,"max_tokens":131072,"type":"language","tags":["file-input","reasoning","tool-use","vision"],"pricing":{"input":"0.00000014","output":"0.0000004"}},{"id":"google/imagen-4.0-fast-generate-001","object":"model","created":1755815280,"released":1748736000,"owned_by":"google","name":"Imagen 4 Fast","description":"Imagen 4 Fast is Google’s speed-optimized variant of the Imagen 4 text-to-image model, designed for rapid, high-volume image generation. It’s ideal for workflows like quick drafts, mockups, and iterative creative exploration. Despite emphasizing speed, it still benefits from the broader Imagen 4 family’s improvements in clarity, text rendering, and stylistic flexibility, and supports high-resolution outputs up to 2K.","context_window":480,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.02"}},{"id":"google/imagen-4.0-generate-001","object":"model","created":1755815280,"released":1747699200,"owned_by":"google","name":"Imagen 4","description":"Imagen 4: Google's flagship text-to-image model that serves as the go-to choice for a wide variety of high-quality image generation tasks, featuring significant improvements in text rendering over previous models. It now supports up to 2K resolution generation for creating detailed and crisp visuals, making it suitable for everything from marketing assets to artistic compositions.","context_window":480,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.04"}},{"id":"google/imagen-4.0-ultra-generate-001","object":"model","created":1755815280,"released":1748044800,"owned_by":"google","name":"Imagen 4 Ultra","description":"Imagen 4 Ultra: Highest quality image generation model for detailed and photorealistic outputs.","context_window":480,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.06"}},{"id":"google/text-embedding-005","object":"model","created":1755815280,"released":1722470400,"owned_by":"google","name":"Text Embedding 005","description":"English-focused text embedding model optimized for code and English language tasks.","context_window":0,"max_tokens":0,"type":"embedding","pricing":{"input":"0.000000025"}},{"id":"google/text-multilingual-embedding-002","object":"model","created":1755815280,"released":1709251200,"owned_by":"google","name":"Text Multilingual Embedding 002","description":"Multilingual text embedding model optimized for cross-lingual tasks across many languages.","context_window":0,"max_tokens":0,"type":"embedding","pricing":{"input":"0.000000025"}},{"id":"google/veo-3.0-fast-generate-001","object":"model","created":1755815280,"released":1753920000,"owned_by":"google","name":"Veo 3.0 Fast Generate","description":"Veo 3 Fast is a quicker and more cost effective version of Veo 3, allowing developers to create videos with sound while maintaining high quality and optimizing for speed and business use cases. Veo 3 Fast offers both text-to-video and image-to-video modalities.","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["text-to-video","image-to-video"],"supported_resolutions":["720p","1080p"],"supported_aspect_ratios":["16:9","9:16"],"supported_durations_seconds":[4,6,8],"generate_audio":true,"supported_fps":[24],"max_sample_count":4,"input_limits":{"image":{"max_count":1,"max_file_size_mb":20,"supported_formats":["jpg","jpeg","png"],"supported_sources":["url","base64"]}}},"pricing":{"video_duration_pricing":[{"resolution":"720p","audio":false,"cost_per_second":"0.1"},{"resolution":"720p","audio":true,"cost_per_second":"0.15"},{"resolution":"1080p","audio":false,"cost_per_second":"0.1"},{"resolution":"1080p","audio":true,"cost_per_second":"0.15"}]}},{"id":"google/veo-3.0-generate-001","object":"model","created":1755815280,"released":1747699200,"owned_by":"google","name":"Veo 3.0","description":"Veo 3 is designed to handle a range of video generation tasks, from cinematic narratives to dynamic character animations. With Veo 3, you can create more immersive experiences by not only generating stunning visuals, but also audio like dialogue and sound effects.","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["text-to-video","image-to-video"],"supported_resolutions":["720p","1080p"],"supported_aspect_ratios":["16:9","9:16"],"supported_durations_seconds":[4,6,8],"generate_audio":true,"supported_fps":[24],"max_sample_count":4,"input_limits":{"image":{"max_count":1,"max_file_size_mb":20,"supported_formats":["jpg","jpeg","png"],"supported_sources":["url","base64"]}}},"pricing":{"video_duration_pricing":[{"resolution":"720p","audio":false,"cost_per_second":"0.2"},{"resolution":"720p","audio":true,"cost_per_second":"0.4"},{"resolution":"1080p","audio":false,"cost_per_second":"0.2"},{"resolution":"1080p","audio":true,"cost_per_second":"0.4"}]}},{"id":"google/veo-3.1-fast-generate-001","object":"model","created":1755815280,"released":1760486400,"owned_by":"google","name":"Veo 3.1 Fast Generate","description":"Veo 3.1 Fast is a specialized, high-speed variant of Google DeepMind’s Veo 3.1 text-to-video model, optimized for rapid generation of 8-second, high-fidelity videos. It is designed to create cinematic, 1080p, or 720p content with improved prompt adherence and native audio, making it ideal for creating quick, high-quality video clips, social media content, and ad creatives.","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["text-to-video","image-to-video","first-last-frame","reference-to-video","extend-video"],"supported_resolutions":["720p","1080p","4k"],"supported_aspect_ratios":["16:9","9:16"],"supported_durations_seconds":[4,6,8],"generate_audio":true,"supported_fps":[24],"max_sample_count":4,"input_limits":{"image":{"max_count":3,"max_file_size_mb":20,"supported_formats":["jpg","jpeg","png"],"supported_sources":["url","base64"]},"video":{"max_count":1,"supported_formats":["mp4"],"supported_sources":["url"]}}},"pricing":{"video_duration_pricing":[{"resolution":"720p","audio":false,"cost_per_second":"0.1"},{"resolution":"720p","audio":true,"cost_per_second":"0.15"},{"resolution":"1080p","audio":false,"cost_per_second":"0.1"},{"resolution":"1080p","audio":true,"cost_per_second":"0.15"},{"resolution":"4k","audio":false,"cost_per_second":"0.3"},{"resolution":"4k","audio":true,"cost_per_second":"0.35"}]}},{"id":"google/veo-3.1-generate-001","object":"model","created":1755815280,"released":1760486400,"owned_by":"google","name":"Veo 3.1","description":"Veo 3.1 is Google's state-of-the-art model for generating high-fidelity, 8-second 720p, 1080p or 4k videos featuring stunning realism and natively generated audio.","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["text-to-video","image-to-video","first-last-frame","reference-to-video","extend-video"],"supported_resolutions":["720p","1080p","4k"],"supported_aspect_ratios":["16:9","9:16"],"supported_durations_seconds":[4,6,8],"generate_audio":true,"supported_fps":[24],"max_sample_count":4,"input_limits":{"image":{"max_count":3,"max_file_size_mb":20,"supported_formats":["jpg","jpeg","png"],"supported_sources":["url","base64"]},"video":{"max_count":1,"supported_formats":["mp4"],"supported_sources":["url"]}}},"pricing":{"video_duration_pricing":[{"resolution":"720p","audio":false,"cost_per_second":"0.2"},{"resolution":"720p","audio":true,"cost_per_second":"0.4"},{"resolution":"1080p","audio":false,"cost_per_second":"0.2"},{"resolution":"1080p","audio":true,"cost_per_second":"0.4"},{"resolution":"4k","audio":false,"cost_per_second":"0.4"},{"resolution":"4k","audio":true,"cost_per_second":"0.6"}]}},{"id":"inception/mercury-2","object":"model","created":1755815280,"released":1771891200,"owned_by":"inception","name":"Mercury 2","description":"A diffusion-based reasoning LLM that generates text via parallel refinement (not token-by-token), delivering real-time latency with ~1k tokens/sec plus 128K context and built-in tool/JSON support.","context_window":128000,"max_tokens":128000,"type":"language","tags":["reasoning","tool-use"],"pricing":{"input":"0.00000025","output":"0.00000075","input_cache_read":"0.000000025"}},{"id":"inception/mercury-coder-small","object":"model","created":1755815280,"released":1740528000,"owned_by":"inception","name":"Mercury Coder Small Beta","description":"Mercury Coder Small is ideal for code generation, debugging, and refactoring tasks with minimal latency.","context_window":32000,"max_tokens":16384,"type":"language","tags":["tool-use"],"pricing":{"input":"0.00000025","output":"0.000001"}},{"id":"interfaze/interfaze-beta","object":"model","created":1755815280,"released":1759795200,"owned_by":"interfaze","name":"Interfaze Beta","description":"Interfaze is an AI model built on a new architecture that merges specialized DNN/CNN models with LLMs for developer tasks that require deterministic output and high consistency like OCR, scraping, classification, STT and more.","context_window":1000000,"max_tokens":32000,"type":"language","tags":["file-input","reasoning","tool-use","vision"],"pricing":{"input":"0.0000015","output":"0.0000035"}},{"id":"klingai/kling-v2.5-turbo-i2v","object":"model","created":1755815280,"released":1758585600,"owned_by":"klingai","name":"Kling v2.5 Turbo Image-to-Video","description":"Kling 2.5 Turbo is a major update to the AI video generation model focused on significantly improving speed, video quality, temporal stability, and creative control for creators, making professional-grade AI-generated video faster, more coherent, and easier to direct from text prompts.","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["image-to-video","first-last-frame"],"supported_resolutions":["720p","1080p"],"supported_aspect_ratios":["16:9","9:16","1:1"],"supported_durations_seconds":[5,10],"generate_audio":true,"supported_fps":[30],"input_limits":{"text":{"max_chars":2500},"image":{"max_count":2,"max_file_size_mb":10,"supported_formats":["jpg","jpeg","png"],"supported_sources":["url","base64","buffer"],"min_dimension_pixels":300,"min_aspect_ratio":"2:5","max_aspect_ratio":"5:2"}}},"pricing":{"video_duration_pricing":[{"mode":"std","cost_per_second":"0.042"},{"mode":"pro","cost_per_second":"0.07"}]}},{"id":"klingai/kling-v2.5-turbo-t2v","object":"model","created":1755815280,"released":1758585600,"owned_by":"klingai","name":"Kling v2.5 Turbo Text-to-Video","description":"Kling 2.5 Turbo is a major update to the AI video generation model focused on significantly improving speed, video quality, temporal stability, and creative control for creators, making professional-grade AI-generated video faster, more coherent, and easier to direct from text prompts.","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["text-to-video"],"supported_resolutions":["720p","1080p"],"supported_aspect_ratios":["16:9","9:16","1:1"],"supported_durations_seconds":[5,10],"generate_audio":true,"supported_fps":[30],"input_limits":{"text":{"max_chars":2500}}},"pricing":{"video_duration_pricing":[{"mode":"std","cost_per_second":"0.042"},{"mode":"pro","cost_per_second":"0.07"}]}},{"id":"klingai/kling-v2.6-i2v","object":"model","created":1755815280,"released":1764720000,"owned_by":"klingai","name":"Kling v2.6 Image-to-Video","description":"Kling 2.6 introduces a groundbreaking \"Native Audio\" capability, enabling the generation of complete videos in a single go, including natural voice, action sound effects, and environmental ambient sounds, providing an immersive \"what you see if what you hear\" experience. ","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["image-to-video","first-last-frame"],"supported_resolutions":["720p","1080p"],"supported_aspect_ratios":["16:9","9:16","1:1"],"supported_durations_seconds":[5,10],"generate_audio":true,"supported_fps":[30],"input_limits":{"text":{"max_chars":2500},"image":{"max_count":2,"max_file_size_mb":10,"supported_formats":["jpg","jpeg","png"],"supported_sources":["url","base64","buffer"],"min_dimension_pixels":300,"min_aspect_ratio":"2:5","max_aspect_ratio":"5:2"}}},"pricing":{"video_duration_pricing":[{"mode":"std","cost_per_second":"0.042"},{"audio":false,"mode":"pro","cost_per_second":"0.07"},{"audio":true,"mode":"pro","cost_per_second":"0.14"}]}},{"id":"klingai/kling-v2.6-motion-control","object":"model","created":1755815280,"released":1766016000,"owned_by":"klingai","name":"Kling v2.6 Motion Control","description":"Kling 2.6 introduces a groundbreaking \"Native Audio\" capability, enabling the generation of complete videos in a single go, including natural voice, action sound effects, and environmental ambient sounds, providing an immersive \"what you see if what you hear\" experience. ","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["motion-control"],"supported_resolutions":["720p","1080p"],"supported_aspect_ratios":["16:9","9:16","1:1"],"supported_durations_seconds":[3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],"generate_audio":false,"supported_fps":[30],"input_limits":{"text":{"max_chars":2500},"image":{"max_count":1,"max_file_size_mb":10,"supported_formats":["jpg","jpeg","png"],"supported_sources":["url","base64","buffer"],"min_dimension_pixels":300,"min_aspect_ratio":"2:5","max_aspect_ratio":"5:2"},"video":{"max_count":1,"max_file_size_mb":100,"supported_formats":["mp4","mov"],"supported_sources":["url"],"min_duration_seconds":3,"max_duration_seconds":30,"min_dimension_pixels":340,"max_dimension_pixels":3850}}},"pricing":{"video_duration_pricing":[{"mode":"std","cost_per_second":"0.07"},{"mode":"pro","cost_per_second":"0.112"}]}},{"id":"klingai/kling-v2.6-t2v","object":"model","created":1755815280,"released":1764720000,"owned_by":"klingai","name":"Kling v2.6 Text-to-Video","description":"Kling 2.6 introduces a groundbreaking \"Native Audio\" capability, enabling the generation of complete videos in a single go, including natural voice, action sound effects, and environmental ambient sounds, providing an immersive \"what you see if what you hear\" experience. ","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["text-to-video"],"supported_resolutions":["720p","1080p"],"supported_aspect_ratios":["16:9","9:16","1:1"],"supported_durations_seconds":[5,10],"generate_audio":true,"supported_fps":[30],"input_limits":{"text":{"max_chars":2500}}},"pricing":{"video_duration_pricing":[{"mode":"std","cost_per_second":"0.042"},{"audio":false,"mode":"pro","cost_per_second":"0.07"},{"audio":true,"mode":"pro","cost_per_second":"0.14"}]}},{"id":"klingai/kling-v3.0-i2v","object":"model","created":1755815280,"released":1770249600,"owned_by":"klingai","name":"Kling v3.0 Image-to-Video","description":"Build upon an All-in-One product framework, the Kling 3.0 model series supports full multimodal input and output spanning text, images, audio, and video, bringing the understanding, generation, and editing of video together in one streamlined AI workflow. The models integrate multiple tasks, including text-to-video, image-to-video, reference-to-video, and in-video editing, into a single, native multimodal architecture, enabling the models to follow complex narrative logic, deliver precise shot control, and maintain strong prompt adherence.","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["image-to-video","first-last-frame","reference-to-video"],"supported_resolutions":["720p","1080p","4k"],"supported_aspect_ratios":["16:9","9:16","1:1"],"supported_durations_seconds":[3,4,5,6,7,8,9,10,11,12,13,14,15],"generate_audio":true,"supported_fps":[30],"input_limits":{"text":{"max_chars":2500},"image":{"max_count":3,"max_file_size_mb":10,"supported_formats":["jpg","jpeg","png"],"supported_sources":["url","base64","buffer"],"min_dimension_pixels":300,"min_aspect_ratio":"2:5","max_aspect_ratio":"5:2"}}},"pricing":{"video_duration_pricing":[{"audio":false,"mode":"std","cost_per_second":"0.168"},{"audio":true,"mode":"std","cost_per_second":"0.252"},{"audio":true,"mode":"std","voice_control":true,"cost_per_second":"0.308"},{"audio":false,"mode":"pro","cost_per_second":"0.224"},{"audio":true,"mode":"pro","cost_per_second":"0.336"},{"audio":true,"mode":"pro","voice_control":true,"cost_per_second":"0.392"}]}},{"id":"klingai/kling-v3.0-motion-control","object":"model","created":1755815280,"released":1772582400,"owned_by":"klingai","name":"Kling v3.0 Motion Control","description":"Kling 3.0 delivers a major leap in character fidelity for motion-driven generation, with stable facial features across multi-angle and long-duration motion, accurate complex emotions from multi-image face references, identity preservation through partial occlusions (hats, hands, fans), and steady clarity as the camera zooms, pans, or tracks.","context_window":0,"max_tokens":0,"type":"video","tags":["video-generation"],"video_capabilities":{"supported_operations":["motion-control"],"supported_resolutions":["720p","1080p"],"supported_aspect_ratios":["16:9","9:16","1:1"],"supported_durations_seconds":[3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],"generate_audio":false,"supported_fps":[30],"input_limits":{"text":{"max_chars":2500},"image":{"max_count":1,"max_file_size_mb":10,"supported_formats":["jpg","jpeg","png"],"supported_sources":["url","base64","buffer"],"min_dimension_pixels":300,"min_aspect_ratio":"2:5","max_aspect_ratio":"5:2"},"video":{"max_count":1,"max_file_size_mb":100,"supported_formats":["mp4","mov"],"supported_sources":["url"],"min_duration_seconds":3,"max_duration_seconds":30,"min_dimension_pixels":340,"max_dimension_pixels":3850}}},"pricing":{"video_duration_pricing":[{"mode":"std","cost_per_second":"0.126"},{"mode":"pro","cost_per_second":"0.168"}]}},{"id":"klingai/kling-v3.0-t2v","object":"model","created":1755815280,"released":1770249600,"owned_by":"klingai","name":"Kling v3.0 Text-to-Video","description":"Build upon an All-in-One product framework, the Kling 3.0 model series supports full multimodal input and output spanning text, images, audio, and video, bringing the understanding, generation, and editing of video together in one streamlined AI workflow. The models integrate multiple tasks, including text-to-video, image-to-video, reference-to-video, and in-video editing, into a single, native multimodal architecture, enabling the models to follow complex narrative logic, deliver precise shot control, and maintain strong prompt adherence.","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["text-to-video"],"supported_resolutions":["720p","1080p","4k"],"supported_aspect_ratios":["16:9","9:16","1:1"],"supported_durations_seconds":[3,4,5,6,7,8,9,10,11,12,13,14,15],"generate_audio":true,"supported_fps":[30],"input_limits":{"text":{"max_chars":2500}}},"pricing":{"video_duration_pricing":[{"audio":false,"mode":"std","cost_per_second":"0.168"},{"audio":true,"mode":"std","cost_per_second":"0.252"},{"audio":true,"mode":"std","voice_control":true,"cost_per_second":"0.308"},{"audio":false,"mode":"pro","cost_per_second":"0.224"},{"audio":true,"mode":"pro","cost_per_second":"0.336"},{"audio":true,"mode":"pro","voice_control":true,"cost_per_second":"0.392"}]}},{"id":"kwaipilot/kat-coder-air-v2.5","object":"model","created":1755815280,"released":1783641600,"owned_by":"kwaipilot","name":"Kat Coder Air V2.5","description":"Fast response version of Kat Coder V2.5 optimized for Agent and Claw use cases.","context_window":256000,"max_tokens":80000,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.00000015","output":"0.0000006","input_cache_read":"0.00000003"}},{"id":"kwaipilot/kat-coder-pro-v1","object":"model","created":1755815280,"released":1762646400,"owned_by":"kwaipilot","name":"KAT-Coder-Pro V1","description":"KAT-Coder-Pro V1 is KwaiKAT's most advanced agentic coding model in the KwaiKAT series. Designed specifically for agentic coding tasks, it excels in real-world software engineering scenarios, achieving a remarkable 73.4% solve rate on the SWE-Bench Verified benchmark. KAT-Coder-Pro V1 delivers top-tier coding performance and has been rigorously tested by thousands of in-house engineers. The model has been optimized for tool-use capability, multi-turn interaction, instruction following, generalization and comprehensive capabilities through a multi-stage training process, including mid-training, supervised fine-tuning (SFT), reinforcement fine-tuning (RFT), and scalable agentic RL.","context_window":256000,"max_tokens":32000,"type":"language","tags":["tool-use"],"pricing":{"input":"0.0000003","output":"0.0000012","input_cache_read":"0.00000006"}},{"id":"kwaipilot/kat-coder-pro-v2","object":"model","created":1755815280,"released":1774569600,"owned_by":"kwaipilot","name":"Kat Coder Pro V2","description":"A high-performance edition designed for complex enterprise projects and SaaS integration.","context_window":256000,"max_tokens":256000,"type":"language","tags":["tool-use","reasoning","implicit-caching"],"pricing":{"input":"0.0000003","output":"0.0000012","input_cache_read":"0.00000006"}},{"id":"kwaipilot/kat-coder-pro-v2.5","object":"model","created":1755815280,"released":1783641600,"owned_by":"kwaipilot","name":"Kat Coder Pro V2.5","description":"KAT-Coder-V2.5 is a coding-focused agentic model trained to act autonomously inside real, executable repositories rather than as a single-turn code generator.","context_window":256000,"max_tokens":80000,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.00000074","output":"0.00000296","input_cache_read":"0.00000015"}},{"id":"meta/llama-3.1-70b","object":"model","created":1755815280,"released":1721692800,"owned_by":"meta","name":"Llama 3.1 70B Instruct","description":"An update to Meta Llama 3 70B Instruct that includes an expanded 128K context length, multilinguality and improved reasoning capabilities.","context_window":128000,"max_tokens":8192,"type":"language","tags":["tool-use"],"pricing":{"input":"0.00000072","output":"0.00000072"}},{"id":"meta/llama-3.1-8b","object":"model","created":1755815280,"released":1721692800,"owned_by":"meta","name":"Llama 3.1 8B Instruct","description":"An update to Meta Llama 3 8B Instruct that includes an expanded 128K context length, multilinguality and improved reasoning capabilities.","context_window":128000,"max_tokens":8192,"type":"language","tags":["tool-use","implicit-caching"],"pricing":{"input":"0.00000022","output":"0.00000022"}},{"id":"meta/llama-3.3-70b","object":"model","created":1755815280,"released":1733443200,"owned_by":"meta","name":"Llama 3.3 70B Instruct","description":"Where performance meets efficiency. This model supports high-performance conversational AI designed for content creation, enterprise applications, and research, offering advanced language understanding capabilities, including text summarization, classification, sentiment analysis, and code generation.","context_window":128000,"max_tokens":8192,"type":"language","tags":["tool-use"],"pricing":{"input":"0.00000072","output":"0.00000072"}},{"id":"meta/llama-4-maverick","object":"model","created":1755815280,"released":1743811200,"owned_by":"meta","name":"Llama 4 Maverick 17B Instruct","description":"As a general purpose LLM, Llama 4 Maverick contains 17 billion active parameters, 128 experts, and 400 billion total parameters, offering high quality at a lower price compared to Llama 3.3 70B.","context_window":128000,"max_tokens":8192,"type":"language","tags":["tool-use","vision"],"pricing":{"input":"0.00000024","output":"0.00000097"}},{"id":"meta/llama-4-scout","object":"model","created":1755815280,"released":1743811200,"owned_by":"meta","name":"Llama 4 Scout 17B Instruct","description":"Llama 4 Scout is the best multimodal model in the world in its class and is more powerful than our Llama 3 models, while fitting in a single H100 GPU. Additionally, Llama 4 Scout supports an industry-leading context window of up to 10M tokens.","context_window":128000,"max_tokens":8192,"type":"language","tags":["tool-use","vision"],"pricing":{"input":"0.00000017","output":"0.00000066"}},{"id":"meta/muse-spark-1.1","object":"model","created":1755815280,"released":1783555200,"owned_by":"meta","name":"Muse Spark 1.1","description":"Muse Spark 1.1 is strongest at agentic performance, tool use, and computer use. It does well on long-running tasks with 1M token context window, can delegate execution to sub-agents running in parallel, and is trained to use computer interfaces on desktop, mobile, or browser.","context_window":1048576,"max_tokens":1048576,"type":"language","tags":["reasoning","tool-use","implicit-caching","file-input","vision"],"pricing":{"input":"0.00000125","output":"0.00000425","input_cache_read":"0.00000015"}},{"id":"minimax/minimax-m2","object":"model","created":1755815280,"released":1761523200,"owned_by":"minimax","name":"MiniMax M2","description":"MiniMax-M2 redefines efficiency for agents. It is a compact, fast, and cost-effective MoE model (230 billion total parameters with 10 billion active parameters) built for elite performance in coding and agentic tasks, all while maintaining powerful general intelligence.","context_window":205000,"max_tokens":205000,"type":"language","tags":["reasoning","tool-use"],"pricing":{"input":"0.0000003","output":"0.0000012","input_cache_read":"0.00000003","input_cache_write":"0.000000375"}},{"id":"minimax/minimax-m2.1","object":"model","created":1755815280,"released":1766448000,"owned_by":"minimax","name":"MiniMax M2.1","description":"MiniMax 2.1 is MiniMax's latest model, optimized specifically for robustness in coding, tool use, instruction following, and long-horizon planning.","context_window":204800,"max_tokens":131072,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.0000003","output":"0.0000012","input_cache_read":"0.00000003","input_cache_write":"0.000000375"}},{"id":"minimax/minimax-m2.1-lightning","object":"model","created":1755815280,"released":1766448000,"owned_by":"minimax","name":"MiniMax M2.1 Lightning","description":"MiniMax-M2.1-lightning is a faster version of MiniMax-M2.1, offering the same performance but with significantly higher throughput (output speed ~100 TPS, MiniMax-M2 output speed ~60 TPS).","context_window":204800,"max_tokens":131072,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.0000003","output":"0.0000024","input_cache_read":"0.00000003","input_cache_write":"0.000000375"}},{"id":"minimax/minimax-m2.5","object":"model","created":1755815280,"released":1770854400,"owned_by":"minimax","name":"MiniMax M2.5","description":"MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. It is capable of handling the entire development process of various complex systems. It covers full-stack projects across multiple platforms including Web, Android, iOS, Windows, and Mac, encompassing server-side APIs, functional logic, and databases.","context_window":204800,"max_tokens":131000,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.0000003","output":"0.0000012","input_cache_read":"0.00000003","input_cache_write":"0.000000375"}},{"id":"minimax/minimax-m2.5-highspeed","object":"model","created":1755815280,"released":1770854400,"owned_by":"minimax","name":"MiniMax M2.5 High Speed","description":"M2.5 highspeed: Same performance, faster and more agile (output speed approximately 100 tps)","context_window":204800,"max_tokens":131000,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.0000006","output":"0.0000024","input_cache_read":"0.00000003","input_cache_write":"0.000000375"}},{"id":"minimax/minimax-m2.7","object":"model","created":1755815280,"released":1773792000,"owned_by":"minimax","name":"MiniMax M2.7","description":"M2.7 delivers outstanding performance in real-world software engineering, including end-to-end full project delivery, log analysis and bug troubleshooting, code security, machine learning, and more.","context_window":204800,"max_tokens":131000,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.0000003","output":"0.0000012","input_cache_read":"0.00000006","input_cache_write":"0.000000375"}},{"id":"minimax/minimax-m2.7-highspeed","object":"model","created":1755815280,"released":1773792000,"owned_by":"minimax","name":"MiniMax M2.7 High Speed","description":"M2.7 Highspeed: Same performance, faster and more agile (output speed approximately 100 tps)","context_window":204800,"max_tokens":131100,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.0000006","output":"0.0000024","input_cache_read":"0.00000006","input_cache_write":"0.000000375"}},{"id":"minimax/minimax-m3","object":"model","created":1755815280,"released":1780185600,"owned_by":"minimax","name":"MiniMax M3","description":"MiniMax-M3 is a frontier-class foundation model that unites the three capabilities defining today's frontier: a 1M-token context window, frontier coding and agentic performance, and native multimodality — the first open-weight model to deliver all three in a single system.","context_window":1000000,"max_tokens":1000000,"type":"language","tags":["reasoning","tool-use","vision","file-input","implicit-caching"],"pricing":{"input":"0.0000003","input_tiers":[{"cost":"0.0000003","min":0,"max":512000},{"cost":"0.0000012","min":512000}],"output":"0.0000012","output_tiers":[{"cost":"0.0000012","min":0,"max":512000},{"cost":"0.0000048","min":512000}],"input_cache_read":"0.00000006","input_cache_read_tiers":[{"cost":"0.00000006","min":0,"max":512000},{"cost":"0.00000024","min":512000}]}},{"id":"mistral/codestral","object":"model","created":1755815280,"released":1716940800,"owned_by":"mistral","name":"Mistral Codestral","description":"Mistral's cutting-edge language model for coding released end of July 2025, Codestral specializes in low-latency, high-frequency tasks such as fill-in-the-middle (FIM), code correction and test generation.","context_window":128000,"max_tokens":4000,"type":"language","tags":["tool-use"],"pricing":{"input":"0.0000003","output":"0.0000009"}},{"id":"mistral/codestral-embed","object":"model","created":1755815280,"released":1748390400,"owned_by":"mistral","name":"Codestral Embed","description":"Code embedding model that can embed code databases and repositories to power coding assistants.","context_window":0,"max_tokens":0,"type":"embedding","pricing":{"input":"0.00000015"}},{"id":"mistral/devstral-2","object":"model","created":1755815280,"released":1765238400,"owned_by":"mistral","name":"Devstral 2","description":"An enterprise-grade text model that excels at using tools to explore codebases, editing multiple files, and powering software engineering agents.","context_window":256000,"max_tokens":256000,"type":"language","tags":["tool-use"],"pricing":{"input":"0.0000004","output":"0.000002"}},{"id":"mistral/devstral-small-2","object":"model","created":1755815280,"released":1765238400,"owned_by":"mistral","name":"Devstral Small 2","description":"Our open source model that excels at using tools to explore codebases, editing multiple files, and powering software engineering agents.","context_window":256000,"max_tokens":256000,"type":"language","tags":["tool-use","vision"],"pricing":{"input":"0.0000001","output":"0.0000003"}},{"id":"mistral/magistral-medium","object":"model","created":1755815280,"released":1749513600,"owned_by":"mistral","name":"Magistral Medium 2509","description":"Complex thinking, backed by deep understanding, with transparent reasoning you can follow and verify. The model excels in maintaining high-fidelity reasoning across numerous languages, even when switching between languages mid-task.","context_window":128000,"max_tokens":64000,"type":"language","tags":["reasoning","tool-use","vision"],"pricing":{"input":"0.000002","output":"0.000005"}},{"id":"mistral/magistral-small","object":"model","created":1755815280,"released":1749513600,"owned_by":"mistral","name":"Magistral Small 2509","description":"Complex thinking, backed by deep understanding, with transparent reasoning you can follow and verify. The model excels in maintaining high-fidelity reasoning across numerous languages, even when switching between languages mid-task.","context_window":128000,"max_tokens":64000,"type":"language","tags":["reasoning","tool-use","vision"],"pricing":{"input":"0.0000005","output":"0.0000015"}},{"id":"mistral/ministral-14b","object":"model","created":1755815280,"released":1764633600,"owned_by":"mistral","name":"Ministral 14B","description":"Ministral 3 14B is the largest model in the Ministral 3 family, offering state-of-the-art capabilities and performance comparable to its larger Mistral Small 3.2 24B counterpart. Optimized for local deployment, it delivers high performance across diverse hardware, including local setups.","context_window":256000,"max_tokens":256000,"type":"language","tags":["file-input","tool-use","vision"],"pricing":{"input":"0.0000002","output":"0.0000002"}},{"id":"mistral/ministral-3b","object":"model","created":1755815280,"released":1729036800,"owned_by":"mistral","name":"Ministral 3B","description":"A compact, efficient model for on-device tasks like smart assistants and local analytics, offering low-latency performance.","context_window":128000,"max_tokens":4000,"type":"language","tags":["tool-use"],"pricing":{"input":"0.0000001","output":"0.0000001"}},{"id":"mistral/ministral-8b","object":"model","created":1755815280,"released":1729036800,"owned_by":"mistral","name":"Ministral 8B","description":"A more powerful model with faster, memory-efficient inference, ideal for complex workflows and demanding edge applications.","context_window":128000,"max_tokens":4000,"type":"language","tags":["tool-use"],"pricing":{"input":"0.00000015","output":"0.00000015"}},{"id":"mistral/mistral-embed","object":"model","created":1755815280,"released":1702252800,"owned_by":"mistral","name":"Mistral Embed","description":"General-purpose text embedding model for semantic search, similarity, clustering, and RAG workflows.","context_window":0,"max_tokens":0,"type":"embedding","pricing":{"input":"0.0000001"}},{"id":"mistral/mistral-large-3","object":"model","created":1755815280,"released":1764633600,"owned_by":"mistral","name":"Mistral Large 3","description":"Mistral Large 3 2512 is Mistral’s most capable model to date. It has a sparse mixture-of-experts architecture with 41B active parameters (675B total).","context_window":256000,"max_tokens":256000,"type":"language","tags":["tool-use","vision"],"pricing":{"input":"0.0000005","output":"0.0000015"}},{"id":"mistral/mistral-medium","object":"model","created":1755815280,"released":1746576000,"owned_by":"mistral","name":"Mistral Medium 3.1","description":"Mistral Medium 3 delivers frontier performance while being an order of magnitude less expensive. For instance, the model performs at or above 90% of Claude Sonnet 3.7 on benchmarks across the board at a significantly lower cost.","context_window":128000,"max_tokens":64000,"type":"language","tags":["tool-use","vision"],"pricing":{"input":"0.0000004","output":"0.000002"}},{"id":"mistral/mistral-medium-3.5","object":"model","created":1755815280,"released":1777420800,"owned_by":"mistral","name":"Mistral Medium Latest","description":"Mistral's frontier-class multimodal model optimized for agentic and coding use cases.","context_window":256000,"max_tokens":256000,"type":"language","tags":["reasoning","tool-use","vision"],"pricing":{"input":"0.0000015","output":"0.0000075"}},{"id":"mistral/mistral-nemo","object":"model","created":1755815280,"released":1721260800,"owned_by":"mistral","name":"Mistral Nemo 12B","description":"A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA. The model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi. It supports function calling and is released under the Apache 2.0 license.","context_window":128000,"max_tokens":128000,"type":"language","tags":["tool-use"],"pricing":{"input":"0.00000015","output":"0.00000015"}},{"id":"mistral/mistral-small","object":"model","created":1755815280,"released":1726531200,"owned_by":"mistral","name":"Mistral Small","description":"Mistral Small is the ideal choice for simple tasks that one can do in bulk - like Classification, Customer Support, or Text Generation. It offers excellent performance at an affordable price point.","context_window":32000,"max_tokens":4000,"type":"language","tags":["tool-use","vision"],"pricing":{"input":"0.0000001","output":"0.0000003"}},{"id":"mistral/pixtral-12b","object":"model","created":1755815280,"released":1726531200,"owned_by":"mistral","name":"Pixtral 12B 2409","description":"A 12B model with image understanding capabilities in addition to text.","context_window":128000,"max_tokens":4000,"type":"language","tags":["tool-use","vision"],"pricing":{"input":"0.00000015","output":"0.00000015"}},{"id":"moonshotai/kimi-k2","object":"model","created":1755815280,"released":1752192000,"owned_by":"moonshotai","name":"Kimi K2 Instruct","description":"Kimi K2 is a state-of-the-art mixture-of-experts (MoE) language model with 32 billion activated parameters and 1 trillion total parameters. Trained with the Muon optimizer, Kimi K2 achieves exceptional performance across frontier knowledge, reasoning, and coding tasks while being meticulously optimized for agentic capabilities.","context_window":131072,"max_tokens":131072,"type":"language","tags":["tool-use"],"pricing":{"input":"0.00000057","output":"0.0000023"}},{"id":"moonshotai/kimi-k2-thinking","object":"model","created":1755815280,"released":1762387200,"owned_by":"moonshotai","name":"Kimi K2 Thinking","description":"Kimi K2 Thinking is an advanced open-source thinking model by Moonshot AI. It can execute up to 200 – 300 sequential tool calls without human interference, reasoning coherently across hundreds of steps to solve complex problems. Built as a thinking agent, it reasons step by step while using tools, achieving state-of-the-art performance on Humanity's Last Exam (HLE), BrowseComp, and other benchmarks, with major gains in reasoning, agentic search, coding, writing, and general capabilities.","context_window":216144,"max_tokens":216144,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.00000047","output":"0.000002","input_cache_read":"0.000000141"}},{"id":"moonshotai/kimi-k2.5","object":"model","created":1755815280,"released":1769385600,"owned_by":"moonshotai","name":"Kimi K2.5","description":"kimi-k2.5 is Kimi's most versatile model to date, featuring a native multimodal architecture that supports both visual and text input, thinking and non-thinking modes, and dialogue and agent tasks.","context_window":262114,"max_tokens":262114,"type":"language","tags":["reasoning","vision","tool-use","implicit-caching"],"regions":["us"],"pricing":{"input":"0.0000006","output":"0.000003","input_cache_read":"0.0000001"}},{"id":"moonshotai/kimi-k2.6","object":"model","created":1755815280,"released":1776643200,"owned_by":"moonshotai","name":"Kimi K2.6","description":"Kimi K2.6 demonstrates particularly strong performance in long-horizon coding tasks and produces professional-grade design with code and vision.","context_window":262000,"max_tokens":262000,"type":"language","tags":["implicit-caching","reasoning","tool-use","vision"],"regions":["us"],"pricing":{"input":"0.00000095","output":"0.000004","input_cache_read":"0.00000016"}},{"id":"moonshotai/kimi-k2.7-code","object":"model","created":1755815280,"released":1781222400,"owned_by":"moonshotai","name":"Kimi K2.7 Code","description":"Kimi-K2.7-Code is a coding model from Moonshot AI. It has  improved coding & agent performance over K2.6, more reasoning efficiency with less overthinking, and improved instruction following for long-horizon coding.","context_window":256000,"max_tokens":32768,"type":"language","tags":["reasoning","tool-use","implicit-caching","file-input","vision"],"regions":["us"],"pricing":{"input":"0.00000095","output":"0.000004","input_cache_read":"0.00000019"}},{"id":"moonshotai/kimi-k2.7-code-highspeed","object":"model","created":1755815280,"released":1781481600,"owned_by":"moonshotai","name":"Kimi K2.7 Code High Speed","description":"Kimi K2.7 Code HighSpeed is the high-speed version of Kimi K2.7 Code, the same model as Kimi K2.7 Code, but with an output speed of approximately 180 Tokens/s and up to 260 Tokens/s in short context scenarios, delivering a more extreme coding experience.","context_window":262144,"max_tokens":32768,"type":"language","tags":["reasoning","tool-use","implicit-caching","file-input","vision"],"pricing":{"input":"0.0000019","output":"0.000008","input_cache_read":"0.00000038"}},{"id":"moonshotai/kimi-k3","object":"model","created":1755815280,"released":1784160000,"owned_by":"moonshotai","name":"Kimi K3","description":"Kimi’s flagship model for long-horizon coding and end-to-end knowledge work, with a 1M-token context window.","context_window":1000000,"max_tokens":131072,"type":"language","tags":["reasoning","tool-use","implicit-caching","file-input","vision"],"pricing":{"input":"0.000003","output":"0.000015","input_cache_read":"0.0000003"}},{"id":"morph/morph-v3-fast","object":"model","created":1755815280,"released":1751846400,"owned_by":"morph","name":"Morph V3 Fast","description":"Morph offers a specialized AI model that applies code changes suggested by frontier models (like Claude or GPT-4o) to your existing code files FAST - 4500+ tokens/second. It acts as the final step in the AI coding workflow. Supports 16k input tokens and 16k output tokens.","context_window":81920,"max_tokens":16384,"type":"language","pricing":{"input":"0.0000008","output":"0.0000012"}},{"id":"morph/morph-v3-large","object":"model","created":1755815280,"released":1751846400,"owned_by":"morph","name":"Morph V3 Large","description":"Morph offers a specialized AI model that applies code changes suggested by frontier models (like Claude or GPT-4o) to your existing code files FAST - 2500+ tokens/second. It acts as the final step in the AI coding workflow. Supports 16k input tokens and 16k output tokens.","context_window":81920,"max_tokens":16384,"type":"language","pricing":{"input":"0.0000009","output":"0.0000019"}},{"id":"nvidia/nemotron-3-nano-30b-a3b","object":"model","created":1755815280,"released":1765756800,"owned_by":"nvidia","name":"Nemotron 3 Nano 30B A3B","description":"NVIDIA Nemotron 3 Nano is an open reasoning model optimized for fast, cost-efficient inference. Built with a hybrid MoE and Mamba architecture and trained on NVIDIA-curated synthetic reasoning data, it delivers strong multi-step reasoning with stable latency and predictable performance for agentic and production workloads.","context_window":262144,"max_tokens":262144,"type":"language","tags":["reasoning","tool-use"],"pricing":{"input":"0.00000005","output":"0.00000024"}},{"id":"nvidia/nemotron-3-super-120b-a12b","object":"model","created":1755815280,"released":1773187200,"owned_by":"nvidia","name":"NVIDIA Nemotron 3 Super 120B A12B","description":"NVIDIA Nemotron 3 Super is a 120B-parameter open hybrid MoE model, activating just 12B parameters for maximum compute efficiency and accuracy in complex multi-agent applications. It delivers up to 7x higher throughput, providing fast, cost-efficient inference for agentic tasks. Additionally, a long context window gives the model long-term memory, preventing AI agents from losing focus on long, multi-step tasks and ensuring high-accuracy results. Fully open with weights, datasets, and recipes, Super allows easy customization and secure deployment anywhere.","context_window":256000,"max_tokens":32000,"type":"language","tags":["reasoning","tool-use"],"regions":["us"],"pricing":{"input":"0.00000015","output":"0.00000065"}},{"id":"nvidia/nemotron-3-ultra-550b-a55b","object":"model","created":1755815280,"released":1780531200,"owned_by":"nvidia","name":"Nemotron 3 Ultra","description":"A 550B parameter (55B active) open reasoning model from NVIDIA, built for long-running agent workflows. It uses a hybrid Mamba-Transformer MoE architecture and supports a 1M token context window.","context_window":1000000,"max_tokens":65000,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"regions":["us"],"pricing":{"input":"0.0000006","output":"0.0000024","input_cache_read":"0.00000012"}},{"id":"nvidia/nemotron-nano-12b-v2-vl","object":"model","created":1755815280,"released":1761609600,"owned_by":"nvidia","name":"Nvidia Nemotron Nano 12B V2 VL","description":"The model is an auto-regressive vision language model that uses an optimized transformer architecture. The model enables multi-image reasoning and video understanding, along with strong document intelligence, visual Q&A and summarization capabilities.","context_window":131072,"max_tokens":131072,"type":"language","tags":["vision","reasoning","tool-use"],"pricing":{"input":"0.0000002","output":"0.0000006"}},{"id":"nvidia/nemotron-nano-9b-v2","object":"model","created":1755815280,"released":1755475200,"owned_by":"nvidia","name":"Nvidia Nemotron Nano 9B V2","description":"NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. The model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.\\","context_window":131072,"max_tokens":131072,"type":"language","tags":["reasoning","tool-use"],"pricing":{"input":"0.00000006","output":"0.00000023"}},{"id":"openai/gpt-3.5-turbo","object":"model","created":1755815280,"released":1677628800,"owned_by":"openai","name":"GPT-3.5 Turbo","description":"OpenAI's most capable and cost effective model in the GPT-3.5 family optimized for chat purposes, but also works well for traditional completions tasks.","context_window":16385,"max_tokens":4096,"type":"language","tags":["tool-use"],"pricing":{"input":"0.0000005","output":"0.0000015"}},{"id":"openai/gpt-4-turbo","object":"model","created":1755815280,"released":1712620800,"owned_by":"openai","name":"GPT-4 Turbo","description":"gpt-4-turbo from OpenAI has broad general knowledge and domain expertise allowing it to follow complex instructions in natural language and solve difficult problems accurately. It has a knowledge cutoff of April 2023 and a 128,000 token context window.","context_window":128000,"max_tokens":4096,"type":"language","tags":["tool-use","vision"],"pricing":{"input":"0.00001","output":"0.00003"}},{"id":"openai/gpt-4.1","object":"model","created":1755815280,"released":1744588800,"owned_by":"openai","name":"GPT-4.1","description":"GPT 4.1 is OpenAI's flagship model for complex tasks. It is well suited for problem solving across domains.","context_window":1047576,"max_tokens":32768,"type":"language","tags":["file-input","implicit-caching","tool-use","vision","web-search"],"pricing":{"input":"0.000002","output":"0.000008","input_cache_read":"0.0000005","web_search":"10","service_tiers":{"priority":{"input":"0.0000035","output":"0.000014","input_cache_read":"0.000000875"}}}},{"id":"openai/gpt-4.1-mini","object":"model","created":1755815280,"released":1747180800,"owned_by":"openai","name":"GPT-4.1 mini","description":"GPT 4.1 mini provides a balance between intelligence, speed, and cost that makes it an attractive model for many use cases.","context_window":1047576,"max_tokens":32768,"type":"language","tags":["file-input","implicit-caching","tool-use","vision","web-search"],"pricing":{"input":"0.0000004","output":"0.0000016","input_cache_read":"0.0000001","web_search":"10","service_tiers":{"priority":{"input":"0.0000007","output":"0.0000028","input_cache_read":"0.000000175"}}}},{"id":"openai/gpt-4.1-nano","object":"model","created":1755815280,"released":1744588800,"owned_by":"openai","name":"GPT-4.1 nano","description":"GPT-4.1 nano is the fastest, most cost-effective GPT 4.1 model.","context_window":1047576,"max_tokens":32768,"type":"language","tags":["file-input","implicit-caching","tool-use","vision","web-search"],"pricing":{"input":"0.0000001","output":"0.0000004","input_cache_read":"0.000000025","web_search":"10","service_tiers":{"priority":{"input":"0.0000002","output":"0.0000008","input_cache_read":"0.00000005"}}}},{"id":"openai/gpt-4o","object":"model","created":1755815280,"released":1715558400,"owned_by":"openai","name":"GPT-4o","description":"GPT-4o from OpenAI has broad general knowledge and domain expertise allowing it to follow complex instructions in natural language and solve difficult problems accurately. It matches GPT-4 Turbo performance with a faster and cheaper API.","context_window":128000,"max_tokens":16384,"type":"language","tags":["file-input","implicit-caching","tool-use","vision","web-search"],"pricing":{"input":"0.0000025","output":"0.00001","input_cache_read":"0.00000125","web_search":"10","service_tiers":{"priority":{"input":"0.00000425","output":"0.000017","input_cache_read":"0.000002125"}}}},{"id":"openai/gpt-4o-mini","object":"model","created":1755815280,"released":1721260800,"owned_by":"openai","name":"GPT-4o mini","description":"GPT-4o mini from OpenAI is their most advanced and cost-efficient small model. It is multi-modal (accepting text or image inputs and outputting text) and has higher intelligence than gpt-3.5-turbo but is just as fast.","context_window":128000,"max_tokens":16384,"type":"language","tags":["file-input","implicit-caching","tool-use","vision","web-search"],"pricing":{"input":"0.00000015","output":"0.0000006","input_cache_read":"0.000000075","web_search":"10","service_tiers":{"priority":{"input":"0.00000025","output":"0.000001","input_cache_read":"0.000000125"}}}},{"id":"openai/gpt-4o-mini-search-preview","object":"model","created":1755815280,"released":1741737600,"owned_by":"openai","name":"GPT 4o Mini Search Preview","description":"GPT-4o mini Search Preview is a specialized model trained to understand and execute web search queries with the Chat Completions API. In addition to token fees, web search queries have a fee per tool call.","context_window":128000,"max_tokens":16384,"type":"language","tags":["web-search"],"pricing":{"input":"0.00000015","output":"0.0000006","web_search":"10"}},{"id":"openai/gpt-4o-mini-transcribe","object":"model","created":1755815280,"released":1710288000,"owned_by":"openai","name":"GPT-4o mini Transcribe","description":"GPT-4o mini Transcribe is a speech-to-text model that uses GPT-4o mini to transcribe audio. It offers improvements to word error rate and better language recognition and accuracy compared to original Whisper models. Use it for more accurate transcripts.","type":"transcription","pricing":{"input":"0.00000125","output":"0.000005"}},{"id":"openai/gpt-4o-transcribe","object":"model","created":1755815280,"released":1710288000,"owned_by":"openai","name":"GPT-4o Transcribe","description":"GPT-4o Transcribe is a speech-to-text model that uses GPT-4o to transcribe audio. It offers improvements to word error rate and better language recognition and accuracy compared to original Whisper models. Use it for more accurate transcripts.","type":"transcription","pricing":{"input":"0.0000025","output":"0.00001"}},{"id":"openai/gpt-5","object":"model","created":1755815280,"released":1754524800,"owned_by":"openai","name":"GPT-5","description":"GPT-5 is OpenAI's flagship language model that excels at complex reasoning, broad real-world knowledge, code-intensive, and multi-step agentic tasks.","context_window":400000,"max_tokens":128000,"type":"language","tags":["file-input","implicit-caching","reasoning","tool-use","vision","web-search"],"pricing":{"input":"0.00000125","output":"0.00001","input_cache_read":"0.000000125","web_search":"10","service_tiers":{"priority":{"input":"0.0000025","output":"0.00002","input_cache_read":"0.00000025"},"flex":{"input":"0.000000625","output":"0.000005","input_cache_read":"0.0000000625"}}}},{"id":"openai/gpt-5-chat","object":"model","created":1755815280,"released":1754524800,"owned_by":"openai","name":"GPT 5 Chat","description":"GPT-5 Chat points to the GPT-5 snapshot currently used in ChatGPT.","context_window":128000,"max_tokens":16384,"type":"language","tags":["file-input","implicit-caching","tool-use","vision","web-search"],"pricing":{"input":"0.00000125","output":"0.00001","input_cache_read":"0.000000125","web_search":"10"}},{"id":"openai/gpt-5-codex","object":"model","created":1755815280,"released":1757894400,"owned_by":"openai","name":"GPT-5-Codex","description":"GPT-5-Codex is a version of GPT-5 optimized for agentic coding tasks in Codex or similar environments.","context_window":400000,"max_tokens":128000,"type":"language","tags":["file-input","implicit-caching","reasoning","tool-use","vision","web-search"],"pricing":{"input":"0.00000125","output":"0.00001","input_cache_read":"0.000000125","web_search":"10","service_tiers":{"priority":{"input":"0.0000025","output":"0.00002","input_cache_read":"0.00000025"}}}},{"id":"openai/gpt-5-mini","object":"model","created":1755815280,"released":1754524800,"owned_by":"openai","name":"GPT-5 mini","description":"GPT-5 mini is a cost optimized model that excels at reasoning/chat tasks. It offers an optimal balance between speed, cost, and capability.","context_window":400000,"max_tokens":128000,"type":"language","tags":["file-input","implicit-caching","reasoning","tool-use","vision","web-search"],"pricing":{"input":"0.00000025","output":"0.000002","input_cache_read":"0.000000025","web_search":"10","service_tiers":{"priority":{"input":"0.00000045","output":"0.0000036","input_cache_read":"0.000000045"},"flex":{"input":"0.000000125","output":"0.000001","input_cache_read":"0.0000000125"}}}},{"id":"openai/gpt-5-nano","object":"model","created":1755815280,"released":1754524800,"owned_by":"openai","name":"GPT-5 nano","description":"GPT-5 nano is a high throughput model that excels at simple instruction or classification tasks.","context_window":400000,"max_tokens":128000,"type":"language","tags":["file-input","implicit-caching","reasoning","tool-use","vision","web-search"],"pricing":{"input":"0.00000005","output":"0.0000004","input_cache_read":"0.000000005","web_search":"10","service_tiers":{"flex":{"input":"0.000000025","output":"0.0000002","input_cache_read":"0.0000000025"}}}},{"id":"openai/gpt-5-pro","object":"model","created":1755815280,"released":1759708800,"owned_by":"openai","name":"GPT-5 pro","description":"GPT-5 pro uses more compute to think harder and provide consistently better answers. Since GPT-5 pro is designed to tackle tough problems, some requests may take several minutes to finish.","context_window":400000,"max_tokens":272000,"type":"language","tags":["file-input","reasoning","tool-use","vision","web-search"],"pricing":{"input":"0.000015","output":"0.00012","web_search":"10"}},{"id":"openai/gpt-5.1-codex","object":"model","created":1755815280,"released":1762905600,"owned_by":"openai","name":"GPT-5.1-Codex","description":"GPT-5.1-Codex is a version of GPT-5.1 optimized for agentic coding tasks in Codex or similar environments.","context_window":400000,"max_tokens":128000,"type":"language","tags":["file-input","implicit-caching","reasoning","tool-use","vision","web-search"],"pricing":{"input":"0.00000125","output":"0.00001","input_cache_read":"0.000000125","web_search":"10","service_tiers":{"priority":{"input":"0.0000025","output":"0.00002","input_cache_read":"0.00000025"}}}},{"id":"openai/gpt-5.1-codex-max","object":"model","created":1755815280,"released":1763510400,"owned_by":"openai","name":"GPT 5.1 Codex Max","description":"GPT‑5.1-Codex-Max is purpose-built for agentic coding.","context_window":400000,"max_tokens":128000,"type":"language","tags":["file-input","implicit-caching","reasoning","tool-use","vision","web-search"],"pricing":{"input":"0.00000125","output":"0.00001","input_cache_read":"0.000000125","web_search":"10","service_tiers":{"priority":{"input":"0.0000025","output":"0.00002","input_cache_read":"0.00000025"}}}},{"id":"openai/gpt-5.1-codex-mini","object":"model","created":1755815280,"released":1762905600,"owned_by":"openai","name":"GPT 5.1 Codex Mini","description":"GPT-5.1 Codex mini is a smaller, faster, and cheaper version of GPT-5.1 Codex.","context_window":400000,"max_tokens":128000,"type":"language","tags":["file-input","implicit-caching","reasoning","tool-use","vision","web-search"],"pricing":{"input":"0.00000025","output":"0.000002","input_cache_read":"0.000000025","web_search":"10"}},{"id":"openai/gpt-5.1-instant","object":"model","created":1755815280,"released":1762905600,"owned_by":"openai","name":"GPT-5.1 Instant","description":"GPT-5.1 Instant (or GPT-5.1 chat) is a warmer and more conversational version of GPT-5-chat, with improved instruction following and adaptive reasoning for deciding when to think before responding.","context_window":128000,"max_tokens":16384,"type":"language","tags":["file-input","implicit-caching","tool-use","vision","web-search"],"pricing":{"input":"0.00000125","output":"0.00001","input_cache_read":"0.000000125","web_search":"10"}},{"id":"openai/gpt-5.1-thinking","object":"model","created":1755815280,"released":1762905600,"owned_by":"openai","name":"GPT 5.1 Thinking","description":"An upgraded version of GPT-5 that adapts thinking time more precisely to the question to spend more time on complex questions and respond more quickly to simpler tasks.","context_window":400000,"max_tokens":128000,"type":"language","tags":["file-input","implicit-caching","reasoning","tool-use","vision","web-search"],"pricing":{"input":"0.00000125","output":"0.00001","input_cache_read":"0.000000125","web_search":"10","service_tiers":{"priority":{"input":"0.0000025","output":"0.00002","input_cache_read":"0.00000025"},"flex":{"input":"0.000000625","output":"0.000005","input_cache_read":"0.0000000625"}}}},{"id":"openai/gpt-5.2","object":"model","created":1755815280,"released":1765411200,"owned_by":"openai","name":"GPT 5.2","description":"GPT-5.2 is OpenAI's best general-purpose model, part of the GPT-5 flagship model family. It's their most intelligent model yet for both general and agentic tasks.","context_window":400000,"max_tokens":128000,"type":"language","tags":["file-input","implicit-caching","reasoning","tool-use","vision","web-search"],"pricing":{"input":"0.00000175","output":"0.000014","input_cache_read":"0.000000175","web_search":"10","service_tiers":{"priority":{"input":"0.0000035","output":"0.000028","input_cache_read":"0.00000035"},"flex":{"input":"0.000000875","output":"0.000007","input_cache_read":"0.0000000875"}}}},{"id":"openai/gpt-5.2-chat","object":"model","created":1755815280,"released":1765411200,"owned_by":"openai","name":"GPT 5.2 Chat","description":"The model powering ChatGPT is gpt-5.2-chat-latest: this is OpenAI's best general-purpose model, part of the GPT-5 flagship model family.","context_window":128000,"max_tokens":16384,"type":"language","tags":["file-input","implicit-caching","tool-use","vision","web-search"],"pricing":{"input":"0.00000175","output":"0.000014","input_cache_read":"0.000000175","web_search":"10"}},{"id":"openai/gpt-5.2-codex","object":"model","created":1755815280,"released":1766016000,"owned_by":"openai","name":"GPT 5.2 Codex","description":"GPT‑5.2-Codex is a version of GPT‑5.2⁠ further optimized for agentic coding in Codex, including improvements on long-horizon work through context compaction, stronger performance on large code changes like refactors and migrations, improved performance in Windows environments, and significantly stronger cybersecurity capabilities.","context_window":400000,"max_tokens":128000,"type":"language","tags":["file-input","implicit-caching","reasoning","tool-use","vision","web-search"],"pricing":{"input":"0.00000175","output":"0.000014","input_cache_read":"0.000000175","web_search":"10","service_tiers":{"priority":{"input":"0.0000035","output":"0.000028","input_cache_read":"0.00000035"}}}},{"id":"openai/gpt-5.2-pro","object":"model","created":1755815280,"released":1765411200,"owned_by":"openai","name":"GPT 5.2 ","description":"Version of GPT-5.2 that produces smarter and more precise responses.","context_window":400000,"max_tokens":128000,"type":"language","tags":["tool-use","vision","reasoning","file-input","web-search"],"pricing":{"input":"0.000021","output":"0.000168","web_search":"10"}},{"id":"openai/gpt-5.3-chat","object":"model","created":1755815280,"released":1772496000,"owned_by":"openai","name":"GPT-5.3 Chat","description":"The model powering ChatGPT is gpt-5.3-chat-latest: this is OpenAI's best general-purpose model, part of the GPT-5 flagship model family.","context_window":128000,"max_tokens":16384,"type":"language","tags":["file-input","implicit-caching","tool-use","vision","web-search"],"pricing":{"input":"0.00000175","output":"0.000014","input_cache_read":"0.000000175","web_search":"10"}},{"id":"openai/gpt-5.3-codex","object":"model","created":1755815280,"released":1770249600,"owned_by":"openai","name":"GPT 5.3 Codex","description":"GPT-5.3-Codex advances both the frontier coding performance of GPT‑5.2-Codex and the reasoning and professional knowledge capabilities of GPT‑5.2, together in one model, which is also 25% faster. This enables it to take on long-running tasks that involve research, tool use, and complex execution.","context_window":400000,"max_tokens":128000,"type":"language","tags":["file-input","implicit-caching","reasoning","tool-use","vision","web-search"],"pricing":{"input":"0.00000175","output":"0.000014","input_cache_read":"0.000000175","web_search":"10","service_tiers":{"priority":{"input":"0.0000035","output":"0.000028","input_cache_read":"0.00000035"}}}},{"id":"openai/gpt-5.4","object":"model","created":1755815280,"released":1772668800,"owned_by":"openai","name":"GPT 5.4","description":"GPT-5.4 is OpenAI's best general-purpose model, part of the GPT-5 flagship model family. It's their most intelligent model yet for both general and agentic tasks.","context_window":1050000,"max_tokens":128000,"type":"language","tags":["file-input","implicit-caching","reasoning","tool-use","vision","web-search","websocket-realtime"],"regions":["us"],"pricing":{"input":"0.0000025","input_tiers":[{"cost":"0.0000025","min":0,"max":272000},{"cost":"0.000005","min":272000}],"output":"0.000015","output_tiers":[{"cost":"0.000015","min":0,"max":272000},{"cost":"0.0000225","min":272000}],"input_cache_read":"0.00000025","input_cache_read_tiers":[{"cost":"0.00000025","min":0,"max":272000},{"cost":"0.0000005","min":272000}],"web_search":"10","service_tiers":{"priority":{"input":"0.000005","output":"0.00003","input_cache_read":"0.0000005"},"flex":{"input":"0.00000125","output":"0.0000075","input_cache_read":"0.00000013","long_context":{"threshold":272000,"input":"0.0000025","output":"0.00001125","input_cache_read":"0.00000025"}}}}},{"id":"openai/gpt-5.4-mini","object":"model","created":1755815280,"released":1773705600,"owned_by":"openai","name":"GPT 5.4 Mini","description":"GPT-5.4 Mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.","context_window":400000,"max_tokens":128000,"type":"language","tags":["file-input","implicit-caching","reasoning","tool-use","vision","web-search"],"regions":["us"],"pricing":{"input":"0.00000075","output":"0.0000045","input_cache_read":"0.000000075","web_search":"10","service_tiers":{"priority":{"input":"0.0000015","output":"0.000009","input_cache_read":"0.00000015"},"flex":{"input":"0.000000375","output":"0.00000225","input_cache_read":"0.0000000375"}}}},{"id":"openai/gpt-5.4-nano","object":"model","created":1755815280,"released":1773705600,"owned_by":"openai","name":"GPT 5.4 Nano","description":"GPT-5.4 Nano is designed for tasks where speed and cost matter most like classification, data extraction, ranking, and sub-agents.","context_window":400000,"max_tokens":128000,"type":"language","tags":["file-input","implicit-caching","reasoning","tool-use","vision","web-search"],"regions":["us"],"pricing":{"input":"0.0000002","output":"0.00000125","input_cache_read":"0.00000002","web_search":"10","service_tiers":{"flex":{"input":"0.0000001","output":"0.000000625","input_cache_read":"0.00000001"}}}},{"id":"openai/gpt-5.4-pro","object":"model","created":1755815280,"released":1772668800,"owned_by":"openai","name":"GPT 5.4 Pro","description":"GPT-5.4 Pro uses more compute to think harder and provide consistently better answers. It's designed to tackle tough problems.","context_window":1050000,"max_tokens":128000,"type":"language","tags":["file-input","reasoning","tool-use","vision","web-search"],"regions":["us"],"pricing":{"input":"0.00003","input_tiers":[{"cost":"0.00003","min":0,"max":272000},{"cost":"0.00006","min":272000}],"output":"0.00018","output_tiers":[{"cost":"0.00018","min":0,"max":272000},{"cost":"0.00027","min":272000}],"web_search":"10","service_tiers":{"flex":{"input":"0.000015","output":"0.00009","long_context":{"threshold":272000,"input":"0.00003","output":"0.000135"}}}}},{"id":"openai/gpt-5.5","object":"model","created":1755815280,"released":1776988800,"owned_by":"openai","name":"GPT 5.5","description":"GPT‑5.5 understands what you’re trying to do faster and can carry more of the work itself. It excels at writing and debugging code, researching online, analyzing data, creating documents and spreadsheets, operating software, and moving across tools until a task is finished. Instead of carefully managing every step, you can give GPT‑5.5 a messy, multi-part task and trust it to plan, use tools, check its work, navigate through ambiguity, and keep going.","context_window":1000000,"max_tokens":128000,"type":"language","tags":["file-input","implicit-caching","reasoning","tool-use","vision","web-search","websocket-realtime"],"regions":["us"],"pricing":{"input":"0.000005","input_tiers":[{"cost":"0.000005","min":0,"max":272000},{"cost":"0.00001","min":272000}],"output":"0.00003","output_tiers":[{"cost":"0.00003","min":0,"max":272000},{"cost":"0.000045","min":272000}],"input_cache_read":"0.0000005","input_cache_read_tiers":[{"cost":"0.0000005","min":0,"max":272000},{"cost":"0.000001","min":272000}],"web_search":"10","service_tiers":{"priority":{"input":"0.0000125","output":"0.000075","input_cache_read":"0.00000125"},"flex":{"input":"0.0000025","output":"0.000015","input_cache_read":"0.00000025","long_context":{"threshold":272000,"input":"0.000005","output":"0.0000225","input_cache_read":"0.0000005"}}}}},{"id":"openai/gpt-5.5-pro","object":"model","created":1755815280,"released":1776988800,"owned_by":"openai","name":"GPT 5.5 Pro","description":"","context_window":1000000,"max_tokens":128000,"type":"language","tags":["reasoning","tool-use","file-input","web-search","vision"],"regions":["us"],"pricing":{"input":"0.00003","input_tiers":[{"cost":"0.00003","min":0,"max":272000},{"cost":"0.00006","min":272000}],"output":"0.00018","output_tiers":[{"cost":"0.00018","min":0,"max":272000},{"cost":"0.00027","min":272000}],"web_search":"14","service_tiers":{"flex":{"input":"0.000015","output":"0.00009"}}}},{"id":"openai/gpt-5.6-luna","object":"model","created":1755815280,"released":1783555200,"owned_by":"openai","name":"GPT 5.6 Luna","description":"GPT-5.6 Luna is a fast, affordable GPT-5.6 model that brings strong capability at the lowest cost in the series.","context_window":1050000,"max_tokens":128000,"type":"language","tags":["reasoning","file-input","tool-use","vision","implicit-caching","web-search","websocket-realtime"],"regions":["us"],"pricing":{"input":"0.000001","input_tiers":[{"cost":"0.000001","min":0,"max":272000},{"cost":"0.000002","min":272000}],"output":"0.000006","output_tiers":[{"cost":"0.000006","min":0,"max":272000},{"cost":"0.000009","min":272000}],"input_cache_read":"0.0000001","input_cache_read_tiers":[{"cost":"0.0000001","max":272000},{"cost":"0.0000002","min":272000}],"input_cache_write":"0.00000125","input_cache_write_tiers":[{"cost":"0.00000125","min":0,"max":272000},{"cost":"0.0000025","min":272000}],"web_search":"10","service_tiers":{"priority":{"input":"0.000002","output":"0.000012","input_cache_read":"0.0000002"},"flex":{"input":"0.0000005","output":"0.000003","input_cache_read":"0.00000005","long_context":{"threshold":272000,"input":"0.000001","output":"0.0000045","input_cache_read":"0.0000001"}}}}},{"id":"openai/gpt-5.6-sol","object":"model","created":1755815280,"released":1783555200,"owned_by":"openai","name":"GPT 5.6 Sol","description":"GPT-5.6 Sol is the flagship of OpenAI's GPT-5.6 series, its most capable model for long-horizon agentic work across coding, biology, and cybersecurity.","context_window":1050000,"max_tokens":128000,"type":"language","tags":["reasoning","tool-use","implicit-caching","file-input","vision","web-search","websocket-realtime"],"regions":["us"],"pricing":{"input":"0.000005","input_tiers":[{"cost":"0.000005","min":0,"max":272000},{"cost":"0.00001","min":272000}],"output":"0.00003","output_tiers":[{"cost":"0.00003","min":0,"max":272000},{"cost":"0.000045","min":272000}],"input_cache_read":"0.0000005","input_cache_read_tiers":[{"cost":"0.0000005","min":0,"max":272000},{"cost":"0.000001","min":272000}],"input_cache_write":"0.00000625","input_cache_write_tiers":[{"cost":"0.00000625","min":0,"max":272000},{"cost":"0.0000125","min":272000}],"web_search":"10","service_tiers":{"priority":{"input":"0.00001","output":"0.00006","input_cache_read":"0.000001"},"flex":{"input":"0.0000025","output":"0.000015","input_cache_read":"0.00000025","long_context":{"threshold":272000,"input":"0.000005","output":"0.0000225","input_cache_read":"0.0000005"}}}}},{"id":"openai/gpt-5.6-terra","object":"model","created":1755815280,"released":1783555200,"owned_by":"openai","name":"GPT 5.6 Terra","description":"GPT-5.6 Terra is a balanced GPT-5.6 model for everyday work, with performance comparable to the previous generation at half the cost.","context_window":1050000,"max_tokens":128000,"type":"language","tags":["reasoning","web-search","file-input","tool-use","vision","implicit-caching","websocket-realtime"],"regions":["us"],"pricing":{"input":"0.0000025","input_tiers":[{"cost":"0.0000025","min":0,"max":272000},{"cost":"0.000005","min":272000}],"output":"0.000015","output_tiers":[{"cost":"0.000015","min":0,"max":272000},{"cost":"0.0000225","min":272000}],"input_cache_read":"0.00000025","input_cache_read_tiers":[{"cost":"0.00000025","min":0,"max":272000},{"cost":"0.0000005","min":272000}],"input_cache_write":"0.000003125","input_cache_write_tiers":[{"cost":"0.000003125","min":0,"max":272000},{"cost":"0.00000625","min":272000}],"web_search":"10","service_tiers":{"priority":{"input":"0.000005","output":"0.00003","input_cache_read":"0.0000005"},"flex":{"input":"0.00000125","output":"0.0000075","input_cache_read":"0.000000125","long_context":{"threshold":272000,"input":"0.0000025","output":"0.00001125","input_cache_read":"0.00000025"}}}}},{"id":"openai/gpt-image-1","object":"model","created":1755815280,"released":1742860800,"owned_by":"openai","name":"GPT Image 1","description":"GPT Image 1 is OpenAI's new state-of-the-art image generation model. It is a natively multimodal language model that accepts both text and image inputs, and produces image outputs.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation","implicit-caching"],"pricing":{"input":"0.000005","output":"0.00004","input_cache_read":"0.00000125"}},{"id":"openai/gpt-image-1-mini","object":"model","created":1755815280,"released":1759708800,"owned_by":"openai","name":"GPT Image 1 Mini","description":"A cost-efficient version of GPT Image 1. It is a natively multimodal language model that accepts both text and image inputs, and produces image outputs.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation","implicit-caching"],"pricing":{"input":"0.000002","output":"0.000008","input_cache_read":"0.0000002"}},{"id":"openai/gpt-image-1.5","object":"model","created":1755815280,"released":1765843200,"owned_by":"openai","name":"GPT Image 1.5","description":"GPT Image 1.5 is OpenAI's latest image generation model, with better instruction following and adherence to prompts.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation","implicit-caching"],"pricing":{"input":"0.000005","output":"0.000032","input_cache_read":"0.00000125"}},{"id":"openai/gpt-image-2","object":"model","created":1755815280,"released":1776729600,"owned_by":"openai","name":"GPT Image 2","description":"GPT Image 2 is OpenAI's state-of-the-art image generation model for fast, high-quality image generation and editing. It supports flexible image sizes and high-fidelity image inputs.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation","implicit-caching"],"pricing":{"input":"0.000005","output":"0.00003","input_cache_read":"0.00000125"}},{"id":"openai/gpt-oss-120b","object":"model","created":1755815280,"released":1754352000,"owned_by":"openai","name":"GPT OSS 120B","description":"Extremely capable general-purpose LLM with strong, controllable reasoning capabilities","context_window":131072,"max_tokens":131072,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"regions":["us"],"pricing":{"input":"0.0000001","output":"0.0000005"}},{"id":"openai/gpt-oss-20b","object":"model","created":1755815280,"released":1754352000,"owned_by":"openai","name":"GPT OSS 20B","description":"A compact, open-weight language model optimized for low-latency and resource-constrained environments, including local and edge deployments.","context_window":131072,"max_tokens":8192,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.00000005","output":"0.0000002"}},{"id":"openai/gpt-oss-safeguard-20b","object":"model","created":1755815280,"released":1761696000,"owned_by":"openai","name":"GPT OSS Safeguard 20B","description":"OpenAI's first open weight reasoning model specifically trained for safety classification tasks. Fine-tuned from GPT-OSS, this model helps classify text content based on customizable policies, enabling bring-your-own-policy Trust & Safety AI where your own taxonomy, definitions, and thresholds guide classification decisions.","context_window":131072,"max_tokens":65536,"type":"language","tags":["implicit-caching","reasoning","tool-use"],"pricing":{"input":"0.000000075","output":"0.0000003","input_cache_read":"0.000000037"}},{"id":"openai/gpt-realtime-1.5","object":"model","created":1755815280,"released":1771804800,"owned_by":"openai","name":"GPT-Realtime-1.5","description":"GPT-Realtime-1.5 is our flagship audio model for voice agents and customer support.","context_window":0,"max_tokens":0,"type":"realtime","pricing":{"input":"0.000004","output":"0.000016","input_cache_read":"0.0000004","web_search":"10"}},{"id":"openai/gpt-realtime-2","object":"model","created":1755815280,"released":1778112000,"owned_by":"openai","name":"gpt-realtime-2","description":"GPT Realtime 2 is our most capable realtime voice model. It supports speech-to-speech interactions with configurable reasoning effort, stronger instruction following, and more reliable tool use for complex voice-agent workflows.","context_window":0,"max_tokens":0,"type":"realtime","tags":["websocket-realtime"],"pricing":{"input":"0.000004","output":"0.000024","input_cache_read":"0.0000004","web_search":"10"}},{"id":"openai/gpt-realtime-2.1","object":"model","created":1755815280,"released":1783555200,"owned_by":"openai","name":"gpt-realtime-2.1","description":"GPT-Realtime-2.1 updates GPT-Realtime-2 with improved alphanumeric recognition, silence and noise handling, and interruption behavior. It supports speech-to-speech interactions with configurable reasoning effort, instruction following, and tool use for complex voice-agent workflows.","context_window":128000,"max_tokens":32000,"type":"realtime","pricing":{"input":"0.000004","output":"0.000024","input_cache_read":"0.0000004","web_search":"10"}},{"id":"openai/gpt-realtime-mini","object":"model","created":1755815280,"released":1760054400,"owned_by":"openai","name":"GPT-Realtime mini","description":"GPT-Realtime mini is capable of responding to audio and text inputs in realtime over WebRTC, WebSocket, or SIP connections.","context_window":0,"max_tokens":0,"type":"realtime","pricing":{"input":"0.0000006","output":"0.0000024","input_cache_read":"0.00000006","web_search":"10"}},{"id":"openai/gpt-realtime-whisper","object":"model","created":1755815280,"released":1778112000,"owned_by":"openai","name":"gpt-realtime-whisper","description":"GPT Realtime Whisper is a streaming speech-to-text model for applications that need low-latency transcript deltas from live audio. It is designed for realtime use cases where developers need to tune latency and accuracy. GPT Realtime Whisper is priced by audio duration rather than text tokens.","type":"transcription","tags":["websocket-realtime","websocket-transcription"],"pricing":{"input":"0.0000000002","transcription_duration_cost_per_second":"0.000284"}},{"id":"openai/o1","object":"model","created":1755815280,"released":1733356800,"owned_by":"openai","name":"o1","description":"o1 is OpenAI's flagship reasoning model, designed for complex problems that require deep thinking. It provides strong reasoning capabilities with improved accuracy for complex multi-step tasks.","context_window":200000,"max_tokens":100000,"type":"language","tags":["file-input","reasoning","tool-use","vision","implicit-caching"],"pricing":{"input":"0.000015","output":"0.00006","input_cache_read":"0.0000075"}},{"id":"openai/o3","object":"model","created":1755815280,"released":1744761600,"owned_by":"openai","name":"o3","description":"OpenAI's o3 is their most powerful reasoning model, setting new state-of-the-art benchmarks in coding, math, science, and visual perception. It excels at complex queries requiring multi-faceted analysis, with particular strength in analyzing images, charts, and graphics.","context_window":200000,"max_tokens":100000,"type":"language","tags":["file-input","implicit-caching","reasoning","tool-use","vision","web-search"],"pricing":{"input":"0.000002","output":"0.000008","input_cache_read":"0.0000005","web_search":"10","service_tiers":{"priority":{"input":"0.0000035","output":"0.000014","input_cache_read":"0.000000875"},"flex":{"input":"0.000001","output":"0.000004","input_cache_read":"0.00000025"}}}},{"id":"openai/o3-deep-research","object":"model","created":1755815280,"released":1750896000,"owned_by":"openai","name":"o3-deep-research","description":"o3-deep-research is OpenAI's most advanced model for deep research, designed to tackle complex, multi-step research tasks. It can search and synthesize information from across the internet as well as from your own data—brought in through MCP connectors.","context_window":200000,"max_tokens":100000,"type":"language","tags":["file-input","implicit-caching","reasoning","tool-use","vision","web-search"],"pricing":{"input":"0.00001","output":"0.00004","input_cache_read":"0.0000025","web_search":"10"}},{"id":"openai/o3-mini","object":"model","created":1755815280,"released":1738281600,"owned_by":"openai","name":"o3-mini","description":"o3-mini is OpenAI's most recent small reasoning model, providing high intelligence at the same cost and latency targets of o1-mini.","context_window":200000,"max_tokens":100000,"type":"language","tags":["implicit-caching","reasoning","tool-use"],"pricing":{"input":"0.0000011","output":"0.0000044","input_cache_read":"0.00000055"}},{"id":"openai/o3-pro","object":"model","created":1755815280,"released":1749513600,"owned_by":"openai","name":"o3 Pro","description":"The o-series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o3-pro model uses more compute to think harder and provide consistently better answers.","context_window":200000,"max_tokens":100000,"type":"language","tags":["reasoning","vision","file-input","tool-use","web-search"],"pricing":{"input":"0.00002","output":"0.00008","web_search":"10"}},{"id":"openai/o4-mini","object":"model","created":1755815280,"released":1744761600,"owned_by":"openai","name":"o4-mini","description":"OpenAI's o4-mini delivers fast, cost-efficient reasoning with exceptional performance for its size, particularly excelling in math (best-performing on AIME benchmarks), coding, and visual tasks.","context_window":200000,"max_tokens":100000,"type":"language","tags":["file-input","implicit-caching","reasoning","tool-use","vision","web-search"],"pricing":{"input":"0.0000011","output":"0.0000044","input_cache_read":"0.000000275","web_search":"10","service_tiers":{"priority":{"input":"0.000002","output":"0.000008","input_cache_read":"0.0000005"},"flex":{"input":"0.00000055","output":"0.0000022","input_cache_read":"0.000000138"}}}},{"id":"openai/text-embedding-3-large","object":"model","created":1755815280,"released":1706140800,"owned_by":"openai","name":"text-embedding-3-large","description":"OpenAI's most capable embedding model for both english and non-english tasks.","context_window":0,"max_tokens":0,"type":"embedding","pricing":{"input":"0.00000013"}},{"id":"openai/text-embedding-3-small","object":"model","created":1755815280,"released":1706140800,"owned_by":"openai","name":"text-embedding-3-small","description":"OpenAI's improved, more performant version of their ada embedding model.","context_window":0,"max_tokens":0,"type":"embedding","pricing":{"input":"0.00000002"}},{"id":"openai/text-embedding-ada-002","object":"model","created":1755815280,"released":1671062400,"owned_by":"openai","name":"text-embedding-ada-002","description":"OpenAI's legacy text embedding model.","context_window":0,"max_tokens":0,"type":"embedding","pricing":{"input":"0.0000001"}},{"id":"openai/tts-1","object":"model","created":1755815280,"released":1699228800,"owned_by":"openai","name":"TTS-1","description":"TTS is a model that converts text to natural sounding spoken text.","type":"speech","pricing":{"input":"0.000015","speech_input_character_cost":"0.000015"}},{"id":"openai/tts-1-hd","object":"model","created":1755815280,"released":1699228800,"owned_by":"openai","name":"TTS-1 HD","description":"TTS is a model that converts text to natural sounding spoken text. The tts-1-hd model is optimized for high quality text-to-speech use cases.","type":"speech","pricing":{"input":"0.00003","speech_input_character_cost":"0.00003"}},{"id":"openai/whisper-1","object":"model","created":1755815280,"released":1663718400,"owned_by":"openai","name":"Whisper","description":"Whisper is a general-purpose speech recognition model, trained on a large dataset of diverse audio. You can also use it as a multitask model to perform multilingual speech recognition as well as speech translation and language identification.","type":"transcription","pricing":{"input":"0.0000000001","transcription_duration_cost_per_second":"0.0001"}},{"id":"perplexity/sonar","object":"model","created":1755815280,"released":1739923200,"owned_by":"perplexity","name":"Sonar","description":"Perplexity's lightweight offering with search grounding, quicker and cheaper than Sonar Pro.","context_window":127000,"max_tokens":8000,"type":"language","tags":["vision","web-search"],"pricing":{}},{"id":"perplexity/sonar-pro","object":"model","created":1755815280,"released":1739923200,"owned_by":"perplexity","name":"Sonar Pro","description":"Perplexity's premier offering with search grounding, supporting advanced queries and follow-ups.","context_window":200000,"max_tokens":8000,"type":"language","tags":["vision","web-search"],"pricing":{}},{"id":"perplexity/sonar-reasoning-pro","object":"model","created":1755815280,"released":1739923200,"owned_by":"perplexity","name":"Sonar Reasoning Pro","description":"A premium reasoning-focused model that outputs Chain of Thought (CoT) in responses, providing comprehensive explanations with enhanced search capabilities and multiple search queries per request.","context_window":127000,"max_tokens":8000,"type":"language","tags":["reasoning","web-search"],"pricing":{}},{"id":"prodia/flux-fast-schnell","object":"model","created":1755815280,"released":1722556800,"owned_by":"prodia","name":"Flux Schnell","description":"Lightning-fast image generation","context_window":512,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{}},{"id":"quiverai/arrow-1.1","object":"model","created":1755815280,"released":1776297600,"owned_by":"quiverai","name":"Arrow 1.1","description":"Best default for speed and quality, with stronger prompt following and more precise SVG structure.","context_window":131072,"max_tokens":131072,"type":"image","tags":["image-generation"],"pricing":{"image_dimension_quality_pricing":[{"operation":"generate","cost":"0.2"},{"operation":"vectorize","cost":"0.15"}]}},{"id":"recraft/recraft-v2","object":"model","created":1755815280,"released":1710288000,"owned_by":"recraft","name":"Recraft V2","description":"Recraft V2 is an image generation model released in March 2024 and the first model trained from scratch by Recraft. With 20 billion parameters, it was a breakthrough in human anatomical accuracy and the first to support brand consistency and brand color inputs. It also introduced vector image generation (SVG output), as well as minimalistic icon and illustration styles.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.022","image_dimension_quality_pricing":[{"style":"vector_illustration","cost":"0.044"}]}},{"id":"recraft/recraft-v3","object":"model","created":1755815280,"released":1730246400,"owned_by":"recraft","name":"Recraft V3","description":"V3 introduced major advances in photorealism and text rendering. It was the first Recraft model to generate mid-size text accurately and, as of 2025, is the only model capable of placing text at specific positions in an image.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.04","image_dimension_quality_pricing":[{"style":"vector_illustration","cost":"0.08"}]}},{"id":"recraft/recraft-v4","object":"model","created":1755815280,"released":1771286400,"owned_by":"recraft","name":"Recraft V4","description":"The model delivers strong photorealism, including realistic skin rendering and natural textures, while avoiding common synthetic artifacts. It produces more distinctive lighting, composition, diverse subjects, contemporary styling, and carefully considered scene elements. For illustration, it generates original characters and forms with sophisticated and unexpected color combinations.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.04","image_dimension_quality_pricing":[{"style":"vector_illustration","cost":"0.08"}]}},{"id":"recraft/recraft-v4-pro","object":"model","created":1755815280,"released":1771286400,"owned_by":"recraft","name":"Recraft V4 Pro","description":"The model delivers strong photorealism, including realistic skin rendering and natural textures, while avoiding common synthetic artifacts. It produces more distinctive lighting, composition, diverse subjects, contemporary styling, and carefully considered scene elements. For illustration, it generates original characters and forms with sophisticated and unexpected color combinations.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.25","image_dimension_quality_pricing":[{"style":"vector_illustration","cost":"0.3"}]}},{"id":"recraft/recraft-v4.1","object":"model","created":1755815280,"released":1778716800,"owned_by":"recraft","name":"Recraft V4.1","description":"V4.1 is built on the same visual aesthetic and the same eye for what looks right, just pushed further across every dimension. The photorealism is more natural. The gradients are dreamier. Illustration styles are now possible that simply weren't before. And this is a model that reads a short prompts easier and creates something worth stopping for.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.035","image_dimension_quality_pricing":[{"style":"vector_illustration","cost":"0.08"}]}},{"id":"recraft/recraft-v4.1-pro","object":"model","created":1755815280,"released":1778716800,"owned_by":"recraft","name":"Recraft V4.1 Pro","description":"V4.1 is built on the same visual aesthetic and the same eye for what looks right, just pushed further across every dimension. The photorealism is more natural. The gradients are dreamier. Illustration styles are now possible that simply weren't before. And this is a model that reads a short prompts easier and creates something worth stopping for. V4.1 Pro generates higher-resolution images for when the idea deserves more room.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.21","image_dimension_quality_pricing":[{"style":"vector_illustration","cost":"0.3"}]}},{"id":"recraft/recraft-v4.1-utility","object":"model","created":1755815280,"released":1778716800,"owned_by":"recraft","name":"Recraft V4.1 Utility","description":"V4.1 is built on the same visual aesthetic and the same eye for what looks right, just pushed further across every dimension. The photorealism is more natural. The gradients are dreamier. Illustration styles are now possible that simply weren't before. And this is a model that reads a short prompts easier and creates something worth stopping for. V4.1 Utility is designed for when restraint is the aesthetic choice, with flat lighting, front-facing composition, and simple, controlled scenes.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.035","image_dimension_quality_pricing":[{"style":"vector_illustration","cost":"0.08"}]}},{"id":"recraft/recraft-v4.1-utility-pro","object":"model","created":1755815280,"released":1778716800,"owned_by":"recraft","name":"Recraft V4.1 Utility Pro","description":"V4.1 is built on the same visual aesthetic and the same eye for what looks right, just pushed further across every dimension. The photorealism is more natural. The gradients are dreamier. Illustration styles are now possible that simply weren't before. And this is a model that reads a short prompts easier and creates something worth stopping for. V4.1 Utility is designed for when restraint is the aesthetic choice, with flat lighting, front-facing composition, and simple, controlled scenes.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.21","image_dimension_quality_pricing":[{"style":"vector_illustration","cost":"0.3"}]}},{"id":"sakana/fugu-ultra","object":"model","created":1755815280,"released":1782000000,"owned_by":"sakana","name":"Fugu Ultra","description":"Fugu Ultra coordinates a deeper pool of expert agents to maximize answer quality on hard, high-stakes problems. Fugu Ultra focuses on maximizing performance, for a higher cost. It routes between one to three agents, depending on the problem.","context_window":1000000,"max_tokens":1000000,"type":"language","tags":["vision","tool-use","reasoning"],"pricing":{"input":"0.000005","input_tiers":[{"cost":"0.000005","min":0,"max":272001},{"cost":"0.00001","min":272001}],"output":"0.00003","output_tiers":[{"cost":"0.00003","min":0,"max":272001},{"cost":"0.000045","min":272001}],"input_cache_read":"0.0000005","input_cache_read_tiers":[{"cost":"0.0000005","min":0,"max":272001},{"cost":"0.000001","min":272001}]}},{"id":"stepfun/step-3.5-flash","object":"model","created":1755815280,"released":1769644800,"owned_by":"stepfun","name":"StepFun 3.5 Flash","description":"Step 3.5 Flash is an open-source reasoning model by StepFun with 196B total parameters (11B active) using Mixture of Experts. It features a 256K context window, deep reasoning, tool calling, and agentic capabilities, achieving 97.3 on AIME 2025 and 74.4% on SWE-bench Verified.","context_window":262114,"max_tokens":262114,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.00000009","output":"0.0000003","input_cache_read":"0.00000002"}},{"id":"stepfun/step-3.7-flash","object":"model","created":1755815280,"released":1779926400,"owned_by":"stepfun","name":"Step 3.7 Flash","description":"StepFun’s flagship multimodal reasoning model. Powered by a 198B-parameter / 11B-activation sparse MoE architecture, with native support for image and video understanding.","context_window":256000,"max_tokens":256000,"type":"language","tags":["implicit-caching","reasoning","tool-use","vision"],"pricing":{"input":"0.0000002","output":"0.00000115","input_cache_read":"0.00000004"}},{"id":"thinkingmachines/inkling","object":"model","created":1755815280,"released":1784073600,"owned_by":"thinkingmachines","name":"Inkling","description":"Inkling is a multimodal MoE model (975B total, 41B active, 256k context) reasoning over text, image, and audio inputs.","context_window":256000,"max_tokens":256000,"type":"language","tags":["reasoning","tool-use","vision","file-input"],"pricing":{"input":"0.000001","output":"0.00000405","input_cache_read":"0.00000017"}},{"id":"voyage/rerank-2.5","object":"model","created":1755815280,"released":1754870400,"owned_by":"voyage","name":"Voyage Rerank 2.5","description":"A generalist reranker optimized for quality with instruction-following and multilingual support.","context_window":32000,"max_tokens":32000,"type":"reranking","pricing":{"input":"0.00000005"}},{"id":"voyage/rerank-2.5-lite","object":"model","created":1755815280,"released":1754870400,"owned_by":"voyage","name":"Voyage Rerank 2.5 Lite","description":"A generalist reranker optimized for both latency and quality with instruction-following and multilingual support.","context_window":32000,"max_tokens":32000,"type":"reranking","pricing":{"input":"0.00000002"}},{"id":"voyage/voyage-3-large","object":"model","created":1755815280,"released":1736208000,"owned_by":"voyage","name":"voyage-3-large","description":"Voyage AI's embedding model with the best general-purpose and multilingual retrieval quality.","context_window":0,"max_tokens":0,"type":"embedding","pricing":{"input":"0.00000018"}},{"id":"voyage/voyage-3.5","object":"model","created":1755815280,"released":1747699200,"owned_by":"voyage","name":"Voyage 3.5","description":"Voyage AI's embedding model optimized for general-purpose and multilingual retrieval quality.","context_window":0,"max_tokens":0,"type":"embedding","pricing":{"input":"0.00000006"}},{"id":"voyage/voyage-3.5-lite","object":"model","created":1755815280,"released":1747699200,"owned_by":"voyage","name":"Voyage 3.5 Lite","description":"Voyage AI's embedding model optimized for latency and cost.","context_window":0,"max_tokens":0,"type":"embedding","pricing":{"input":"0.00000002"}},{"id":"voyage/voyage-4","object":"model","created":1755815280,"released":1768435200,"owned_by":"voyage","name":"Voyage 4","description":"Optimized for general-purpose and multilingual retrieval quality. All embeddings created with the 4 series are compatible with each other.","context_window":32000,"max_tokens":0,"type":"embedding","pricing":{"input":"0.00000006"}},{"id":"voyage/voyage-4-large","object":"model","created":1755815280,"released":1768435200,"owned_by":"voyage","name":"Voyage 4 Large","description":"The best general-purpose and multilingual retrieval quality. All embeddings created with the 4 series are compatible with each other.","context_window":32000,"max_tokens":0,"type":"embedding","pricing":{"input":"0.00000012"}},{"id":"voyage/voyage-4-lite","object":"model","created":1755815280,"released":1768435200,"owned_by":"voyage","name":"Voyage 4 Lite","description":"Optimized for latency and cost. All embeddings created with the 4 series are compatible with each other.","context_window":32000,"max_tokens":0,"type":"embedding","pricing":{"input":"0.00000002"}},{"id":"voyage/voyage-code-2","object":"model","created":1755815280,"released":1704067200,"owned_by":"voyage","name":"Voyage Code 2","description":"Voyage AI's embedding model optimized for code retrieval (17% better than alternatives). This is the previous generation of code embeddings models.","context_window":0,"max_tokens":0,"type":"embedding","pricing":{"input":"0.00000012"}},{"id":"voyage/voyage-code-3","object":"model","created":1755815280,"released":1733270400,"owned_by":"voyage","name":"Voyage Code 3","description":"Voyage AI's embedding model optimized for code retrieval.","context_window":0,"max_tokens":0,"type":"embedding","pricing":{"input":"0.00000018"}},{"id":"voyage/voyage-finance-2","object":"model","created":1755815280,"released":1717372800,"owned_by":"voyage","name":"Voyage Finance 2","description":"Voyage AI's embedding model optimized for finance retrieval and RAG.","context_window":0,"max_tokens":0,"type":"embedding","pricing":{"input":"0.00000012"}},{"id":"voyage/voyage-law-2","object":"model","created":1755815280,"released":1713139200,"owned_by":"voyage","name":"Voyage Law 2","description":"Voyage AI's embedding model optimized for legal retrieval and RAG.","context_window":0,"max_tokens":0,"type":"embedding","pricing":{"input":"0.00000012"}},{"id":"xai/grok-4.1-fast-non-reasoning","object":"model","created":1755815280,"released":1763510400,"owned_by":"xai","name":"Grok 4.1 Fast Non-Reasoning","description":"","context_window":1000000,"max_tokens":1000000,"type":"language","tags":["tool-use","file-input","vision","implicit-caching"],"pricing":{"input":"0.0000002","output":"0.0000005","input_cache_read":"0.00000005"}},{"id":"xai/grok-4.1-fast-reasoning","object":"model","created":1755815280,"released":1763510400,"owned_by":"xai","name":"Grok 4.1 Fast Reasoning","description":"","context_window":1000000,"max_tokens":1000000,"type":"language","tags":["reasoning","file-input","vision","tool-use","implicit-caching"],"pricing":{"input":"0.0000002","output":"0.0000005","input_cache_read":"0.00000005"}},{"id":"xai/grok-4.20-multi-agent","object":"model","created":1755815280,"released":1773100800,"owned_by":"xai","name":"Grok 4.20 Multi-Agent","description":"Multiple agents collaborate in parallel to perform deep research tasks.","context_window":2000000,"max_tokens":2000000,"type":"language","tags":["reasoning","tool-use","implicit-caching","vision","file-input","web-search"],"pricing":{"input":"0.00000125","input_tiers":[{"cost":"0.00000125","min":0,"max":200001},{"cost":"0.0000025","min":200001}],"output":"0.0000025","output_tiers":[{"cost":"0.0000025","min":0,"max":200001},{"cost":"0.000005","min":200001}],"input_cache_read":"0.0000002","input_cache_read_tiers":[{"cost":"0.0000002","min":0,"max":200001},{"cost":"0.0000004","min":200001}],"web_search":"5"}},{"id":"xai/grok-4.20-multi-agent-beta","object":"model","created":1755815280,"released":1773187200,"owned_by":"xai","name":"Grok 4.20 Multi Agent Beta","description":"Multiple agents collaborate in parallel to perform deep research tasks.","context_window":2000000,"max_tokens":2000000,"type":"language","tags":["reasoning","tool-use","implicit-caching","vision","file-input","web-search"],"pricing":{"input":"0.00000125","input_tiers":[{"cost":"0.00000125","min":0,"max":200001},{"cost":"0.0000025","min":200001}],"output":"0.0000025","output_tiers":[{"cost":"0.0000025","min":0,"max":200001},{"cost":"0.000005","min":200001}],"input_cache_read":"0.0000002","input_cache_read_tiers":[{"cost":"0.0000002","min":0,"max":200001},{"cost":"0.0000004","min":200001}],"web_search":"5"}},{"id":"xai/grok-4.20-non-reasoning","object":"model","created":1755815280,"released":1773100800,"owned_by":"xai","name":"Grok 4.20 Non-Reasoning","description":"Grok 4.20 Beta is the newest flagship model from xAI with industry-leading speed and agentic tool calling capabilities. It combines the lowest hallucination rate on the market with strict prompt adherence, delivering consistently precise and truthful responses.","context_window":2000000,"max_tokens":2000000,"type":"language","tags":["tool-use","implicit-caching","file-input","vision","web-search"],"pricing":{"input":"0.00000125","input_tiers":[{"cost":"0.00000125","min":0,"max":200001},{"cost":"0.0000025","min":200001}],"output":"0.0000025","output_tiers":[{"cost":"0.0000025","min":0,"max":200001},{"cost":"0.000005","min":200001}],"input_cache_read":"0.0000002","input_cache_read_tiers":[{"cost":"0.0000002","min":0,"max":200001},{"cost":"0.0000004","min":200001}],"web_search":"5"}},{"id":"xai/grok-4.20-non-reasoning-beta","object":"model","created":1755815280,"released":1773187200,"owned_by":"xai","name":"Grok 4.20 Beta Non-Reasoning","description":"Grok 4.20 Beta is the newest flagship model from xAI with industry-leading speed and agentic tool calling capabilities. It combines the lowest hallucination rate on the market with strict prompt adherance, delivering consistently precise and truthful responses.","context_window":2000000,"max_tokens":2000000,"type":"language","tags":["tool-use","implicit-caching","vision","file-input","web-search"],"pricing":{"input":"0.00000125","input_tiers":[{"cost":"0.00000125","min":0,"max":200001},{"cost":"0.0000025","min":200001}],"output":"0.0000025","output_tiers":[{"cost":"0.0000025","min":0,"max":200001},{"cost":"0.000005","min":200001}],"input_cache_read":"0.0000002","input_cache_read_tiers":[{"cost":"0.0000004","min":200001}],"web_search":"5"}},{"id":"xai/grok-4.20-reasoning","object":"model","created":1755815280,"released":1773100800,"owned_by":"xai","name":"Grok 4.20 Reasoning","description":"Grok 4.20 Beta is the newest flagship model from xAI with industry-leading speed and agentic tool calling capabilities. It combines the lowest hallucination rate on the market with strict prompt adherence, delivering consistently precise and truthful responses.","context_window":2000000,"max_tokens":2000000,"type":"language","tags":["reasoning","tool-use","implicit-caching","vision","file-input","web-search"],"pricing":{"input":"0.00000125","input_tiers":[{"cost":"0.00000125","min":0,"max":200001},{"cost":"0.0000025","min":200001}],"output":"0.0000025","output_tiers":[{"cost":"0.0000025","min":0,"max":200001},{"cost":"0.000005","min":200001}],"input_cache_read":"0.0000002","input_cache_read_tiers":[{"cost":"0.0000002","min":0,"max":200001},{"cost":"0.0000004","min":200001}],"web_search":"5"}},{"id":"xai/grok-4.20-reasoning-beta","object":"model","created":1755815280,"released":1773187200,"owned_by":"xai","name":"Grok 4.20 Beta Reasoning","description":"Grok 4.20 Beta is the newest flagship model from xAI with industry-leading speed and agentic tool calling capabilities. It combines the lowest hallucination rate on the market with strict prompt adherance, delivering consistently precise and truthful responses.","context_window":2000000,"max_tokens":2000000,"type":"language","tags":["reasoning","tool-use","vision","file-input","implicit-caching","web-search"],"pricing":{"input":"0.00000125","input_tiers":[{"cost":"0.00000125","min":0,"max":200001},{"cost":"0.0000025","min":200001}],"output":"0.0000025","output_tiers":[{"cost":"0.0000025","min":0,"max":200001},{"cost":"0.000005","min":200001}],"input_cache_read":"0.0000002","input_cache_read_tiers":[{"cost":"0.0000002","min":0,"max":200001},{"cost":"0.0000004","min":200001}],"web_search":"5"}},{"id":"xai/grok-4.3","object":"model","created":1755815280,"released":1777507200,"owned_by":"xai","name":"Grok 4.3","description":"Grok 4.3 is a new model matching the scale of Grok 4.20 with an improved architecture and a December 2025 knowledge cutoff.","context_window":1000000,"max_tokens":1000000,"type":"language","tags":["reasoning","tool-use","implicit-caching","file-input","vision","web-search"],"pricing":{"input":"0.00000125","input_tiers":[{"cost":"0.00000125","min":0,"max":200001},{"cost":"0.0000025","min":200001}],"output":"0.0000025","output_tiers":[{"cost":"0.0000025","min":0,"max":200001},{"cost":"0.000005","min":200001}],"input_cache_read":"0.0000002","input_cache_read_tiers":[{"cost":"0.0000002","min":0,"max":200001},{"cost":"0.0000004","min":200001}],"web_search":"5"}},{"id":"xai/grok-4.5","object":"model","created":1755815280,"released":1783468800,"owned_by":"xai","name":"Grok 4.5","description":"SpaceXAI's smartest model with frontier performance on coding, knowledge work, and STEM.","context_window":500000,"max_tokens":500000,"type":"language","tags":["reasoning","tool-use","implicit-caching","file-input","vision","web-search"],"pricing":{"input":"0.000002","input_tiers":[{"cost":"0.000002","min":0,"max":200001},{"cost":"0.000004","min":200001}],"output":"0.000006","output_tiers":[{"cost":"0.000006","min":0,"max":200001},{"cost":"0.000012","min":200001}],"input_cache_read":"0.0000005","input_cache_read_tiers":[{"cost":"0.0000005","min":0,"max":200001},{"cost":"0.000001","min":200001}],"web_search":"5"}},{"id":"xai/grok-build-0.1","object":"model","created":1755815280,"released":1779235200,"owned_by":"xai","name":"Grok Build 0.1","description":"xAI's fast coding model trained specifically for agentic coding.","context_window":256000,"max_tokens":256000,"type":"language","tags":["reasoning","implicit-caching","vision","tool-use","web-search"],"pricing":{"input":"0.000001","input_tiers":[{"cost":"0.000001","min":0,"max":200001},{"cost":"0.000002","min":200001}],"output":"0.000002","output_tiers":[{"cost":"0.000002","min":0,"max":200001},{"cost":"0.000004","min":200001}],"input_cache_read":"0.0000002","input_cache_read_tiers":[{"cost":"0.0000002","min":0,"max":200001},{"cost":"0.0000004","min":200001}],"web_search":"5"}},{"id":"xai/grok-imagine-image","object":"model","created":1755815280,"released":1769558400,"owned_by":"xai","name":"Grok Imagine Image","description":"Generate high-quality images from text prompts with xAI's imagine API.","context_window":0,"max_tokens":0,"type":"image","tags":["image-generation"],"pricing":{"image":"0.02"}},{"id":"xai/grok-imagine-video","object":"model","created":1755815280,"released":1769558400,"owned_by":"xai","name":"Grok Imagine","description":"State-of-the-art video generation across quality, cost, and latency. Grok Imagine is x.AI's most powerful video-audio generative model yet. Bring an image to life, start from a simple text prompt, or even refine a complex cinematic sequence.","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["text-to-video","image-to-video","reference-to-video","video-editing","extend-video"],"supported_resolutions":["480p","720p"],"supported_aspect_ratios":["1:1","16:9","9:16","4:3","3:4","3:2","2:3"],"supported_durations_seconds":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],"generate_audio":false,"supported_fps":[24],"input_limits":{"image":{"max_count":5,"supported_formats":["jpg","jpeg","png","webp","gif","avif"],"supported_sources":["url","base64","buffer"]},"video":{"max_count":1,"supported_sources":["url"],"min_duration_seconds":2,"max_duration_seconds":8.7}}},"pricing":{"video_duration_pricing":[{"resolution":"480p","cost_per_second":"0.05"},{"resolution":"720p","cost_per_second":"0.07"}]}},{"id":"xai/grok-imagine-video-1.5","object":"model","created":1755815280,"released":1782086400,"owned_by":"xai","name":"Grok Imagine Video 1.5","description":"","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["image-to-video"],"supported_resolutions":["480p","720p","1080p"],"supported_aspect_ratios":["1:1","16:9","9:16","4:3","3:4","3:2","2:3"],"supported_durations_seconds":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],"generate_audio":true,"supported_fps":[24],"input_limits":{"image":{"max_count":1,"supported_sources":["url","base64"]}}},"pricing":{"video_duration_pricing":[{"resolution":"480p","cost_per_second":"0.08"},{"resolution":"720p","cost_per_second":"0.14"},{"resolution":"1080p","cost_per_second":"0.25"}]}},{"id":"xai/grok-imagine-video-1.5-preview","object":"model","created":1755815280,"released":1780099200,"owned_by":"xai","name":"Grok Imagine Video 1.5 Preview","description":"","context_window":0,"max_tokens":0,"type":"video","video_capabilities":{"supported_operations":["image-to-video"],"supported_resolutions":["480p","720p","1080p"],"supported_aspect_ratios":["1:1","16:9","9:16","4:3","3:4","3:2","2:3"],"supported_durations_seconds":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],"generate_audio":true,"supported_fps":[24],"input_limits":{"image":{"max_count":1,"supported_sources":["url","base64"]}}},"pricing":{"video_duration_pricing":[{"resolution":"480p","cost_per_second":"0.08"},{"resolution":"720p","cost_per_second":"0.14"},{"resolution":"1080p","cost_per_second":"0.25"}]}},{"id":"xai/grok-stt","object":"model","created":1755815280,"released":1773619200,"owned_by":"xai","name":"Grok STT","description":"Transcribe audio to text in 25 languages with batch and streaming modes.","type":"transcription","tags":["websocket-transcription"],"pricing":{"input":"0","transcription_duration_cost_per_second":"0.000028"}},{"id":"xai/grok-tts","object":"model","created":1755815280,"released":1773619200,"owned_by":"xai","name":"Grok TTS","description":"Generate speech with 5 expressive voices, speech tags, and telephony codecs.","type":"speech","pricing":{"input":"0.000015","speech_input_character_cost":"0.000015"}},{"id":"xai/grok-voice-think-fast-1.0","object":"model","created":1755815280,"released":1776902400,"owned_by":"xai","name":"Grok Voice Think Fast 1.0","description":"Build real-time voice applications powered by Grok. Stream audio and text bidirectionally via WebSocket for voice assistants, phone agents, and interactive voice systems.","context_window":0,"max_tokens":0,"type":"realtime","pricing":{}},{"id":"xiaomi/mimo-v2.5","object":"model","created":1755815280,"released":1776816000,"owned_by":"xiaomi","name":"MiMo M2.5","description":"A native full-modal model supporting text, image, video, and audio understanding, with powerful Agent capabilities.","context_window":1050000,"max_tokens":131100,"type":"language","tags":["reasoning","tool-use","implicit-caching","file-input","vision"],"pricing":{"input":"0.00000014","output":"0.00000028","input_cache_read":"0.0000000028"}},{"id":"xiaomi/mimo-v2.5-pro","object":"model","created":1755815280,"released":1776816000,"owned_by":"xiaomi","name":"MiMo V2.5 Pro","description":"MiMo V2.5 Pro delivers significant improvements over its predecessor, MiMo-V2-Pro, in general agentic capabilities, complex software engineering, and long-horizon tasks. MiMo-V2.5-Pro is a 1.02T-parameter Mixture-of-Experts model with 42B active parameters, built on a hybrid-attention architecture with a 1M-token context window.","context_window":1050000,"max_tokens":131000,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.000000435","output":"0.00000087","input_cache_read":"0.0000000036"}},{"id":"zai/glm-4.5","object":"model","created":1755815280,"released":1753660800,"owned_by":"zai","name":"GLM 4.5","description":"GLM-4.5 and GLM-4.5-Air are our latest flagship models, purpose-built as foundational models for agent-oriented applications. Both leverage a Mixture-of-Experts (MoE) architecture. GLM-4.5 has a total parameter count of 355B with 32B active parameters per forward pass, while GLM-4.5-Air adopts a more streamlined design with 106B total parameters and 12B active parameters.","context_window":128000,"max_tokens":96000,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.0000006","output":"0.0000022","input_cache_read":"0.00000011"}},{"id":"zai/glm-4.5-air","object":"model","created":1755815280,"released":1753660800,"owned_by":"zai","name":"GLM 4.5 Air","description":"GLM-4.5 and GLM-4.5-Air are our latest flagship models, purpose-built as foundational models for agent-oriented applications. Both leverage a Mixture-of-Experts (MoE) architecture. GLM-4.5 has a total parameter count of 355B with 32B active parameters per forward pass, while GLM-4.5-Air adopts a more streamlined design with 106B total parameters and 12B active parameters.","context_window":128000,"max_tokens":96000,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.0000002","output":"0.0000011","input_cache_read":"0.00000003"}},{"id":"zai/glm-4.5v","object":"model","created":1755815280,"released":1754870400,"owned_by":"zai","name":"GLM 4.5V","description":"Built on the GLM-4.5-Air base model, GLM-4.5V inherits proven techniques from GLM-4.1V-Thinking while achieving effective scaling through a powerful 106B-parameter MoE architecture.","context_window":66000,"max_tokens":16000,"type":"language","tags":["implicit-caching","reasoning","tool-use","vision"],"pricing":{"input":"0.0000006","output":"0.0000018","input_cache_read":"0.00000011"}},{"id":"zai/glm-4.6","object":"model","created":1755815280,"released":1759190400,"owned_by":"zai","name":"GLM 4.6","description":"As the latest iteration in the GLM series, GLM-4.6 achieves comprehensive enhancements across multiple domains, including real-world coding, long-context processing, reasoning, searching, writing, and agentic applications.","context_window":200000,"max_tokens":96000,"type":"language","tags":["implicit-caching","reasoning","tool-use"],"pricing":{"input":"0.0000006","output":"0.0000022","input_cache_read":"0.00000011"}},{"id":"zai/glm-4.6v","object":"model","created":1755815280,"released":1759190400,"owned_by":"zai","name":"GLM-4.6V","description":"GLM-4.6V series are Z.ai’s iterations in a multimodal large language model. GLM-4.6V scales its context window to 128k tokens in training, and achieves SoTA performance in visual understanding among models of similar parameter scales.","context_window":128000,"max_tokens":24000,"type":"language","tags":["vision","file-input","reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.0000003","output":"0.0000009","input_cache_read":"0.00000005"}},{"id":"zai/glm-4.6v-flash","object":"model","created":1755815280,"released":1759190400,"owned_by":"zai","name":"GLM-4.6V-Flash","description":"For local deployment and low-latency applications. GLM-4.6V series are Z.ai’s iterations in a multimodal large language model. GLM-4.6V scales its context window to 128k tokens in training, and achieves SoTA performance in visual understanding among models of similar parameter scales.","context_window":128000,"max_tokens":24000,"type":"language","tags":["vision","reasoning","file-input","tool-use","implicit-caching"],"pricing":{}},{"id":"zai/glm-4.7","object":"model","created":1755815280,"released":1766361600,"owned_by":"zai","name":"GLM 4.7","description":"GLM-4.7 is Z.ai’s latest flagship model, with major upgrades focused on two key areas: stronger coding capabilities and more stable multi-step reasoning and execution.","context_window":200000,"max_tokens":120000,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"regions":["us"],"pricing":{"input":"0.0000006","output":"0.0000022","input_cache_read":"0.00000012"}},{"id":"zai/glm-4.7-flash","object":"model","created":1755815280,"released":1768780800,"owned_by":"zai","name":"GLM 4.7 Flash","description":"GLM-4.7-Flash balances high performance with efficiency, making it the perfect lightweight deployment option. Beyond coding, it is also recommended for creative writing, translation, long-context tasks, and roleplay.","context_window":200000,"max_tokens":131000,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.00000007","output":"0.0000004"}},{"id":"zai/glm-4.7-flashx","object":"model","created":1755815280,"released":1768780800,"owned_by":"zai","name":"GLM 4.7 FlashX","description":" GLM-4.7-Flash balances high performance with efficiency, making it the perfect lightweight deployment option. ","context_window":200000,"max_tokens":128000,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.00000006","output":"0.0000004","input_cache_read":"0.00000001"}},{"id":"zai/glm-5","object":"model","created":1755815280,"released":1770854400,"owned_by":"zai","name":"GLM 5","description":"GLM-5 is Zai’s new-generation flagship foundation model, designed for Agentic Engineering, capable of providing reliable productivity in complex system engineering and long-range Agent tasks. In terms of Coding and Agent capabilities, GLM-5 has achieved state-of-the-art (SOTA) performance in open source, with its usability in real programming scenarios approaching that of Claude Opus 4.5.","context_window":202800,"max_tokens":131100,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"regions":["us"],"pricing":{"input":"0.00000095","output":"0.00000315","input_cache_read":"0.0000002"}},{"id":"zai/glm-5-turbo","object":"model","created":1755815280,"released":1773532800,"owned_by":"zai","name":"GLM 5 Turbo","description":"GLM 5 Turbo is a foundation model deeply optimized for the OpenClaw scenario. It has been specifically optimized for the core requirements of OpenClaw tasks since the training phase, enhancing key capabilities such as tool invocation, command following, timed and persistent tasks, and long-chain execution.","context_window":202800,"max_tokens":131100,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"pricing":{"input":"0.0000012","output":"0.000004","input_cache_read":"0.00000024"}},{"id":"zai/glm-5.1","object":"model","created":1755815280,"released":1775520000,"owned_by":"zai","name":"GLM 5.1","description":"","context_window":202000,"max_tokens":202000,"type":"language","tags":["implicit-caching","reasoning","tool-use"],"regions":["us"],"pricing":{"input":"0.0000013","output":"0.0000043","input_cache_read":"0.00000026"}},{"id":"zai/glm-5.2","object":"model","created":1755815280,"released":1781568000,"owned_by":"zai","name":"GLM 5.2","description":"GLM-5.2 delivers powerful coding capabilities, usable 1M-context support, and continued strengths in long-horizon tasks.","context_window":1040000,"max_tokens":128000,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"regions":["us"],"pricing":{"input":"0.0000014","output":"0.0000044","input_cache_read":"0.00000026"}},{"id":"zai/glm-5.2-fast","object":"model","created":1755815280,"released":1782172800,"owned_by":"zai","name":"GLM 5.2 Fast","description":"Fast version of GLM 5.2 with 120-250 TPS.","context_window":1000000,"max_tokens":128000,"type":"language","tags":["reasoning","tool-use","implicit-caching"],"regions":["us"],"pricing":{"input":"0.0000021","output":"0.0000066","input_cache_read":"0.00000021"}},{"id":"zai/glm-5v-turbo","object":"model","created":1755815280,"released":1775001600,"owned_by":"zai","name":"GLM 5V Turbo","description":"GLM-5V-Turbo is Z.AI’s first multimodal coding foundation model, built for vision-based coding tasks. It can natively process multimodal inputs such as images, video, and text, while also excelling at long-horizon planning, complex coding, and action execution. Deeply optimized for agent workflows, it works seamlessly with agents such as Claude Code and OpenClaw to complete the full loop of “understand the environment → plan actions → execute tasks”.","context_window":200000,"max_tokens":128000,"type":"language","tags":["reasoning","tool-use","implicit-caching","vision","file-input"],"pricing":{"input":"0.0000012","output":"0.000004","input_cache_read":"0.00000024"}}]}