10:[["$","script",null,{"type":"application/ld+json","dangerouslySetInnerHTML":{"__html":"{\"@context\":\"https://schema.org\",\"@type\":\"SoftwareApplication\",\"name\":\"GLM TTS API\",\"applicationCategory\":\"DeveloperApplication\",\"operatingSystem\":\"Cross-platform\",\"url\":\"https://platform.empiriolabs.ai/dashboard/playground?model=glm-tts\",\"description\":\"GLM TTS API, playground, docs, and pricing on EmpirioLabs. LLM-based text-to-speech with zero-shot voice cloning from 3-10s of audio and emotion-expressive, controllable output via multi-reward RL.\",\"provider\":{\"@type\":\"Organization\",\"name\":\"EmpirioLabs AI\",\"url\":\"https://platform.empiriolabs.ai\"}}"}}],["$","$L27",null,{"models":[{"slug":"kling-3-0-turbo","display_name":"Kling 3.0 Turbo","description":"Text-to-video and image-to-video with synchronized native audio, at 720p or 1080p for 3 to 15 seconds, with aspect ratio and prompt control.","modalities":["video","text","image"],"input_modalities":["text","image"],"output_modalities":["video"],"category":"video","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/videos/generations"],"pricing_rows":[{"spec":"per second","label":"720p","value":"$$0.18"},{"spec":"per second","label":"1080p","value":"$$0.225"}],"requires_image_input":false,"max_image_inputs":1,"metadata":{"provider":"kling","provider_name":"Kling AI","features":["text_to_video","image_to_video","audio","video_generation"],"type":"generation","modality":"video","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/kling-3-0-turbo.png","is_featured":true,"is_new":true}},{"slug":"glm-5-2","display_name":"GLM 5.2","description":"Reasoning and coding model with a 1M token context, 128K output, adjustable reasoning effort, native web search, and tool calling.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"reasoning","context_window":1000000,"api_status":"platform","supported_endpoints":["/v1/chat/completions","/v1/responses","/v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$1.40"},{"spec":"per 1M generated tokens","label":"Output","value":"$$4.40"},{"spec":"per request","label":"Web Search","value":"$$0.033"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"zhipu","provider_name":"Z.ai","features":["reasoning","function_calling","structured_output","web_search"],"type":"reasoning","modality":"text","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/glm.png","is_featured":true,"is_new":true}},{"slug":"kimi-k2-7-code","display_name":"Kimi K2.7 Code","description":"Kimi K2.7 Code is Moonshot's trillion-parameter agentic coding model with 256K context, always-on reasoning, and text, image, and video inputs.","modalities":["multimodal","text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"reasoning","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.95"},{"spec":"per 1M generated tokens","label":"Output","value":"$$4.00"},{"spec":"per call when invoked","label":"Web search","value":"$$0.015"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"moonshot","provider_name":"Moonshot AI","features":["reasoning","function_calling","structured_output","multimodal","agentic_coding","web_search"],"type":"reasoning","modality":"multimodal","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/kimi.png","is_featured":true,"is_new":true}},{"slug":"fugu-ultra","display_name":"Fugu Ultra","description":"Multi-agent conductor that orchestrates frontier expert models for hard reasoning, coding, and research, with 1M context, image input, and web search.","modalities":["multimodal","text","image"],"input_modalities":["text","image"],"output_modalities":["text"],"category":"text","type":"reasoning","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=272K $7.50; >272K $15.00"},{"spec":"per 1M generated tokens","label":"Output","value":"<=272K $45.00; >272K $67.50"},{"spec":"per 1M cached input tokens","label":"Implicit cache read","value":"<=272K $1.50; >272K $3.00"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"sakana","provider_name":"Sakana AI","features":["reasoning","multimodal","web_search","function_calling","structured_output","agentic_coding","cache"],"type":"reasoning","modality":"multimodal","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/fugu.png","is_featured":true,"is_new":true}},{"slug":"qwen3-7-plus","display_name":"Qwen3.7 Plus","description":"Cost-effective Qwen3.7 vision-language model for text, image, video, coding, tool use, GUI understanding, and 1M-context workflows.","modalities":["multimodal","text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=256K $0.40; 256K-1M $1.20"},{"spec":"per 1M generated tokens","label":"Output","value":"<=256K $1.60; 256K-1M $4.80"},{"spec":"per call","label":"Web Search","value":"$$0.03"},{"spec":"per call","label":"Image Search","value":"$$0.03"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","vision","video","web_search","code_interpreter","function_calling","structured_output","prefix_continuation","fine_tuning","agentic_coding"],"type":"chat","modality":"multimodal","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":true,"is_new":true}},{"slug":"qwen3-7-plus:variant1","display_name":"Qwen3.7 Plus (Variant 1)","description":"Cost-effective Qwen3.7 vision-language model for text, image, video, coding, tool use, GUI understanding, and 1M-context workflows.","modalities":["multimodal","text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=256K $0.276 (was $0.40); 256K-1M $0.826 (was $1.20)"},{"spec":"per 1M generated tokens","label":"Output","value":"<=256K $1.101 (was $1.60); 256K-1M $3.301 (was $4.80)"},{"spec":"per 1M cached prompt tokens","label":"Implicit cache input","value":"<=256K $0.056 (was $0.08); 256K-1M $0.166 (was $0.24)"},{"spec":"per call","label":"Web Search","value":"$$0.01"},{"spec":"per call","label":"Image Search","value":"$$0.01"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","vision","video","web_search","code_interpreter","function_calling","structured_output","prefix_continuation","cache","fine_tuning","agentic_coding"],"type":"chat","modality":"multimodal","category_label":null,"modality_label":null,"region":"China","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":true,"is_new":true}},{"slug":"kimi-k2-7-code-highspeed","display_name":"Kimi K2.7 Code Highspeed","description":"Kimi K2.7 Code Highspeed is the faster-serving tier of Moonshot's agentic coding model, with 256K context, always-on reasoning, and image and video input.","modalities":["multimodal","text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"reasoning","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$1.90"},{"spec":"per 1M generated tokens","label":"Output","value":"$$8.00"},{"spec":"per call when invoked","label":"Web search","value":"$$0.015"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"moonshot","provider_name":"Moonshot AI","features":["reasoning","function_calling","structured_output","multimodal","agentic_coding","web_search"],"type":"reasoning","modality":"multimodal","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/kimi.png","is_featured":true,"is_new":true}},{"slug":"minimax-m3","display_name":"MiniMax M3","description":"MiniMax M3 is a multimodal reasoning model for coding, agents, and long-context analysis with text, image, and video input.","modalities":["multimodal","text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":524000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=512K $0.225 (was $0.30); >512K $1.20"},{"spec":"per 1M generated tokens","label":"Output","value":"<=512K $0.90 (was $1.20); >512K $4.80"},{"spec":"per 1M cached input tokens","label":"Implicit cache read","value":"<=512K $0.045 (was $0.06); >512K $0.24"},{"spec":"per successful search when enabled","label":"Linkup web search","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"minimax","provider_name":"MiniMax","features":["reasoning","vision","video","web_search","function_calling","cache","long_context"],"type":"chat","modality":"multimodal","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/minimax.png","is_featured":true,"is_new":true}},{"slug":"qwen3-7-max","display_name":"Qwen3.7 Max","description":"Qwen3.7 Max is a flagship text model for coding, productivity, long-running agents, deep thinking, tools, and 1M-token context.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$2.50"},{"spec":"per 1M generated tokens","label":"Output","value":"$$7.50"},{"spec":"per call when invoked","label":"Web search","value":"$$0.02"},{"spec":"per call when invoked","label":"Web extractor","value":"$$0.02"},{"spec":"per call when invoked","label":"Code interpreter","value":"$$0.02"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","web_search","code_interpreter","function_calling","agentic_coding"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":true,"is_new":true}},{"slug":"qwen3-7-max:variant1","display_name":"Qwen3.7 Max (Variant 1)","description":"Qwen3.7 Max is a flagship text model for coding, productivity, long-running agents, deep thinking, tools, and 1M-token context.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$1.65 (was $2.50)"},{"spec":"per 1M generated tokens","label":"Output","value":"$$4.951 (was $7.50)"},{"spec":"per call when invoked","label":"Web search","value":"$$0.01"},{"spec":"per call when invoked","label":"Web extractor","value":"$$0.01"},{"spec":"per call when invoked","label":"Code interpreter","value":"$$0.01"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","web_search","code_interpreter","function_calling","agentic_coding"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"China","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":true,"is_new":true}},{"slug":"ace-step-1.5-xl","display_name":"ACE-Step 1.5 XL","description":"Open-source music generation model for text-to-song and lyric-guided audio, with fast 8-step XL Turbo inference for controllable song iteration.","modalities":["text","audio"],"input_modalities":["text"],"output_modalities":["audio"],"category":"audio","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/audio/generations"],"pricing_rows":[{"spec":"per generated second","label":"Music generation","value":"$$0.00025 (was $0.0003)"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"ace-step","provider_name":"ACE-Step","features":["music_generation","lyrics","text_to_music","seed_control","commercial_ready"],"type":"generation","modality":null,"category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/ace-step-1.5-xl.png","is_featured":true,"is_new":true}},{"slug":"flux-2-klein-4b","display_name":"FLUX.2 Klein 4B","description":"Apache-licensed 4B FLUX.2 Klein image generation and editing model with text-to-image, reference-image editing, and creative workflow support.","modalities":["text","image"],"input_modalities":["text","image"],"output_modalities":["image"],"category":"image","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/images/generations"],"pricing_rows":[{"spec":"per image","label":"Image generation","value":"$$0.0085 (was $0.014)"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"black-forest-labs","provider_name":"Black Forest Labs","features":["image_generation","image_editing","reference_images","seed_control"],"type":"generation","modality":null,"category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/flux-2-klein-4b.png","is_featured":true,"is_new":true}},{"slug":"minimax-m2-7-highspeed","display_name":"MiniMax M2.7 Highspeed","description":"High-speed M2.7 variant tuned for fast inference with strong general-purpose performance with strong agentic capabilities.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":200000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.30 (was $0.60)"},{"spec":"per 1M generated tokens","label":"Output","value":"$$1.20 (was $2.40)"},{"spec":"per 1M cached input tokens","label":"Implicit cache read","value":"$$0.03 (was $0.06)"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"minimax","provider_name":"MiniMax","features":["reasoning"],"type":"chat","modality":null,"category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/minimax.png","is_featured":true,"is_new":false}},{"slug":"tts-1-5-mini","display_name":"TTS 1.5 Mini","description":"Sub-130ms TTFB voice synthesis with 271+ voices across 15 languages, expressive prosody, and real-time SSE streaming for low-latency voice agents.","modalities":["audio","text"],"input_modalities":["text"],"output_modalities":["audio"],"category":"audio","type":"tts","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/audio/speech","POST /v1/audio/speech:stream","GET /v1/voices"],"pricing_rows":[{"spec":"per 1M characters","label":"Synthesis","value":"$$17.50 (was $25.00)"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"playground_suggestions":["Welcome to EmpirioLabs AI, your one-stop API for frontier AI models.","Hello, and thank you for calling. How can I help you today?","Bienvenidos a EmpirioLabs. Frontier AI, una sola API.","And in tonight's headlines: voice AI just took another major step forward."],"provider":"inworld","provider_name":"Inworld","features":["multi_speaker","real_time","low_latency","streaming","word_timestamps","character_timestamps","multilingual","expressive_prosody"],"type":"tts","modality":"audio","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/tts-1-5-mini.png","is_featured":true,"is_new":true}},{"slug":"tts-1-5-max","display_name":"TTS 1.5 Max","description":"Broadcast-quality voice synthesis with rich expressive prosody, 271+ voices across 15 languages, and real-time SSE streaming with per-word timestamps.","modalities":["audio","text"],"input_modalities":["text"],"output_modalities":["audio"],"category":"audio","type":"tts","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/audio/speech","POST /v1/audio/speech:stream","GET /v1/voices"],"pricing_rows":[{"spec":"per 1M characters","label":"Synthesis","value":"$$29.75 (was $35.00)"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"playground_suggestions":["Welcome to EmpirioLabs AI, your one-stop API for frontier AI models.","Hello, and thank you for calling. How can I help you today?","Bienvenidos a EmpirioLabs. Frontier AI, una sola API.","And in tonight's headlines: voice AI just took another major step forward."],"provider":"inworld","provider_name":"Inworld","features":["multi_speaker","real_time","streaming","word_timestamps","character_timestamps","multilingual","expressive_prosody","broadcast_quality"],"type":"tts","modality":"audio","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/tts-1-5-max.png","is_featured":true,"is_new":true}},{"slug":"glm-5-1","display_name":"GLM 5.1","description":"Long-context Zhipu AI reasoning model with 202K context, 128K output, tool calling, structured output, and cache support.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"reasoning","context_window":202000,"api_status":"platform","supported_endpoints":["/v1/chat/completions","/v1/responses","/v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=32K $0.825 (was $1.40); 32K-200K $1.10 (was $1.40)"},{"spec":"per 1M generated tokens","label":"Output","value":"<=32K $3.301 (was $4.40); 32K-200K $3.851 (was $4.40)"},{"spec":"per 1M cached input tokens","label":"Implicit cache read","value":"<=32K $0.165 (was $0.26); 32K-200K $0.22 (was $0.26)"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"zhipu","provider_name":"Z.ai","features":["reasoning","function_calling","structured_output","cache"],"type":"reasoning","modality":"text","category_label":null,"modality_label":null,"region":"China","logo":"https://media.empiriolabs.ai/model-logos/glm.png","is_featured":true,"is_new":true}},{"slug":"kimi-k2-6","display_name":"Kimi K2.6","description":"Kimi K2.6 is a Moonshot multimodal reasoning model with 256K context, strong coding, and text, image, and video inputs.","modalities":["multimodal","text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"reasoning","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.8939 (was $0.95)"},{"spec":"per 1M generated tokens","label":"Output","value":"$$3.7131 (was $4.00)"},{"spec":"per 1M cached input tokens","label":"Implicit cache read","value":"$$0.1788"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"moonshot","provider_name":"Moonshot AI","features":["reasoning","function_calling","cache","multimodal"],"type":"reasoning","modality":"multimodal","category_label":null,"modality_label":null,"region":"China","logo":"https://media.empiriolabs.ai/model-logos/kimi.png","is_featured":true,"is_new":true}},{"slug":"deepseek-v4-flash:variant2","display_name":"DeepSeek V4 Flash (Variant 2)","description":"DeepSeek V4 Flash is a lightweight MoE reasoning model with 1M context, high throughput, automatic cache reads, and web search.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"reasoning","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.138 (was $0.14)"},{"spec":"per 1M generated tokens","label":"Output","value":"$$0.275 (was $0.28)"},{"spec":"per 1M cached input tokens","label":"Implicit cache read","value":"$$0.028"},{"spec":"per request when enabled","label":"Web search","value":"$$0.01"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"deepseek","provider_name":"DeepSeek","features":["reasoning","function_calling","web_search","cache"],"type":"reasoning","modality":"text","category_label":null,"modality_label":null,"region":"China","logo":"https://media.empiriolabs.ai/model-logos/deepseek.png","is_featured":true,"is_new":true}},{"slug":"deepseek-v4-pro:variant2","display_name":"DeepSeek V4 Pro (Variant 2)","description":"DeepSeek V4 Pro is a flagship MoE reasoning model with 1M context, strong coding, automatic cache reads, and web search.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"reasoning","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$1.65 (was $1.74)"},{"spec":"per 1M generated tokens","label":"Output","value":"$$3.301 (was $3.48)"},{"spec":"per 1M cached input tokens","label":"Implicit cache read","value":"$$0.138"},{"spec":"per request when enabled","label":"Web search","value":"$$0.01"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"deepseek","provider_name":"DeepSeek","features":["reasoning","function_calling","web_search","cache"],"type":"reasoning","modality":"text","category_label":null,"modality_label":null,"region":"China","logo":"https://media.empiriolabs.ai/model-logos/deepseek.png","is_featured":true,"is_new":true}},{"slug":"minimax-m2-7","display_name":"MiniMax M2.7","description":"MiniMax M2.7 is a general-purpose reasoning chat model with interleaved thinking, function calling, and prompt caching.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":200000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.15 (was $0.30)"},{"spec":"per 1M generated tokens","label":"Output","value":"$$0.60 (was $1.20)"},{"spec":"per 1M cached input tokens","label":"Implicit cache read","value":"$$0.03 (was $0.06)"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"minimax","provider_name":"MiniMax","features":["reasoning","function_calling","cache"],"type":"chat","modality":null,"category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/minimax.png","is_featured":true,"is_new":true}},{"slug":"gemma-4-e4b","display_name":"Gemma 4 E4B","description":"Gemma 4 E4B is a Google open multimodal chat model with image input, function calling, structured output, and efficient instruction following.","modalities":["multimodal","text","image"],"input_modalities":["text","image"],"output_modalities":["text"],"category":"text","type":"chat","context_window":8000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"fixed","label":"Per Message","value":"$$0.03"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"google","provider_name":"Google","features":["vision","function_calling","structured_output","reasoning"],"type":"chat","modality":"multimodal","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/gemma.png","is_featured":false,"is_new":false}},{"slug":"trellis-2-4b","display_name":"TRELLIS.2 4B","description":"TRELLIS.2 image-to-3D model that turns a reference image into a textured GLB asset with resolution, seed, mesh, texture, and export controls.","modalities":["3D","image"],"input_modalities":["image"],"output_modalities":["3D"],"category":"3d","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/3d/generations"],"pricing_rows":[{"spec":"per request","label":"512 asset","value":"$$0.025 (was $0.25)"},{"spec":"per request","label":"1024 asset","value":"$$0.249 (was $0.30)"},{"spec":"per request","label":"1536 asset","value":"$$0.499"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"microsoft","provider_name":"Microsoft","features":["image_to_3d","glb_output","pbr_materials","seed_control","commercial_ready"],"type":"generation","modality":"3D","category_label":"3D","modality_label":"3D","region":null,"logo":"https://media.empiriolabs.ai/model-logos/trellis-2-4b.png","is_featured":true,"is_new":true}},{"slug":"qwen3-5-122b-a10b","display_name":"Qwen3.5 122B-A10B","description":"Qwen3.5 122B-A10B is a multimodal reasoning model with 256K context, efficient sparse MoE inference, and text, image, and video input.","modalities":["text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=128K $0.115 (was $0.40); 128K-256K $0.287 (was $0.40)"},{"spec":"per 1M generated tokens","label":"Output","value":"<=128K $0.917 (was $3.20); 128K-256K $2.294 (was $3.20)"},{"spec":"per request when enabled","label":"Web search","value":"$$0.01"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","vision","web_search","function_calling","structured_output","multimodal"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"China","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":false,"is_new":true}},{"slug":"qwen3-5-397b-a17b","display_name":"Qwen3.5 397B-A17B","description":"Qwen3.5 397B-A17B is a flagship multimodal reasoning model for language, code, agents, GUI tasks, and image and video understanding.","modalities":["text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=128K $0.172 (was $0.60); 128K-256K $0.43 (was $0.60)"},{"spec":"per 1M generated tokens","label":"Output","value":"<=128K $1.032 (was $3.60); 128K-256K $2.58 (was $3.60)"},{"spec":"per request when enabled","label":"Web search","value":"$$0.01"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","vision","web_search","function_calling","structured_output","multimodal"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"China","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":false,"is_new":true}},{"slug":"qwen3-5-35b-a3b","display_name":"Qwen3.5 35B-A3B","description":"Qwen3.5 35B-A3B is an efficient native vision-language model with sparse MoE routing, deep thinking, and text, image, and video input.","modalities":["text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=128K $0.057 (was $0.25); 128K-256K $0.229 (was $0.25)"},{"spec":"per 1M generated tokens","label":"Output","value":"<=128K $0.459 (was $2.00); 128K-256K $1.835 (was $2.00)"},{"spec":"per request when enabled","label":"Web search","value":"$$0.01"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","vision","web_search","function_calling","structured_output","multimodal"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"China","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":false,"is_new":true}},{"slug":"qwen3-5-27b","display_name":"Qwen3.5 27B","description":"Qwen3.5 27B is a dense multimodal reasoning model with fast responses, 256K context, and text, image, and video understanding.","modalities":["text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=128K $0.086 (was $0.30); 128K-256K $0.258 (was $0.30)"},{"spec":"per 1M generated tokens","label":"Output","value":"<=128K $0.688 (was $2.40); 128K-256K $2.064 (was $2.40)"},{"spec":"per request when enabled","label":"Web search","value":"$$0.01"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","vision","web_search","function_calling","structured_output","multimodal"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"China","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":false,"is_new":true}},{"slug":"qwen3-6-27b","display_name":"Qwen3.6 27B","description":"Qwen3.6 27B improves agentic coding, STEM reasoning, spatial vision, OCR, and text, image, and video understanding on 256K context.","modalities":["text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.412564 (was $0.60)"},{"spec":"per 1M generated tokens","label":"Output","value":"$$2.475384 (was $3.60)"},{"spec":"per request when enabled","label":"Web search","value":"$$0.01"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","vision","web_search","function_calling","structured_output","multimodal"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"China","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":false,"is_new":true}},{"slug":"qwen3-6-flash","display_name":"Qwen3.6 Flash","description":"Fast Qwen3.6 vision-language model for agentic coding, math reasoning, spatial understanding, OCR, and text, image, and video input.","modalities":["multimodal","text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=256K $0.25; 256K-1M $1.00"},{"spec":"per 1M generated tokens","label":"Output","value":"<=256K $1.50; 256K-1M $4.00"},{"spec":"per query when enabled","label":"Web search","value":"$$0.02"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","vision","video","web_search","function_calling","structured_output","agentic_coding"],"type":"chat","modality":"multimodal","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":true,"is_new":true}},{"slug":"qwen3-6-flash:variant1","display_name":"Qwen3.6 Flash (Variant 1)","description":"China-region Qwen3.6 Flash keeps the same vision, coding, reasoning, and video capabilities with lower token pricing than the Singapore standard route.","modalities":["multimodal","text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=256K $0.165 (was $0.25); 256K-1M $0.66 (was $1.00)"},{"spec":"per 1M generated tokens","label":"Output","value":"<=256K $0.99 (was $1.50); 256K-1M $3.961 (was $4.00)"},{"spec":"per query when enabled","label":"Web search","value":"$$0.01"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","vision","video","web_search","function_calling","structured_output","agentic_coding"],"type":"chat","modality":"multimodal","category_label":null,"modality_label":null,"region":"China","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":true,"is_new":true}},{"slug":"qwen3-6-plus:variant1","display_name":"Qwen3.6 Plus (Variant 1)","description":"Qwen3.6 Plus is a state-of-the-art vision-language model for agentic coding, multimodal recognition, OCR, and object localization.","modalities":["multimodal","text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=256K $0.276 (was $0.50); 256K-1M $1.101 (was $2.00)"},{"spec":"per 1M generated tokens","label":"Output","value":"<=256K $1.651 (was $3.00); 256K-1M $6.602"},{"spec":"per query when enabled","label":"Web search","value":"$$0.01"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","vision","video","web_search","function_calling","structured_output","agentic_coding"],"type":"chat","modality":"multimodal","category_label":null,"modality_label":null,"region":"China","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":true,"is_new":true}},{"slug":"qwen3-5-plus:variant1","display_name":"Qwen3.5 Plus (Variant 1)","description":"China-region Qwen3.5 Plus is a native vision-language model for efficient text, image, video, coding, and reasoning workloads.","modalities":["multimodal","text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=128K $0.115 (was $0.36); 128K-256K $0.287 (was $0.36); 256K-1M $0.573 (was $1.08)"},{"spec":"per 1M generated tokens","label":"Output","value":"<=128K $0.688 (was $2.21); 128K-256K $1.72 (was $2.21); 256K-1M $3.44 (was $6.62)"},{"spec":"per query when enabled","label":"Web search","value":"$$0.01"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","vision","video","web_search","function_calling","structured_output","agentic_coding"],"type":"chat","modality":"multimodal","category_label":null,"modality_label":null,"region":"China","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":true,"is_new":true}},{"slug":"qwen3-5-flash:variant1","display_name":"Qwen3.5 Flash (Variant 1)","description":"China-region Qwen3.5 Flash delivers fast native vision-language inference for text, image, video, coding, and reasoning tasks.","modalities":["multimodal","text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=128K $0.029 (was $0.090); 128K-256K $0.115; 256K-1M $0.172"},{"spec":"per 1M generated tokens","label":"Output","value":"<=128K $0.287 (was $0.368); 128K-256K $1.147; 256K-1M $1.72"},{"spec":"per query when enabled","label":"Web search","value":"$$0.01"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","vision","video","web_search","function_calling","structured_output","agentic_coding"],"type":"chat","modality":"multimodal","category_label":null,"modality_label":null,"region":"China","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":true,"is_new":true}},{"slug":"gemma-4-26b-a4b","display_name":"Gemma 4 26B-A4B","description":"Gemma 4 26B A4B is a Google open multimodal model with 256K context, text, image, and video input, tools, and structured output.","modalities":["multimodal","text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages","POST /v1/completions"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.05 (was $0.15)"},{"spec":"per 1M generated tokens","label":"Output","value":"$$0.29 (was $0.50)"},{"spec":"per 1M cached input tokens","label":"Implicit cache read","value":"$$0.025 (was $0.15)"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"google","provider_name":"Google","features":["reasoning","vision","video","function_calling","structured_output","cache","multimodal","json_mode","logprobs"],"type":"chat","modality":"multimodal","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/gemma.png","is_featured":false,"is_new":true}},{"slug":"qwen3-5-9b","display_name":"Qwen3.5 9B","description":"Qwen3.5 9B is a compact multimodal reasoning model with 256K context, image and video input, function tools, and structured output.","modalities":["multimodal","text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages","POST /v1/completions"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.09 (was $0.10)"},{"spec":"per 1M generated tokens","label":"Output","value":"$$0.13 (was $0.15)"},{"spec":"per 1M cached input tokens","label":"Implicit cache read","value":"$$0.045"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","vision","video","function_calling","structured_output","cache","multimodal","json_mode","logprobs"],"type":"chat","modality":"multimodal","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":false,"is_new":true}},{"slug":"qwen3-5-4b","display_name":"Qwen3.5 4B","description":"Qwen3.5 4B is a low-cost multimodal reasoning model with 256K context, image and video input, function tools, and structured output.","modalities":["multimodal","text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages","POST /v1/completions"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.04"},{"spec":"per 1M generated tokens","label":"Output","value":"$$0.07"},{"spec":"per 1M cached input tokens","label":"Implicit cache read","value":"$$0.02"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","vision","video","function_calling","structured_output","cache","multimodal","json_mode","logprobs"],"type":"chat","modality":"multimodal","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":false,"is_new":true}},{"slug":"glm-4-7-flash","display_name":"GLM 4.7 Flash","description":"Free lightweight GLM-4.7 text model for coding, reasoning, long-context writing, and general chat.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":200000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"Free"},{"spec":"per 1M generated tokens","label":"Output","value":"Free"},{"spec":"per 1M cached input tokens","label":"Implicit cache read","value":"Free"},{"spec":"per request when enabled","label":"Web Search","value":"$$0.033"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"zhipu","provider_name":"Z.ai","features":["reasoning","function_calling","structured_output","web_search"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/glm.png","is_featured":false,"is_new":true}},{"slug":"glm-4-5-flash","display_name":"GLM 4.5 Flash","description":"Free lightweight GLM-4.5 text model for reasoning, coding, long-form chat, and general language tasks.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":200000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"Free"},{"spec":"per 1M generated tokens","label":"Output","value":"Free"},{"spec":"per 1M cached input tokens","label":"Implicit cache read","value":"Free"},{"spec":"per request when enabled","label":"Web Search","value":"$$0.033"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"zhipu","provider_name":"Z.ai","features":["reasoning","function_calling","structured_output","web_search"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/glm.png","is_featured":false,"is_new":true}},{"slug":"glm-4-6v-flash","display_name":"GLM 4.6V Flash","description":"Free multimodal GLM-4.6V model for image, video, file, and text understanding with native function calling.","modalities":["text","image","video","file"],"input_modalities":["text","image","video","file"],"output_modalities":["text"],"category":"text","type":"chat","context_window":128000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"Free"},{"spec":"per 1M generated tokens","label":"Output","value":"Free"},{"spec":"per 1M cached input tokens","label":"Implicit cache read","value":"Free"},{"spec":"per request when enabled","label":"Web Search","value":"$$0.033"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"zhipu","provider_name":"Z.ai","features":["vision","video_understanding","document_understanding","function_calling","structured_output","web_search"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/glm.png","is_featured":false,"is_new":true}},{"slug":"amazon-nova-canvas","display_name":"Amazon Nova Canvas","description":"Image generation and editing model creating and modifying images from text or image inputs, with inpainting, virtual try-on, and style controls.","modalities":["image","text"],"input_modalities":["text","image"],"output_modalities":["image"],"category":"image","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/images/generations"],"pricing_rows":[{"spec":"per image","label":"Small Standard (≤1024×1024)","value":"$$0.12"},{"spec":"per image","label":"Small Premium (≤1024×1024)","value":"$$0.18"},{"spec":"per image","label":"Large Standard (≤2048×2048)","value":"$$0.18"},{"spec":"per image","label":"Large Premium (≤2048×2048)","value":"$$0.24"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"amazon","provider_name":"Amazon","features":["inpainting","background_removal"],"type":"generation","modality":"image","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/amazon-nova-canvas.png","is_featured":false,"is_new":false}},{"slug":"amazon-nova-reel-1-1","display_name":"Amazon Nova Reel 1.1","description":"Video generation model producing up to 2-minute multi-shot videos from text and optional image prompts with improved quality and consistency.","modalities":["video","text","image"],"input_modalities":["text","image"],"output_modalities":["video"],"category":"video","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/videos/generations"],"pricing_rows":[{"spec":"per second","label":"Per Second","value":"$$0.14"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"amazon","provider_name":"Amazon","features":["camera_control"],"type":"generation","modality":"video","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/amazon-nova-reel-1-1.png","is_featured":false,"is_new":false}},{"slug":"deepgram-nova-3","display_name":"Deepgram Nova 3","description":"Speech-to-text transcription using the Nova-3 model with multi-language support and advanced customizable settings for production workloads.","modalities":["audio","text"],"input_modalities":["audio"],"output_modalities":["text"],"category":"transcription","type":"stt","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/audio/transcriptions"],"pricing_rows":[{"spec":"per minute of audio","label":"Transcription","value":"$$0.014"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"deepgram","provider_name":"Deepgram","features":["transcription","speech_to_text"],"type":"stt","modality":"audio","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/deepgram-nova-3.png","is_featured":false,"is_new":false}},{"slug":"deepreasoning","display_name":"DeepReasoning","description":"Pairs DeepSeek R1 chain-of-thought reasoning with Anthropic Claude creative and code generation behind a unified, data-controlled interface.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"reasoning","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1K tokens","label":"R1-0528 + Claude Sonnet 4.5 (Default)","value":"In $0.012 / Out $0.058"},{"spec":"per 1K tokens","label":"R1-0528 + Claude Haiku 4.5","value":"In $0.0048 / Out $0.023"},{"spec":"per 1K tokens","label":"R1-0528 + Claude Opus 4.5","value":"In $0.019 / Out $0.092"},{"spec":"per 1K tokens","label":"R1-0528 + Claude Sonnet 4","value":"In $0.012 / Out $0.058"},{"spec":"per 1K tokens","label":"R1-0528 + Claude Opus 4.1","value":"In $0.053 / Out $0.26"},{"spec":"per 1K tokens","label":"R1-0528 + Claude Opus 4","value":"In $0.053 / Out $0.26"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"winfunc","provider_name":"WinFunc","features":["reasoning","thinking"],"type":"reasoning","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/deepreasoning.png","is_featured":false,"is_new":false}},{"slug":"deepseek-prover-v2","display_name":"DeepSeek Prover V2","description":"Open-source LLM specialized in formal theorem proving in Lean 4, built on a recursive theorem-proving pipeline.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses"],"pricing_rows":[{"spec":"fixed","label":"Per Message","value":"$$0.020"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"deepseek","provider_name":"DeepSeek","features":["math","reasoning"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/deepseek-prover-v2.png","is_featured":false,"is_new":false}},{"slug":"deepseek-v3-2","display_name":"DeepSeek V3.2","description":"Open-source Mixture-of-Experts LLM tuned for high-efficiency reasoning, coding, and general language tasks across long-form prompts.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":128000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.57"},{"spec":"per 1M generated tokens","label":"Output","value":"$$1.71"},{"spec":"per call","label":"Web Search","value":"$$0.015"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"deepseek","provider_name":"DeepSeek","features":["reasoning"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/deepseek.png","is_featured":false,"is_new":false}},{"slug":"deepseek-v4-flash","display_name":"DeepSeek V4 Flash","description":"Lightweight MoE model with 284B total / 13B active parameters and native 1M context, tuned for low-latency, cost-effective high-concurrency use.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.14"},{"spec":"per 1M generated tokens","label":"Output","value":"$$0.28"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"deepseek","provider_name":"DeepSeek","features":["reasoning"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Germany","logo":"https://media.empiriolabs.ai/model-logos/deepseek.png","is_featured":true,"is_new":false}},{"slug":"deepseek-v4-pro","display_name":"DeepSeek V4 Pro","description":"Flagship MoE LLM with 1.6T total / 49B active parameters and native 1M context for advanced math, logical inference, and specialized coding.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$1.65 (was $1.74)"},{"spec":"per 1M generated tokens","label":"Output","value":"$$3.30 (was $3.48)"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"deepseek","provider_name":"DeepSeek","features":["reasoning"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Germany","logo":"https://media.empiriolabs.ai/model-logos/deepseek.png","is_featured":true,"is_new":false}},{"slug":"exa-answer","display_name":"Exa Answer","description":"Quick LLM-style answer to a natural-language question, grounded in fresh Exa web search results with inline citations and source links.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"research","type":"search","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/answer","POST /v1/search","POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per request","label":"Answer","value":"$$0.01"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"exa","provider_name":"Exa","features":["web_search","answer_engine"],"type":"search","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/exa-answer.png","is_featured":false,"is_new":false}},{"slug":"exa-search","display_name":"Exa Search","description":"Web search engine for finding pages, retrieving similar pages, crawling, and dedicated code search across the open web for AI agents.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"research","type":"search","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/search"],"pricing_rows":[{"spec":"per search","label":"Search (1-25 results)","value":"$$0.0060"},{"spec":"per search","label":"Search (26-100 results)","value":"$$0.030"},{"spec":"per page/feature","label":"Content (Text/Highlights/Summary)","value":"$$0.0060"},{"spec":"per 1k tokens","label":"Code Search","value":"$$0.0060"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"exa","provider_name":"Exa","features":["web_search","semantic_search"],"type":"search","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/exa-search.png","is_featured":false,"is_new":false}},{"slug":"gemini-2-5-flash-tts","display_name":"Gemini 2.5 Flash TTS","description":"Low-latency text-to-speech with single- and multi-speaker voices and controllable style, accent, and expressive tone for production apps.","modalities":["audio","text"],"input_modalities":["text"],"output_modalities":["audio"],"category":"audio","type":"tts","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/audio/speech"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$1.50"},{"spec":"per 1M generated tokens","label":"Output","value":"$$30.00"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"google","provider_name":"Google","features":["text_to_speech","multi_speaker","multilingual"],"type":"tts","modality":"audio","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/gemini-2-5-flash-tts.png","is_featured":false,"is_new":false}},{"slug":"gemini-2-5-pro-tts","display_name":"Gemini 2.5 Pro TTS","description":"High-quality TTS preview for podcasts, audiobooks, and customer support, with expressive multi-speaker voices across 23+ languages.","modalities":["audio","text"],"input_modalities":["text"],"output_modalities":["audio"],"category":"audio","type":"tts","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/audio/speech"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$3.00"},{"spec":"per 1M generated tokens","label":"Output","value":"$$60.00"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"google","provider_name":"Google","features":["text_to_speech","multi_speaker","multilingual"],"type":"tts","modality":"audio","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/gemini-2-5-pro-tts.png","is_featured":false,"is_new":false}},{"slug":"gemini-3-1-flash-tts","display_name":"Gemini 3.1 Flash TTS","description":"Highly controllable TTS with new Audio Tags for precise style, tone, pace, and delivery across narration, assistants, and voice apps.","modalities":["audio","text"],"input_modalities":["text"],"output_modalities":["audio"],"category":"audio","type":"tts","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/audio/speech"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$2.60"},{"spec":"per 1M generated tokens","label":"Output","value":"$$52.00"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"playground_suggestions":["Welcome to EmpirioLabs AI, your one-stop API for frontier AI models.","[whispers] In a world where every voice matters, choose the one that fits.","[excited] Big news, everyone! We just shipped a major update to the platform.","Calmly read this short meditation: take a deep breath in, hold for four seconds, and let it go slowly."],"provider":"google","provider_name":"Google","features":["text_to_speech","multi_speaker","multilingual"],"type":"tts","modality":"audio","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/gemini-3-1-flash-tts.png","is_featured":false,"is_new":false}},{"slug":"gemma-3-27b","display_name":"Gemma 3 27B","description":"Open-source vision-language model with 128K context, 140+ languages, improved math/reasoning, structured outputs, and function calling.","modalities":["text","image"],"input_modalities":["text","image"],"output_modalities":["text"],"category":"text","type":"chat","context_window":128000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"fixed","label":"Per Message","value":"$$0.0040"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"google","provider_name":"Google","features":["vision"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/gemma.png","is_featured":false,"is_new":false}},{"slug":"glm-tts","display_name":"GLM TTS","description":"LLM-based text-to-speech with zero-shot voice cloning from 3-10s of audio and emotion-expressive, controllable output via multi-reward RL.","modalities":["audio","text"],"input_modalities":["text","audio"],"output_modalities":["audio"],"category":"audio","type":"tts","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/audio/speech"],"pricing_rows":[{"spec":"per 1k characters","label":"Fast (INT8)","value":"$$0.20"},{"spec":"per 1k characters","label":"Quality (FP16)","value":"$$0.21"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"zhipu","provider_name":"Z.ai","features":["voice_cloning","emotion_control"],"type":"tts","modality":"audio","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/glm.png","is_featured":false,"is_new":false}},{"slug":"gptzero","display_name":"GPTZero","description":"Deep-learning detector that flags portions of text likely generated by AI versus human, classifying content as entirely human, AI, or mixed.","modalities":["text","document"],"input_modalities":["text","document"],"output_modalities":["text"],"category":"tools","type":"tool","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/detect","POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1,000 words","label":"Text Scan","value":"$$0.39"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"accepts_documents":true,"hide_playground_suggestions":true,"provider":"gptzero","provider_name":"GPTZero","features":["ai_detection","classification"],"type":"tool","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/gptzero.png","is_featured":false,"is_new":false}},{"slug":"happyhorse-1-0","display_name":"HappyHorse 1.0","description":"Video model offering Text-to-Video, Image-to-Video, Reference-to-Video, and Video Edit modes with high-fidelity, motion-smooth output.","modalities":["video","text","image"],"input_modalities":["text","image","video"],"output_modalities":["video"],"category":"video","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/videos/generations"],"pricing_rows":[{"spec":"per second","label":"All Modes 720P","value":"$$0.14"},{"spec":"per second","label":"All Modes 1080P","value":"$$0.24"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["audio_sync","editing"],"type":"generation","modality":"video","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/happyhorse-1-0.png","is_featured":false,"is_new":true}},{"slug":"hunyuan-image-3","display_name":"Hunyuan Image 3","description":"Open-source text-to-image model on a multimodal Mixture-of-Experts architecture with photorealistic detail and strong multilingual text rendering.","modalities":["image","text"],"input_modalities":["text"],"output_modalities":["image"],"category":"image","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/images/generations"],"pricing_rows":[{"spec":"per image","label":"Standard","value":"$$0.13"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"tencent","provider_name":"Tencent","features":["photorealistic","multilingual"],"type":"generation","modality":"image","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/hunyuan-image-3.png","is_featured":false,"is_new":false}},{"slug":"hunyuan-video-1-5","display_name":"Hunyuan Video 1.5","description":"8.3B-parameter video model with native 720p output (upscalable to 1080p), strong motion coherence, and bilingual prompt understanding up to 10s.","modalities":["video","text","image"],"input_modalities":["text","image"],"output_modalities":["video"],"category":"video","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/videos/generations"],"pricing_rows":[{"spec":"per second","label":"480p","value":"$$0.061 (was $0.075)"},{"spec":"per second","label":"720p","value":"$$0.29"},{"spec":"per second","label":"1080p (upscaled)","value":"$$0.67"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"tencent","provider_name":"Tencent","features":["1080p_upscale","bilingual"],"type":"generation","modality":"video","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/hunyuan-video-1-5.png","is_featured":true,"is_new":false}},{"slug":"janus-pro-deepseek","display_name":"Janus-Pro DeepSeek","description":"Autoregressive framework on the Janus Pro 7B model that unifies multimodal understanding and image generation in one architecture.","modalities":["image","text"],"input_modalities":["text","image"],"output_modalities":["text","image"],"category":"image","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/images/generations","POST /v1/chat/completions","POST /v1/images/analysis"],"pricing_rows":[{"spec":"per image","label":"Image Generation","value":"$$0.030"},{"spec":"per uploaded image","label":"Image Analysis","value":"$$0.030"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"deepseek","provider_name":"DeepSeek","features":["vision","image_generation"],"type":"generation","modality":"image","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/janus-pro-deepseek.png","is_featured":false,"is_new":false}},{"slug":"kling-o3","display_name":"Kling O3","description":"Video model in Standard or Pro modes with Text-to-Video, Image-to-Video, Reference-to-Video, editing, native sound, and multi-scene transitions.","modalities":["video","text","image","audio"],"input_modalities":["text","image","video","audio"],"output_modalities":["video"],"category":"video","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/videos/generations"],"pricing_rows":[{"spec":"per second","label":"Standard T2V/I2V","value":"$$0.168"},{"spec":"per second","label":"Standard T2V/I2V Sound","value":"$$0.224"},{"spec":"per second","label":"Standard Video Input","value":"$$0.252"},{"spec":"per second","label":"Pro T2V/I2V","value":"$$0.224"},{"spec":"per second","label":"Pro T2V/I2V Sound","value":"$$0.280"},{"spec":"per second","label":"Pro Video Input","value":"$$0.336"},{"spec":"per second","label":"4K T2V/I2V/Ref","value":"$$0.525"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"kling","provider_name":"Kling AI","features":["audio","editing"],"type":"generation","modality":"video","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/kling-o3.png","is_featured":false,"is_new":false}},{"slug":"kling-v3-motion-control","display_name":"Kling v3 Motion Control","description":"Kling 3.0 model that transfers motion from a reference video onto a character from a reference image, with Standard 720p and Pro 1080p tiers.","modalities":["video","text","image"],"input_modalities":["text","image","video"],"output_modalities":["video"],"category":"video","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/videos/generations"],"pricing_rows":[{"spec":"per second","label":"Standard (720p)","value":"$$0.14"},{"spec":"per second","label":"Pro (1080p)","value":"$$0.18"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"kling","provider_name":"Kling AI","features":["motion_control"],"type":"generation","modality":"video","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/kling-v3-motion-ctrl.png","is_featured":false,"is_new":false}},{"slug":"linkup-deep-search","display_name":"Linkup Deep Search","description":"Iterative AI search that keeps querying when initial results are insufficient, returning more comprehensive answers than Standard mode.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"research","type":"research","context_window":100000,"api_status":"platform","supported_endpoints":["POST /v1/search","POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"fixed","label":"Per Message","value":"$$0.13"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"linkup","provider_name":"Linkup","features":["web_search","deep_research"],"type":"research","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/linkup-deep-search.png","is_featured":false,"is_new":false}},{"slug":"linkup-standard","display_name":"Linkup Standard","description":"AI-powered web search with detailed overviews and answers, faster than Deep Search. Ranks #1 on OpenAI SimpleQA benchmark.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"research","type":"search","context_window":100000,"api_status":"platform","supported_endpoints":["POST /v1/search","POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"fixed","label":"Per Message","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"linkup","provider_name":"Linkup","features":["web_search"],"type":"search","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/linkup-standard.png","is_featured":false,"is_new":false}},{"slug":"magistral-medium-2509-thinking","display_name":"Magistral Medium 2509 Thinking","description":"Reasoning model tuned for tasks needing longer thought and higher accuracy: legal research, financial forecasting, software, and storytelling.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":40000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$2.60"},{"spec":"per 1M generated tokens","label":"Output","value":"$$6.50"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"mistral","provider_name":"Mistral AI","features":["reasoning","thinking"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/magistral-medium-2509-thinking.png","is_featured":false,"is_new":false}},{"slug":"mistral-medium-3","display_name":"Mistral Medium 3","description":"Cost-efficient language model offering strong reasoning and multimodal performance for general production workloads at competitive latency.","modalities":["text","image"],"input_modalities":["text","image"],"output_modalities":["text"],"category":"text","type":"chat","context_window":130000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"fixed","label":"Per Message","value":"$$0.015"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"mistral","provider_name":"Mistral AI","features":["vision"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/mistral-medium-3.png","is_featured":false,"is_new":false}},{"slug":"mistral-medium-3-1","display_name":"Mistral Medium 3.1","description":"Enterprise-grade model with strong reasoning, coding, and STEM performance, supporting hybrid, on-prem, and in-VPC deployments.","modalities":["text","image"],"input_modalities":["text","image"],"output_modalities":["text"],"category":"text","type":"chat","context_window":131000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.52"},{"spec":"per 1M generated tokens","label":"Output","value":"$$2.60"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"mistral","provider_name":"Mistral AI","features":["vision"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/mistral-medium-3-1.png","is_featured":false,"is_new":false}},{"slug":"mistral-small-3-1","display_name":"Mistral Small 3.1","description":"24B-parameter multimodal model with 128K context for image analysis, programming, math, and multilingual tasks, tuned for efficient local inference.","modalities":["text","image"],"input_modalities":["text","image"],"output_modalities":["text"],"category":"text","type":"chat","context_window":128000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"fixed","label":"Per Message","value":"$$0.0019"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"mistral","provider_name":"Mistral AI","features":["vision"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/mistral-small-3-1.png","is_featured":false,"is_new":false}},{"slug":"mistral-small-4","display_name":"Mistral Small 4","description":"Hybrid model unifying Instruct, Reasoning (Magistral), and Devstral families: 40% lower completion time and 3x throughput vs Small 3.","modalities":["text","image"],"input_modalities":["text","image"],"output_modalities":["text"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.15"},{"spec":"per 1M generated tokens","label":"Output","value":"$$0.60"},{"spec":"per call","label":"Standard Web Search","value":"$$0.084"},{"spec":"per call","label":"Premium Web Search","value":"$$0.140"},{"spec":"per call","label":"Code Interpreter","value":"$$0.084"},{"spec":"per image","label":"Image Generation","value":"$$0.280"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"mistral","provider_name":"Mistral AI","features":["vision"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/mistral-small-4.png","is_featured":false,"is_new":false}},{"slug":"moss-video-and-audio","display_name":"MOSS Video and Audio","description":"Open-source 32B MoE foundation model that generates synchronized video and audio in one inference step with precise dual-tower lip-sync.","modalities":["video","text","image","audio"],"input_modalities":["text","image"],"output_modalities":["video","audio"],"category":"video","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/videos/generations"],"pricing_rows":[{"spec":"per video","label":"360p Video","value":"$$0.17"},{"spec":"per video","label":"720p Video","value":"$$2.82"},{"spec":"additional fee","label":"T2V Fast","value":"$$0.065"},{"spec":"additional fee","label":"T2V Quality","value":"$$0.13"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"openmoss","provider_name":"OpenMOSS","features":["audio_sync","lipsync"],"type":"generation","modality":"video","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/moss-video-and-audio.png","is_featured":false,"is_new":false}},{"slug":"nova-lite-1-0","display_name":"Nova Lite 1.0","description":"Low-cost multimodal foundation model for text, images, and video on a 300K context (up to ~30 min video), tuned for speed and affordability.","modalities":["text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":300000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.069"},{"spec":"per 1M generated tokens","label":"Output","value":"$$0.28"},{"spec":"per 1M tokens","label":"Cached input","value":"$$0.0386"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"amazon","provider_name":"Amazon","features":["vision"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/nova-lite-1-0.png","is_featured":false,"is_new":false}},{"slug":"nova-lite-2","display_name":"Nova Lite 2","description":"Fast, cost-effective multimodal reasoning model for text, images, documents, and video on a 1M context (long docs and ~90 min clips).","modalities":["text","image","video","document"],"input_modalities":["text","image","video","document"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.38"},{"spec":"per 1M generated tokens","label":"Output","value":"$$3.16"},{"spec":"per 1M tokens","label":"Cached input","value":"$$0.2128"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"amazon","provider_name":"Amazon","features":["vision"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/nova-lite-2.png","is_featured":false,"is_new":false}},{"slug":"nova-micro-1-0","display_name":"Nova Micro 1.0","description":"Text-only foundation model tuned for ultra-low latency and cost on 128K context. Strong for summarization, translation, and chat with 44% cache discount.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":128000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.040"},{"spec":"per 1M generated tokens","label":"Output","value":"$$0.16"},{"spec":"per 1M tokens","label":"Cached input","value":"$$0.0224"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"amazon","provider_name":"Amazon","features":["fast"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/nova-micro-1-0.png","is_featured":false,"is_new":false}},{"slug":"nova-premier-1-0","display_name":"Nova Premier 1.0","description":"Most capable model in the family. Multimodal text/image/video on a 1M context with chain-of-thought reasoning across tools and data sources.","modalities":["text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$3.00"},{"spec":"per 1M generated tokens","label":"Output","value":"$$15.00"},{"spec":"per 1M tokens","label":"Cached input","value":"$$1.68"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"amazon","provider_name":"Amazon","features":["vision"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/nova-premier-1-0.png","is_featured":false,"is_new":false}},{"slug":"nova-pro-1-0","display_name":"Nova Pro 1.0","description":"Multimodal foundation model balancing accuracy, speed, and cost for text, images, and video on 300K context (up to ~30 min video).","modalities":["text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":300000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$2.40"},{"spec":"per 1M generated tokens","label":"Output","value":"$$9.60"},{"spec":"per 1M prompt tokens","label":"Latency Optimized Input","value":"$$3.00"},{"spec":"per 1M generated tokens","label":"Latency Optimized Output","value":"$$12.00"},{"spec":"per call when invoked","label":"Web Search (Linkup)","value":"$$0.013"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"amazon","provider_name":"Amazon","features":["vision"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/nova-pro-1-0.png","is_featured":false,"is_new":false}},{"slug":"openai-whisper-1","display_name":"OpenAI Whisper 1","description":"Whisper-1 speech-to-text transcription trained on multilingual supervised audio, with a 25 MB upload limit per file.","modalities":["audio","text"],"input_modalities":["audio"],"output_modalities":["text"],"category":"transcription","type":"stt","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/audio/transcriptions"],"pricing_rows":[{"spec":"per minute","label":"Per Minute of Audio","value":"$$0.030"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"openai","provider_name":"OpenAI","features":["transcription","speech_to_text"],"type":"stt","modality":"audio","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/openai-whisper-1.png","is_featured":false,"is_new":false}},{"slug":"perplexity-advanced-deep-research","display_name":"Perplexity Advanced Deep Research","description":"Institutional-grade research powered by Claude Opus 4.6 reasoning, with maximum depth, enhanced tool access, and extensive source coverage.","modalities":["text","image","document"],"input_modalities":["text","image","document"],"output_modalities":["text"],"category":"research","type":"research","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages","POST /v1/search"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$12.00"},{"spec":"per 1M generated tokens","label":"Output","value":"$$60.00"},{"spec":"per call","label":"Web Search Call","value":"$$0.012"},{"spec":"per call","label":"URL Fetch Call","value":"$$0.0012"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"perplexity","provider_name":"Perplexity","features":["web_search","reasoning","deep_research"],"type":"research","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/perplexity-adv-deep-research.png","is_featured":false,"is_new":false}},{"slug":"perplexity-deep-research","display_name":"Perplexity Deep Research","description":"Research model for multi-step retrieval, synthesis, and reasoning, autonomously searching, reading, and evaluating sources across complex topics.","modalities":["text","image","document"],"input_modalities":["text","image","document"],"output_modalities":["text"],"category":"research","type":"research","context_window":128000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages","POST /v1/search"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$4.80"},{"spec":"per 1M generated tokens","label":"Output","value":"$$19.00"},{"spec":"per 1M tokens","label":"Citation Tokens","value":"$$4.80"},{"spec":"per 1M tokens","label":"Reasoning Tokens","value":"$$7.20"},{"spec":"per query","label":"Search Queries","value":"$$0.012"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"perplexity","provider_name":"Perplexity","features":["web_search","reasoning","deep_research"],"type":"research","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/perplexity-deep-research.png","is_featured":false,"is_new":false}},{"slug":"perplexity-pro-search","display_name":"Perplexity Pro Search","description":"Sonar Pro as an agentic researcher: chains web searches, fetches full pages, and streams live reasoning, adapting strategy for complex queries.","modalities":["text","image"],"input_modalities":["text","image"],"output_modalities":["text"],"category":"research","type":"search","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages","POST /v1/search"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$7.80"},{"spec":"per 1M generated tokens","label":"Output","value":"$$39.00"},{"spec":"per request","label":"Base Fee (Low Context)","value":"$$0.036"},{"spec":"per request","label":"Base Fee (Medium Context)","value":"$$0.047"},{"spec":"per request","label":"Base Fee (High Context)","value":"$$0.057"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"perplexity","provider_name":"Perplexity","features":["web_search"],"type":"search","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/perplexity-pro-search.png","is_featured":false,"is_new":false}},{"slug":"perplexity-search","display_name":"Perplexity Search","description":"Real-time web search with filtering by domain, language, date, and more. Returns search results, not LLM responses; no file uploads.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"research","type":"search","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/search"],"pricing_rows":[{"spec":"per request","label":"Search Request","value":"$$0.0060"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"perplexity","provider_name":"Perplexity","features":["web_search","no_llm"],"type":"search","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/perplexity-search.png","is_featured":false,"is_new":false}},{"slug":"perplexity-sonar","display_name":"Perplexity Sonar","description":"Real-time web-connected search with accurate citations and customizable sources for up-to-date AI search integration in production apps.","modalities":["text","image","document"],"input_modalities":["text","image","document"],"output_modalities":["text"],"category":"research","type":"search","context_window":127000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages","POST /v1/search"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$2.40"},{"spec":"per 1M generated tokens","label":"Output","value":"$$2.40"},{"spec":"per request","label":"Base Fee (Low Context)","value":"$$0.012"},{"spec":"per request","label":"Base Fee (Medium Context)","value":"$$0.019"},{"spec":"per request","label":"Base Fee (High Context)","value":"$$0.029"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"perplexity","provider_name":"Perplexity","features":["web_search"],"type":"search","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/perplexity-sonar.png","is_featured":false,"is_new":false}},{"slug":"perplexity-sonar-pro","display_name":"Perplexity Sonar Pro","description":"Search-grounded model with double the citations and a larger context window, tuned for complex queries needing in-depth, nuanced answers.","modalities":["text","image","document"],"input_modalities":["text","image","document"],"output_modalities":["text"],"category":"research","type":"search","context_window":200000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages","POST /v1/search"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$7.20"},{"spec":"per 1M generated tokens","label":"Output","value":"$$36.00"},{"spec":"per request","label":"Base Fee (Low Context)","value":"$$0.014"},{"spec":"per request","label":"Base Fee (Medium Context)","value":"$$0.024"},{"spec":"per request","label":"Base Fee (High Context)","value":"$$0.034"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"perplexity","provider_name":"Perplexity","features":["web_search"],"type":"search","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/perplexity-sonar-pro.png","is_featured":false,"is_new":false}},{"slug":"perplexity-sonar-reasoning-pro","display_name":"Perplexity Sonar Reasoning Pro","description":"Reasoning model on the uncensored open-source R1-1776 with web search, outperforming leading search engines and LLMs on the SimpleQA benchmark.","modalities":["text","image","document"],"input_modalities":["text","image","document"],"output_modalities":["text"],"category":"research","type":"search","context_window":128000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages","POST /v1/search"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$4.80"},{"spec":"per 1M generated tokens","label":"Output","value":"$$19.00"},{"spec":"per request","label":"Base Fee (Low Context)","value":"$$0.014"},{"spec":"per request","label":"Base Fee (Medium Context)","value":"$$0.024"},{"spec":"per request","label":"Base Fee (High Context)","value":"$$0.034"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"perplexity","provider_name":"Perplexity","features":["web_search","reasoning"],"type":"search","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/perplexity-sonar-rsn-pro.png","is_featured":false,"is_new":false}},{"slug":"pixverse-v5","display_name":"Pixverse v5","description":"Cinematic video generation in Text-to-Video, Image-to-Video, and Transition modes with high detail, fluid motion, and lifelike animations.","modalities":["video","text","image"],"input_modalities":["text","image"],"output_modalities":["video"],"category":"video","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/videos/generations"],"pricing_rows":[{"spec":"per video","label":"360p/540p 5s","value":"$$0.45"},{"spec":"per video","label":"360p/540p 8s","value":"$$0.90"},{"spec":"per video","label":"720p 5s","value":"$$0.60"},{"spec":"per video","label":"720p 8s","value":"$$1.20"},{"spec":"per video","label":"1080p 5s","value":"$$1.20"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"pixverse","provider_name":"PixVerse","features":["audio","styles"],"type":"generation","modality":"video","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/pixverse-v5.png","is_featured":false,"is_new":false}},{"slug":"pixverse-v5-6","display_name":"Pixverse v5.6","description":"Generates videos from text or 1-2 frame image prompts up to 1080p, multiple aspect ratios, 5-10s durations, with optional synchronized audio.","modalities":["video","text","image"],"input_modalities":["text","image"],"output_modalities":["video"],"category":"video","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/videos/generations"],"pricing_rows":[{"spec":"per video","label":"360p/540p 5s no audio","value":"$$0.40"},{"spec":"per video","label":"360p/540p 5s audio","value":"$$0.80"},{"spec":"per video","label":"360p/540p 8s no audio","value":"$$0.80"},{"spec":"per video","label":"360p/540p 8s audio","value":"$$1.60"},{"spec":"per video","label":"360p/540p 10s no audio","value":"$$0.88"},{"spec":"per video","label":"360p/540p 10s audio","value":"$$1.76"},{"spec":"per video","label":"720p 5s no audio","value":"$$0.65"},{"spec":"per video","label":"720p 5s audio","value":"$$1.30"},{"spec":"per video","label":"720p 8s no audio","value":"$$1.30"},{"spec":"per video","label":"720p 8s audio","value":"$$2.60"},{"spec":"per video","label":"720p 10s no audio","value":"$$1.43"},{"spec":"per video","label":"720p 10s audio","value":"$$2.86"},{"spec":"per video","label":"1080p 5s no audio","value":"$$0.75"},{"spec":"per video","label":"1080p 5s audio","value":"$$1.50"},{"spec":"per video","label":"1080p 8s no audio","value":"$$1.50"},{"spec":"per video","label":"1080p 8s audio","value":"$$3.00"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"pixverse","provider_name":"PixVerse","features":["audio","styles"],"type":"generation","modality":"video","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/pixverse-v5-6.png","is_featured":false,"is_new":false}},{"slug":"qwen-image-2-0","display_name":"Qwen Image 2.0","description":"Unified image generation and editing model with class-leading complex Chinese/English text rendering, realistic textures, and multi-image fusion.","modalities":["image","text"],"input_modalities":["text","image"],"output_modalities":["image"],"category":"image","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/images/generations"],"pricing_rows":[{"spec":"per image","label":"Standard","value":"$$0.0322 (was $0.035)"},{"spec":"per image","label":"Pro","value":"$$0.069 (was $0.075)"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["text_rendering","image_editing","multi_image"],"type":"generation","modality":"image","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":false,"is_new":false}},{"slug":"qwen3-5-flash","display_name":"Qwen3.5 Flash","description":"Vision-language model with hybrid linear-attention plus sparse MoE, 1M context, and fast multimodal text/image/video inference.","modalities":["text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.090 (was $0.10)"},{"spec":"per 1M generated tokens","label":"Output","value":"$$0.368 (was $0.40)"},{"spec":"per call","label":"Web Search","value":"$$0.015"},{"spec":"per call","label":"Image Search","value":"$$0.012"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["vision","web_search","code_interpreter","function_calling"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":false,"is_new":false}},{"slug":"qwen3-5-omni-flash","display_name":"Qwen3.5 Omni Flash","description":"Cost-efficient omni-modal model handling text, image, audio, and video, with up to 3 hours of audio and 1 hour of video across 90+ languages.","modalities":["text","image","video","audio"],"input_modalities":["text","image","video","audio"],"output_modalities":["text","audio"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages","POST /v1/audio/speech"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"per 1M prompt tokens $0.40; per 1M prompt tokens $3.00"},{"spec":"per 1M generated tokens","label":"Output","value":"per 1M generated tokens $2.20; per 1M generated tokens $11.90"},{"spec":"per request","label":"Web Search","value":"$$0.015"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["vision","audio_in","audio_out","multilingual"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/qwen3-5-omni-flash.png","is_featured":true,"is_new":false}},{"slug":"qwen3-5-omni-plus","display_name":"Qwen3.5 Omni Plus","description":"Flagship omni-modal model for text, image, audio, and video. 3h audio, 1h video, 90+ input and 30+ output languages, 55 voice timbres.","modalities":["text","image","video","audio"],"input_modalities":["text","image","video","audio"],"output_modalities":["text","audio"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages","POST /v1/audio/speech"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"per 1M prompt tokens $1.40; per 1M prompt tokens $11.00"},{"spec":"per 1M generated tokens","label":"Output","value":"per 1M generated tokens $8.30; per 1M generated tokens $44.00"},{"spec":"per request","label":"Web Search","value":"$$0.015"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["vision","audio_in","audio_out","multilingual"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":true,"is_new":false}},{"slug":"qwen3-5-plus","display_name":"Qwen3.5 Plus","description":"Multimodal model with hybrid architecture for efficient deep thinking and visual understanding across text, image, and video on a 1M context.","modalities":["text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=256K $0.36 (was $0.40); 256K-1M $1.08 (was $1.20)"},{"spec":"per 1M generated tokens","label":"Output","value":"<=256K $2.21 (was $2.40); 256K-1M $6.62 (was $7.20)"},{"spec":"per call","label":"Web Search","value":"$$0.015"},{"spec":"per call","label":"Image Search","value":"$$0.012"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["vision","web_search","code_interpreter","function_calling","reasoning"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":true,"is_new":false}},{"slug":"qwen3-6-max-preview","display_name":"Qwen3.6 Max Preview","description":"Largest preview variant in the 3.6 series (text-only): improved coding agent execution, stronger front-end skills, and broader long-tail knowledge.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=128K $1.31; 128K-256K $1.97"},{"spec":"per 1M generated tokens","label":"Output","value":"<=128K $7.88; 128K-256K $11.82"},{"spec":"per call","label":"Web Search","value":"$$0.020"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","agentic_coding","web_search"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":true,"is_new":false}},{"slug":"qwen3-6-plus","display_name":"Qwen3.6 Plus","description":"Vision-language model with major upgrades over 3.5: agentic and front-end coding, multimodal recognition, OCR, and object localization.","modalities":["multimodal","text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=256K $0.50; 256K-1M $2.00"},{"spec":"per 1M generated tokens","label":"Output","value":"<=256K $3.00; 256K-1M $6.00"},{"spec":"per call","label":"Web Search","value":"$$0.026"},{"spec":"per call","label":"Image Search","value":"$$0.0208"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["agentic_coding","cache","function_calling","reasoning","structured_output","video","vision","web_search"],"type":"chat","modality":"multimodal","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":true,"is_new":false}},{"slug":"qwen3-max","display_name":"Qwen3 Max","description":"256K-context flagship with major improvements in reasoning, instruction following, and multilingual support, plus higher coding/math accuracy.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=32K $1.08 (was $1.20); 32K-128K $2.16 (was $2.40); 128K-256K $2.70 (was $3.00)"},{"spec":"per 1M generated tokens","label":"Output","value":"<=32K $5.52 (was $6.00); 32K-128K $11.04 (was $12.00); 128K-256K $13.80 (was $15.00)"},{"spec":"per request","label":"Web Search","value":"$$0.015"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","code_interpreter","web_search"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":false,"is_new":false}},{"slug":"qwen3-max-preview","display_name":"Qwen3 Max Preview","description":"Preview release with major gains over the 2.5 series in Chinese-English understanding, complex instructions, multilingual ability, and tool use.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=32K $1.08 (was $1.20); 32K-128K $2.16 (was $2.40); 128K-256K $2.70 (was $3.00)"},{"spec":"per 1M generated tokens","label":"Output","value":"<=32K $4.80 (was $6.00); 32K-128K $9.60 (was $12.00); 128K-256K $12.00 (was $15.00)"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","code_interpreter","web_search"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":true,"is_new":false}},{"slug":"qwen3-max-thinking","display_name":"Qwen3 Max Thinking","description":"Reasoning model with adaptive tool use (search, memory, code interpreter) and test-time scaling for higher accuracy on complex tasks.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=32K $1.08 (was $1.20); 32K-128K $2.16 (was $2.40); 128K-256K $2.70 (was $3.00)"},{"spec":"per 1M generated tokens","label":"Output","value":"<=32K $5.52 (was $6.00); 32K-128K $11.04 (was $12.00); 128K-256K $13.80 (was $15.00)"},{"spec":"per request","label":"Web Search","value":"$$0.015"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["reasoning","code_interpreter","web_search","thinking"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":true,"is_new":false}},{"slug":"qwen3-rerank","display_name":"Qwen3 Rerank","description":"Semantic document reranker. Sorts up to 500 candidates per query by relevance, supports 100+ languages, and accepts a custom sorting instruction.","modalities":["text","ranking"],"input_modalities":["text"],"output_modalities":["ranking"],"category":"reranker","type":null,"context_window":4000,"api_status":"platform","supported_endpoints":["POST /v1/reranks"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.10"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["semantic ranking","multilingual","rag","custom instructions"],"type":"$undefined","modality":"$undefined","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":false,"is_new":true}},{"slug":"seed-2-0-code","display_name":"Seed 2.0 Code","description":"Coding-tuned 256K-context model with strong front-end results and multilingual programming support for AI coding tools and agents.","modalities":["text","image","video","document"],"input_modalities":["text","image","video","document"],"output_modalities":["text"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=128K $0.40; 128K-256K $0.80"},{"spec":"per 1M generated tokens","label":"Output","value":"<=128K $2.40; 128K-256K $4.80"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"bytedance","provider_name":"ByteDance","features":["code_interpreter","reasoning","agentic_coding"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Malaysia","logo":"https://media.empiriolabs.ai/model-logos/seed-2-0-code.png","is_featured":false,"is_new":false}},{"slug":"seed-2-0-lite","display_name":"Seed 2.0 Lite","description":"Balanced general-purpose model for high-frequency enterprise workloads: information processing, content, search, and data analysis.","modalities":["text","image","video","document"],"input_modalities":["text","image","video","document"],"output_modalities":["text"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=128K $0.31; 128K-256K $0.62"},{"spec":"per 1M generated tokens","label":"Output","value":"<=128K $2.50; 128K-256K $5.00"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"bytedance","provider_name":"ByteDance","features":["vision","reasoning"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Malaysia","logo":"https://media.empiriolabs.ai/model-logos/seed-2-0-lite.png","is_featured":false,"is_new":false}},{"slug":"seed-2-0-mini","display_name":"Seed 2.0 Mini","description":"Latency-focused multimodal model with 256K context, four reasoning effort modes, and image/video understanding for high-concurrency use.","modalities":["text","image","video","document"],"input_modalities":["text","image","video","document"],"output_modalities":["text"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=128K $0.12; 128K-256K $0.24"},{"spec":"per 1M generated tokens","label":"Output","value":"<=128K $0.50; 128K-256K $1.00"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"bytedance","provider_name":"ByteDance","features":["vision","reasoning"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Malaysia","logo":"https://media.empiriolabs.ai/model-logos/seed-2-0-mini.png","is_featured":false,"is_new":false}},{"slug":"seed-2-0-pro","display_name":"Seed 2.0 Pro","description":"Flagship general model with 256K context for complex reasoning, multimodal understanding, structured generation, and tool-augmented execution.","modalities":["text","image","video","document"],"input_modalities":["text","image","video","document"],"output_modalities":["text"],"category":"text","type":"chat","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"<=128K $0.63; 128K-256K $1.26"},{"spec":"per 1M generated tokens","label":"Output","value":"<=128K $3.79; 128K-256K $7.58"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"bytedance","provider_name":"ByteDance","features":["vision","reasoning"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Malaysia","logo":"https://media.empiriolabs.ai/model-logos/seed-2-0-pro.png","is_featured":false,"is_new":false}},{"slug":"seedance-2-0-fast","display_name":"Seedance 2.0 Fast","description":"Speed-optimized 2.0 video variant for cinematic clips with native audio sync, camera control, and stable motion at lower cost per render.","modalities":["video","text","image","audio"],"input_modalities":["text","image","video","audio"],"output_modalities":["video"],"category":"video","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/videos/generations"],"pricing_rows":[{"spec":"per second","label":"T2V/I2V 480P","value":"$$0.122"},{"spec":"per second","label":"T2V/I2V 720P","value":"$$0.260"},{"spec":"per second","label":"Video Input 480P","value":"$$0.284"},{"spec":"per second","label":"Video Input 720P","value":"$$0.610"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"bytedance","provider_name":"ByteDance","features":["audio_sync","camera_control"],"type":"generation","modality":"video","category_label":null,"modality_label":null,"region":"Malaysia","logo":"https://media.empiriolabs.ai/model-logos/seedance-2-0-fast.png","is_featured":true,"is_new":false}},{"slug":"seedance-2-0-pro","display_name":"Seedance 2.0 Pro","description":"Multimodal video model for cinematic output from text, image, audio, or video inputs, with stable motion and consistent characters.","modalities":["video","text","image","audio"],"input_modalities":["text","image","video","audio"],"output_modalities":["video"],"category":"video","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/videos/generations"],"pricing_rows":[{"spec":"per second","label":"T2V/I2V 480P","value":"$$0.139"},{"spec":"per second","label":"T2V/I2V 720P","value":"$$0.300"},{"spec":"per second","label":"T2V/I2V 1080P","value":"$$0.749"},{"spec":"per second","label":"T2V/I2V 4K","value":"$$1.555"},{"spec":"per second","label":"Video Input 480P","value":"$$0.342"},{"spec":"per second","label":"Video Input 720P","value":"$$0.736"},{"spec":"per second","label":"Video Input 1080P","value":"$$1.841"},{"spec":"per second","label":"Video Input 4K","value":"$$3.732"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"bytedance","provider_name":"ByteDance","features":["audio_sync","camera_control","character_consistency"],"type":"generation","modality":"video","category_label":null,"modality_label":null,"region":"Malaysia","logo":"https://media.empiriolabs.ai/model-logos/seedance-2-0-pro.png","is_featured":true,"is_new":false}},{"slug":"seedream-5-0-lite","display_name":"Seedream 5.0 Lite","description":"Unified multimodal image model that reasons through prompts before rendering, producing high-resolution and consistent edits and brand visuals.","modalities":["image","text"],"input_modalities":["text","image"],"output_modalities":["image"],"category":"image","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/images/generations"],"pricing_rows":[{"spec":"per image","label":"Standard","value":"$$0.0350"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"bytedance","provider_name":"ByteDance","features":["reasoning","editing"],"type":"generation","modality":"image","category_label":null,"modality_label":null,"region":"Malaysia","logo":"https://media.empiriolabs.ai/model-logos/seedream-5-0-lite.png","is_featured":false,"is_new":false}},{"slug":"soulx-podcast","display_name":"SoulX Podcast","description":"Open-source voice model for long-form, multi-speaker podcast dialogue with paralinguistic control (laughter, sighs) and zero-shot voice cloning.","modalities":["audio","text"],"input_modalities":["text","audio"],"output_modalities":["audio"],"category":"audio","type":"tts","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/audio/speech"],"pricing_rows":[{"spec":"per 1k characters","label":"Base","value":"$$0.015"},{"spec":"per 1k characters","label":"Dialect","value":"$$0.015"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"soul-ai-lab","provider_name":"Soul AI Lab","features":["voice_cloning","multi_speaker","dialect","podcast"],"type":"tts","modality":"audio","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/soulx-podcast.png","is_featured":false,"is_new":false}},{"slug":"stable-audio-2-0","display_name":"Stable Audio 2.0","description":"Generates audio up to 3 minutes from text prompts, supporting text-to-audio and audio-to-audio with adjustable duration, steps, and CFG scale.","modalities":["audio","text"],"input_modalities":["text"],"output_modalities":["audio"],"category":"audio","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/audio/generations"],"pricing_rows":[{"spec":"per generation","label":"Base Cost","value":"$$0.58"},{"spec":"per step","label":"Per Step Cost","value":"$$0.00"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"stability","provider_name":"Stability AI","features":["music_generation","text_to_audio","sound_effects"],"type":"generation","modality":"audio","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/stable-audio-2-0.png","is_featured":false,"is_new":false}},{"slug":"stable-audio-2-5","display_name":"Stable Audio 2.5","description":"Up-to-3-minute audio from text with text-to-audio, audio-to-audio, and audio inpainting for music production, sound design, and remixing.","modalities":["audio","text"],"input_modalities":["text"],"output_modalities":["audio"],"category":"audio","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/audio/generations"],"pricing_rows":[{"spec":"per generation","label":"Generation","value":"$$0.68"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"stability","provider_name":"Stability AI","features":["music_generation","text_to_audio","sound_effects"],"type":"generation","modality":"audio","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/stable-audio-2-5.png","is_featured":false,"is_new":false}},{"slug":"svi-2-0-pro","display_name":"SVI 2.0 Pro","description":"Stable Video Infinity 2.0 Pro on WAN 2.2: extends still images into theoretically infinite-length video while keeping consistent character IDs.","modalities":["video","text","image"],"input_modalities":["text","image"],"output_modalities":["video"],"category":"video","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/videos/generations"],"pricing_rows":[{"spec":"per second","label":"480p Video","value":"$$0.057"},{"spec":"per second","label":"720p Video","value":"$$0.17"},{"spec":"additional fee","label":"T2V Fast","value":"$$0.065"},{"spec":"additional fee","label":"T2V Quality","value":"$$0.13"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"vita-epfl","provider_name":"VITA-Group / EPFL","features":["infinite_length","character_consistency"],"type":"generation","modality":"video","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/svi-2-0-pro.png","is_featured":true,"is_new":false}},{"slug":"tavily-research","display_name":"Tavily Research","description":"Multi-search research assistant that explores a topic, analyzes sources, and produces a detailed research report with citations.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"research","type":"research","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/research","POST /v1/search"],"pricing_rows":[{"spec":"average per task","label":"Mini","value":"~$1.19"},{"spec":"average per task","label":"Pro","value":"~$2.75"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"tavily","provider_name":"Tavily","features":["web_search","citations","multi_search"],"type":"research","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/tavily-research.png","is_featured":false,"is_new":false}},{"slug":"tavily-search","display_name":"Tavily Search","description":"Web search with crawl, extract, and URL mapping for fast, structured retrieval across pages and domains for downstream pipelines.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"research","type":"search","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/search"],"pricing_rows":[{"spec":"per search","label":"Search (Basic/Fast/Ultra-Fast)","value":"$$0.0096"},{"spec":"per search","label":"Search (Advanced)","value":"$$0.019"},{"spec":"per search","label":"Search (Advanced + Answer)","value":"$$0.029"},{"spec":"per 5 URLs","label":"Extract (Basic)","value":"$$0.0096"},{"spec":"per 5 URLs","label":"Extract (Advanced)","value":"$$0.019"},{"spec":"per 10 pages","label":"Crawl/Map (basic)","value":"$$0.0096"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"tavily","provider_name":"Tavily","features":["web_search","crawl","extract","map"],"type":"search","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/tavily-search.png","is_featured":false,"is_new":false}},{"slug":"text-embedding-v4","display_name":"Text Embedding v4","description":"Multilingual text embedding with selectable output dimensions (64–2048). Up to 8,192 tokens per input.","modalities":["text","embedding"],"input_modalities":["text"],"output_modalities":["embedding"],"category":"embedding","type":null,"context_window":8192,"api_status":"platform","supported_endpoints":["POST /v1/embeddings"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.07"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["custom dimensions","sparse vectors","task instructions","batch"],"type":"$undefined","modality":"$undefined","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":"$undefined","is_new":true}},{"slug":"tongyi-embedding-vision-flash","display_name":"Tongyi Embedding Vision Flash","description":"Speed-optimised multimodal embedding — same shape as Vision-Plus, 3× cheaper image/video tokens.","modalities":["text","image","video","embedding"],"input_modalities":["text","image","video"],"output_modalities":["embedding"],"category":"embedding","type":null,"context_window":1024,"api_status":"platform","supported_endpoints":["POST /v1/embeddings"],"pricing_rows":[{"spec":"per 1M tokens","label":"Text input","value":"$$0.09"},{"spec":"per 1M tokens","label":"Image / video input","value":"$$0.03"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["multimodal","independent vectors","low cost"],"type":"$undefined","modality":"$undefined","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":"$undefined","is_new":true}},{"slug":"tongyi-embedding-vision-plus","display_name":"Tongyi Embedding Vision Plus","description":"Multimodal embedding producing independent vectors for text, image, and video inputs.","modalities":["text","image","video","embedding"],"input_modalities":["text","image","video"],"output_modalities":["embedding"],"category":"embedding","type":null,"context_window":1024,"api_status":"platform","supported_endpoints":["POST /v1/embeddings"],"pricing_rows":[{"spec":"per 1M tokens","label":"Text input","value":"$$0.09"},{"spec":"per 1M tokens","label":"Image / video input","value":"$$0.09"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["multimodal","independent vectors"],"type":"$undefined","modality":"$undefined","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/qwen.png","is_featured":"$undefined","is_new":true}},{"slug":"wan-2-6","display_name":"Wan 2.6","description":"Multimodal video generation model for cinematic, multi-shot stories with native audio-visual sync (lip-sync, dialogue, music, SFX).","modalities":["video","text","image","audio"],"input_modalities":["text","image","video","audio"],"output_modalities":["video"],"category":"video","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/videos/generations"],"pricing_rows":[{"spec":"per second","label":"Standard 720P","value":"$$0.09 (was $0.10)"},{"spec":"per second","label":"Standard 1080P","value":"$$0.138 (was $0.15)"},{"spec":"per second","label":"Flash 720P (audio)","value":"$$0.045 (was $0.050)"},{"spec":"per second","label":"Flash 720P (no audio)","value":"$$0.0225 (was $0.0250)"},{"spec":"per second","label":"Flash 1080P (audio)","value":"$$0.069 (was $0.0750)"},{"spec":"per second","label":"Flash 1080P (no audio)","value":"$$0.0345 (was $0.03750)"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["audio_sync","character_consistency","multi_shot"],"type":"generation","modality":"video","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/wan.png","is_featured":false,"is_new":false}},{"slug":"wan-2-7","display_name":"Wan 2.7","description":"Multimodal video model supporting T2V, I2V, video editing, and reference-to-video, with high-fidelity output from text, image, or video inputs.","modalities":["video","text","image","audio"],"input_modalities":["text","image","video","audio"],"output_modalities":["video"],"category":"video","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/videos/generations"],"pricing_rows":[{"spec":"per second","label":"All Modes 720P","value":"$$0.10"},{"spec":"per second","label":"All Modes 1080P","value":"$$0.150"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["audio_sync","character_consistency","multi_shot"],"type":"generation","modality":"video","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/wan.png","is_featured":false,"is_new":false}},{"slug":"wan2-7-image","display_name":"Wan2.7 Image","description":"Image generation and editing companion model: text-to-image, bounding-box edits, and cohesive image sets, with up to 4K output on Pro.","modalities":["image","text"],"input_modalities":["text","image"],"output_modalities":["image"],"category":"image","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/images/generations"],"pricing_rows":[{"spec":"per image","label":"Standard","value":"$$0.030"},{"spec":"per image","label":"Pro","value":"$$0.075"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["image_editing","4k"],"type":"generation","modality":"image","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/wan.png","is_featured":false,"is_new":false}},{"slug":"whisper-large-v3-turbo","display_name":"Whisper Large v3 Turbo","description":"Controlled Whisper Large v3 Turbo transcription with multilingual ASR, translation, VAD, timestamps, subtitles, hotwords, and decoder controls.","modalities":["audio","text"],"input_modalities":["audio"],"output_modalities":["text"],"category":"transcription","type":"stt","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/audio/transcriptions"],"pricing_rows":[{"spec":"per minute of audio","label":"Controlled transcription","value":"$$0.005 (was $0.006)"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"openai","provider_name":"OpenAI","features":["transcription","translation","multilingual","word_timestamps","hotwords","srt_vtt"],"type":"stt","modality":"audio","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/whisper-large-v3-turbo.png","is_featured":true,"is_new":true}},{"slug":"manus","display_name":"Manus","description":"Autonomous AI agent that turns a high-level prompt into subtasks, calls tools and APIs, and delivers end-to-end results without manual orchestration.","modalities":["text","image","video","audio","document"],"input_modalities":["text","image","video","audio","document"],"output_modalities":["text","image","video","audio","document"],"category":"tools","type":"agent","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/agents/run","GET /v1/agents/{task_id}","GET /v1/agents/{task_id}/messages","POST /v1/agents/{task_id}/stop"],"pricing_rows":[{"spec":"per task","label":"Adaptive - Manus 1.6 Lite","value":"$$1.44 - $2.63"},{"spec":"per task","label":"Adaptive - Manus 1.6","value":"$$2.89 - $5.25"},{"spec":"per task","label":"Adaptive - Manus 1.6 Max","value":"$$5.25 - $9.19"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"manus","provider_name":"Manus","features":["autonomous","multi_step","tool_use"],"type":"agent","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/manus.png","is_featured":false,"is_new":false}},{"slug":"deepseek-v4-flash:variant1","display_name":"DeepSeek V4 Flash (Variant 1)","description":"Lightweight MoE model with 284B total / 13B active parameters and native 1M context, tuned for low-latency, cost-effective high-concurrency use.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.20"},{"spec":"per 1M generated tokens","label":"Output","value":"$$0.40"},{"spec":"per request when enabled","label":"Web search","value":"$$0.02"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"deepseek","provider_name":"DeepSeek","features":["reasoning","web_search"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/deepseek.png","is_featured":false,"is_new":true}},{"slug":"deepseek-v4-pro:variant1","display_name":"DeepSeek V4 Pro (Variant 1)","description":"Flagship MoE LLM with 1.6T total / 49B active parameters and native 1M context for advanced math, logical inference, and specialized coding.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$2.40"},{"spec":"per 1M generated tokens","label":"Output","value":"$$4.80"},{"spec":"per request when enabled","label":"Web search","value":"$$0.02"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"deepseek","provider_name":"DeepSeek","features":["reasoning","web_search"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/deepseek.png","is_featured":false,"is_new":true}},{"slug":"grok-imagine-video-1-5","display_name":"Grok Imagine Video 1.5","description":"Image-to-video model that animates a source image with prompt-guided motion, up to 15 seconds at 480p or 720p across seven aspect ratios.","modalities":["video","text","image"],"input_modalities":["text","image"],"output_modalities":["video"],"category":"video","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/videos/generations"],"pricing_rows":[{"spec":"per image","label":"Image input","value":"$$0.05"},{"spec":"per second","label":"480p","value":"$$0.096"},{"spec":"per second","label":"720p","value":"$$0.168"}],"requires_image_input":true,"max_image_inputs":1,"metadata":{"provider":"xai","provider_name":"xAI","features":["image_to_video","video_generation"],"type":"generation","modality":"video","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/grok-imagine-video-1-5.png","is_featured":true,"is_new":true}},{"slug":"mimo-v2-5-pro","display_name":"MiMo V2.5 Pro","description":"Top-tier model for agentic workflows, complex software engineering, and long-horizon tasks, sustaining work across 1000+ tool calls on 1M context.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$2.175"},{"spec":"per 1M generated tokens","label":"Output","value":"$$4.35"},{"spec":"per 1M cached input tokens","label":"Implicit cache read","value":"$$0.018"},{"spec":"per call","label":"Web Search","value":"$$0.015"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"xiaomi","provider_name":"Xiaomi","features":["reasoning","agentic"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/mimo-v2-5-pro.png","is_featured":false,"is_new":false}},{"slug":"mimo-v2-5","display_name":"MiMo V2.5","description":"Multimodal model with native visual and audio understanding on a 1M context, designed to reason and act across modalities in agentic workflows.","modalities":["text","image","video","audio"],"input_modalities":["text","image","video","audio"],"output_modalities":["text"],"category":"text","type":"chat","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.70"},{"spec":"per 1M generated tokens","label":"Output","value":"$$1.40"},{"spec":"per 1M cached input tokens","label":"Implicit cache read","value":"$$0.014"},{"spec":"per call","label":"Web Search","value":"$$0.015"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"xiaomi","provider_name":"Xiaomi","features":["vision","audio_in"],"type":"chat","modality":"text","category_label":null,"modality_label":null,"region":null,"logo":"https://media.empiriolabs.ai/model-logos/mimo-v2-5.png","is_featured":false,"is_new":false}},{"slug":"glm-5-2:variant1","display_name":"GLM 5.2 (Variant 1)","description":"Reasoning and coding model with a 1M token context, 128K output, adjustable reasoning effort, and tool calling.","modalities":["text"],"input_modalities":["text"],"output_modalities":["text"],"category":"text","type":"reasoning","context_window":1000000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$1.10 (was $1.40)"},{"spec":"per 1M generated tokens","label":"Output","value":"$$3.851 (was $4.40)"},{"spec":"per 1M cached input tokens","label":"Implicit cache read","value":"$$0.275"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"zhipu","provider_name":"Z.ai","features":["reasoning","function_calling","structured_output","cache"],"type":"reasoning","modality":"text","category_label":null,"modality_label":null,"region":"Germany","logo":"https://media.empiriolabs.ai/model-logos/glm.png","is_featured":false,"is_new":true}},{"slug":"kimi-k2-7-code:variant1","display_name":"Kimi K2.7 Code (Variant 1)","description":"Kimi K2.7 Code is Moonshot's trillion-parameter agentic coding model with 256K context, always-on reasoning, and text, image, and video inputs.","modalities":["multimodal","text","image","video"],"input_modalities":["text","image","video"],"output_modalities":["text"],"category":"text","type":"reasoning","context_window":256000,"api_status":"platform","supported_endpoints":["POST /v1/chat/completions","POST /v1/responses","POST /v1/messages"],"pricing_rows":[{"spec":"per 1M prompt tokens","label":"Input","value":"$$0.8939 (was $0.95)"},{"spec":"per 1M generated tokens","label":"Output","value":"$$3.7131 (was $4.00)"},{"spec":"per 1M cached input tokens","label":"Implicit cache read","value":"$$0.1788"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"moonshot","provider_name":"Moonshot AI","features":["reasoning","function_calling","structured_output","multimodal","agentic_coding","cache"],"type":"reasoning","modality":"multimodal","category_label":null,"modality_label":null,"region":"Germany","logo":"https://media.empiriolabs.ai/model-logos/kimi.png","is_featured":false,"is_new":true}},{"slug":"happyhorse-1-1","display_name":"HappyHorse 1.1","description":"Text, image, and reference-to-video in one model. Cinematic motion, character consistency across up to 9 references, and synchronized native audio.","modalities":["video","text","image"],"input_modalities":["text","image"],"output_modalities":["video"],"category":"video","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/videos/generations"],"pricing_rows":[{"spec":"per second","label":"720p","value":"$$0.14"},{"spec":"per second","label":"1080p","value":"$$0.18"}],"requires_image_input":false,"max_image_inputs":9,"metadata":{"provider":"alibaba","provider_name":"Alibaba Cloud","features":["video_generation","image_to_video","reference_to_video","audio_sync"],"type":"generation","modality":"video","category_label":null,"modality_label":null,"region":"Singapore","logo":"https://media.empiriolabs.ai/model-logos/happyhorse-1-1.png","is_featured":true,"is_new":true}},{"slug":"seedance-2-0-mini","display_name":"Seedance 2.0 Mini","description":"The fastest, most affordable Seedance 2.0 tier for short cinematic clips with native audio, camera control, and image or video inputs at 480p and 720p.","modalities":["video","text","image","audio"],"input_modalities":["text","image","video","audio"],"output_modalities":["video"],"category":"video","type":"generation","context_window":null,"api_status":"platform","supported_endpoints":["POST /v1/videos/generations"],"pricing_rows":[{"spec":"per second","label":"T2V/I2V 480P","value":"$$0.070"},{"spec":"per second","label":"T2V/I2V 720P","value":"$$0.150"},{"spec":"per second","label":"Video Input 480P","value":"$$0.167"},{"spec":"per second","label":"Video Input 720P","value":"$$0.359"}],"requires_image_input":false,"max_image_inputs":"$undefined","metadata":{"provider":"bytedance","provider_name":"ByteDance","features":["audio_sync","camera_control","image_to_video","video_editing"],"type":"generation","modality":"video","category_label":null,"modality_label":null,"region":"Malaysia","logo":"https://media.empiriolabs.ai/model-logos/seedance-2-0-mini.png","is_featured":true,"is_new":true}}],"defaultSlug":"glm-5-2","seoSlug":"glm-tts","catalogSuggestions":{"3d":[],"glm-5-1":["Use function calling to pick a tool for weather data.","Return a JSON object with a concise project plan.","Summarize this long document and list unresolved risks."]},"signedIn":false,"mediaRetentionDays":7,"savedChatsAtCap":false,"initialLocale":"en","composeAllowed":false}]]