inputSchema
{
"type": "object",
"required": [
"text",
"apiKeys"
],
"properties": {
"text": {
"type": "string",
"maxLength": 5000,
"minLength": 10,
"description": "Source text to convert into video (200-2000 chars recommended for ~2min)"
},
"style": {
"enum": [
"cinematic",
"cartoon",
"documentary",
"minimalist",
"vlog"
],
"type": "string",
"default": "cinematic",
"description": "Visual style for generated scenes"
},
"title": {
"type": "string",
"maxLength": 100,
"description": "Video title displayed in the opening scene"
},
"voice": {
"type": "object",
"properties": {
"speed": {
"type": "number",
"default": 1,
"maximum": 2,
"minimum": 0.5,
"description": "Narration speed multiplier"
},
"language": {
"enum": [
"zh-CN",
"en-US",
"ja-JP",
"ko-KR"
],
"type": "string",
"default": "zh-CN",
"description": "Narration language"
}
},
"description": "Voiceover configuration"
},
"apiKeys": {
"type": "object",
"required": [
"llm"
],
"properties": {
"llm": {
"type": "object",
"required": [
"apiKey"
],
"properties": {
"model": {
"type": "string",
"default": "gpt-4o",
"description": "Model name (e.g. gpt-4o, claude-sonnet-4-20250514, deepseek-chat)"
},
"apiKey": {
"type": "string",
"description": "API key for the LLM provider"
},
"baseUrl": {
"type": "string",
"description": "Custom API base URL (for proxies or self-hosted)"
},
"provider": {
"enum": [
"openai",
"anthropic",
"deepseek"
],
"type": "string",
"default": "openai",
"description": "LLM provider"
}
},
"description": "LLM provider for scene decomposition"
},
"tts": {
"type": "object",
"properties": {
"voice": {
"type": "string",
"default": "alloy",
"description": "Voice ID (e.g. alloy, echo, shimmer for OpenAI)"
},
"apiKey": {
"type": "string",
"description": "API key (defaults to llm.apiKey if same provider)"
},
"baseUrl": {
"type": "string",
"description": "Custom API base URL"
},
"provider": {
"enum": [
"openai",
"azure",
"elevenlabs"
],
"type": "string",
"default": "openai",
"description": "TTS provider"
}
},
"description": "Text-to-speech provider"
},
"image": {
"type": "object",
"properties": {
"model": {
"type": "string",
"default": "dall-e-3",
"description": "Model name (e.g. dall-e-3, stable-diffusion-xl)"
},
"apiKey": {
"type": "string",
"description": "API key (defaults to llm.apiKey if same provider)"
},
"baseUrl": {
"type": "string",
"description": "Custom API base URL"
},
"provider": {
"enum": [
"openai",
"stability",
"replicate"
],
"type": "string",
"default": "openai",
"description": "Image generation provider"
}
},
"description": "Image generation provider"
}
},
"description": "API keys for external AI services"
},
"resolution": {
"enum": [
"1024x1024",
"1024x1792",
"1792x1024"
],
"type": "string",
"default": "1792x1024",
"description": "Image resolution per scene"
}
}
}
outputSchema
{
"type": "object",
"required": [
"scenes",
"duration",
"sceneCount"
],
"properties": {
"title": {
"type": "string",
"description": "Video title"
},
"scenes": {
"type": "array",
"items": {
"type": "object",
"properties": {
"index": {
"type": "integer"
},
"endTime": {
"type": "number"
},
"narration": {
"type": "string"
},
"startTime": {
"type": "number"
},
"audioBase64": {
"type": "string",
"description": "Base64-encoded MP3 audio"
},
"imageBase64": {
"type": "string",
"description": "Base64-encoded PNG image"
},
"visualPrompt": {
"type": "string"
}
}
},
"description": "Generated scenes with base64-encoded assets"
},
"duration": {
"type": "number",
"description": "Estimated total video duration in seconds"
},
"ffmpegHint": {
"type": "string",
"description": "FFmpeg command to assemble saved assets into final MP4"
},
"sceneCount": {
"type": "integer",
"description": "Number of scenes generated"
}
}
}