Image To Audio
Convert image descriptions to audio output.
Image To Audio.
```python
"""
Image To Audio
=============================

Image To Audio.
"""

from pathlib import Path

from kern.agent import Agent, RunOutput
from kern.media import Image
from kern.models.openai import OpenAIChat
from kern.utils.audio import write_audio_to_file
from rich import print
from rich.text import Text

cwd = Path(__file__).parent.resolve()

# ---------------------------------------------------------------------------
# Create Agent
# ---------------------------------------------------------------------------
image_agent = Agent(model=OpenAIChat(id="gpt-4o"))

image_path = Path(__file__).parent.joinpath("sample.jpg")

# ---------------------------------------------------------------------------
# Run Agent
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    image_story: RunOutput = image_agent.run(
        "Write a 3 sentence fiction story about the image",
        images=[Image(filepath=image_path)],
    )
    formatted_text = Text.from_markup(
        f":sparkles: [bold magenta]Story:[/bold magenta] {image_story.content} :sparkles:"
    )
    print(formatted_text)

    audio_agent = Agent(
        model=OpenAIChat(
            id="gpt-4o-audio-preview",
            modalities=["text", "audio"],
            audio={"voice": "sage", "format": "wav"},
        ),
    )

    audio_story: RunOutput = audio_agent.run(
        f"Narrate the story with flair: {image_story.content}"
    )
    if audio_story.response_audio is not None:
        write_audio_to_file(
            audio=audio_story.response_audio.content, filename="tmp/sample_story.wav"
        )
```

Run the Example
```bash
# Clone and setup repo
git clone https://github.com/kern-ai/kern.git
cd kern/cookbook/02_agents/12_multimodal

# Create and activate virtual environment
./scripts/demo_setup.sh
source .venvs/demo/bin/activate

python image_to_audio.py
```