Audio Output Agent

Code

"""Audio output agent example.

Runs two text prompts against an audio-capable chat model and, whenever a
run returns audio, writes it to a WAV file under ``tmp/``.
"""

from kern.agent import Agent, RunOutput  # noqa
from kern.models.openai import OpenAIChat
from kern.utils.audio import write_audio_to_file
from kern.db.in_memory import InMemoryDb

# Configure the model to answer with both text and audio; the prompts below are plain text.
agent = Agent(
    model=OpenAIChat(
        id="gpt-4o-audio-preview",
        modalities=["text", "audio"],
        audio={"voice": "sage", "format": "wav"},
    ),
    # NOTE(review): db + add_history_to_context are presumably what lets the
    # follow-up prompt refer to "this story" from the first run — confirm.
    db=InMemoryDb(),
    add_history_to_context=True,
    markdown=True,
)
run_output: RunOutput = agent.run("Tell me a 5 second scary story")

# Save the response audio to a file (skipped when the run carries no audio)
if run_output.response_audio:
    write_audio_to_file(
        audio=run_output.response_audio.content, filename="tmp/scary_story.wav"
    )

# Follow-up prompt on the same agent instance
run_output = agent.run("What would be in a sequal of this story?")

# Save the follow-up response audio to a separate file
if run_output.response_audio:
    write_audio_to_file(
        audio=run_output.response_audio.content,
        filename="tmp/scary_story_sequal.wav",
    )

Usage

Set up your virtual environment

# macOS / Linux
uv venv --python 3.12
source .venv/bin/activate

# Windows
uv venv --python 3.12
.venv\Scripts\activate

Set your API key

export OPENAI_API_KEY=xxx

Install dependencies

uv pip install -U openai kern-ai

Run Agent

python cookbook/11_models/openai/chat/audio_output_agent.py