Audio Input Output
Fetch the audio file and pass its raw bytes to the agent as audio input.
Audio Input Output.
"""
Audio Input Output
=============================

Send a WAV recording to an audio-capable chat model and save the audio reply.

The script downloads a sample clip, passes its raw bytes to the agent as
audio input, pretty-prints the run's content, and hands the returned audio to
``write_audio_to_file`` with the filename ``tmp/result.wav``.
"""

import requests
from kern.agent import Agent
from kern.media import Audio
from kern.models.openai import OpenAIChat
from kern.utils.audio import write_audio_to_file
from rich.pretty import pprint

# Seconds requests waits for the server to connect or to send data before
# raising, so a stalled download cannot block the script forever.
REQUEST_TIMEOUT_SECONDS = 30

# Fetch the sample audio file. The raw response bytes are handed to the agent
# unchanged; no base64 conversion happens in this script.
url = "https://openaiassets.blob.core.windows.net/$web/API/docs/audio/alloy.wav"
response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
response.raise_for_status()  # fail fast on an HTTP error status
wav_data = response.content

# ---------------------------------------------------------------------------
# Create Agent
# ---------------------------------------------------------------------------
# The model is asked for both text and audio output; the audio reply is
# requested in WAV format with the "sage" voice.
agent = Agent(
    model=OpenAIChat(
        id="gpt-4o-audio-preview",
        modalities=["text", "audio"],
        audio={"voice": "sage", "format": "wav"},
    ),
    markdown=True,
)

# ---------------------------------------------------------------------------
# Run Agent
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    run_response = agent.run(
        "What's in these recording?",
        audio=[Audio(content=wav_data, format="wav")],
    )

    # Only print and save when the run actually returned audio.
    if run_response.response_audio is not None:
        pprint(run_response.content)
        write_audio_to_file(
            audio=run_response.response_audio.content, filename="tmp/result.wav"
        )

# Run the Example
# Clone and set up the repo
git clone https://github.com/kern-ai/kern.git
cd kern/cookbook/02_agents/12_multimodal

# Create and activate virtual environment
# NOTE(review): both paths below are resolved relative to the cookbook
# directory entered above - confirm that scripts/ and .venvs/ live there.
./scripts/demo_setup.sh
source .venvs/demo/bin/activate

# Run the example
python audio_input_output.py