Audio Streaming

The example writes mono audio; set `CHANNELS` to 2 for stereo.

Stream an agent's audio response, printing the transcript as it arrives and saving the audio to a WAV file.

"""
Audio Streaming
=============================

Stream an agent's audio response and save it to a WAV file.

The agent is asked for a short story with ``stream=True``. Each streamed
event may carry a transcript fragment, which is printed as it arrives, and a
base64-encoded PCM chunk, which is decoded and written to
``tmp/response_stream.wav``.
"""

import base64
import wave
from pathlib import Path
from typing import Iterator

from kern.agent import Agent, RunOutputEvent
from kern.models.openai import OpenAIChat

# Audio Configuration (passed to the WAV writer below)
SAMPLE_RATE = 24000  # Hz (24kHz)
CHANNELS = 1  # Mono (Change to 2 if Stereo)
SAMPLE_WIDTH = 2  # Bytes (16 bits)

# Configure the model to return both text and audio for a text prompt
# ---------------------------------------------------------------------------
# Create Agent
# ---------------------------------------------------------------------------
agent = Agent(
    model=OpenAIChat(
        id="gpt-4o-audio-preview",
        modalities=["text", "audio"],
        audio={
            "voice": "alloy",
            "format": "pcm16",
        },  # Only pcm16 is supported with streaming
    ),
)

# ---------------------------------------------------------------------------
# Run Agent
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    output_stream: Iterator[RunOutputEvent] = agent.run(
        "Tell me a 10 second story", stream=True
    )

    filename = Path("tmp/response_stream.wav")
    # wave.open() does not create missing directories, so make sure the
    # output directory exists before opening the file.
    filename.parent.mkdir(parents=True, exist_ok=True)

    # Open the file once in write-binary mode; every streamed chunk is
    # written to this same handle so the WAV header is finalized on close.
    with wave.open(str(filename), "wb") as wav_file:
        wav_file.setnchannels(CHANNELS)
        wav_file.setsampwidth(SAMPLE_WIDTH)
        wav_file.setframerate(SAMPLE_RATE)

        # Iterate over generated audio
        for response in output_stream:
            response_audio = response.response_audio  # type: ignore
            if not response_audio:
                continue

            if response_audio.transcript:
                print(response_audio.transcript, end="", flush=True)

            if response_audio.content:
                try:
                    pcm_bytes = base64.b64decode(response_audio.content)
                except (ValueError, TypeError) as e:
                    # Skip a malformed chunk rather than abort the stream.
                    # binascii.Error is a ValueError subclass.
                    print(f"Error decoding audio: {e}")
                else:
                    wav_file.writeframes(pcm_bytes)

    # Terminate the transcript line printed with end="" above
    print()

Run the Example

# Clone and setup repo
git clone https://github.com/kern-ai/kern.git
cd kern/cookbook/02_agents/12_multimodal

# Create and activate virtual environment
./scripts/demo_setup.sh
source .venvs/demo/bin/activate

# Run the example
python audio_streaming.py