Audio To Text
Give a transcript of this audio conversation. Use speaker A, speaker B to identify speakers.
Audio To Text.
"""
Audio To Text
=============================

Audio To Text.

Downloads a sample MP3 conversation over HTTP and asks a Gemini-backed agent
to produce a transcript that labels the speakers as "speaker A" and
"speaker B".
"""

import requests
from kern.agent import Agent
from kern.media import Audio
from kern.models.google import Gemini

# ---------------------------------------------------------------------------
# Create Agent
# ---------------------------------------------------------------------------
agent = Agent(
    model=Gemini(id="gemini-3-flash-preview"),
    markdown=True,
)

url = "https://kern-public.s3.us-east-1.amazonaws.com/demo_data/QA-01.mp3"

# Bound the download so a stalled connection cannot hang the script forever
# (requests applies no timeout unless one is given), and fail fast on an HTTP
# error instead of handing an error body to the model as if it were audio.
response = requests.get(url, timeout=30)
response.raise_for_status()
audio_content = response.content

# ---------------------------------------------------------------------------
# Run Agent
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Request a speaker-labelled transcript; the downloaded bytes are passed
    # as the audio input and streaming is enabled for the response.
    agent.print_response(
        "Give a transcript of this audio conversation. Use speaker A, speaker B to identify speakers.",
        audio=[Audio(content=audio_content)],
        stream=True,
    )

# Run the Example
# Fetch the repository and move into the multimodal agent examples
git clone https://github.com/kern-ai/kern.git
cd kern/cookbook/02_agents/12_multimodal

# Set up the demo virtual environment, then activate it
./scripts/demo_setup.sh
source .venvs/demo/bin/activate

# Run the audio-to-text example
python audio_to_text.py