Image To Structured Output

Extract structured data from images.

This example sends an image to an agent and receives the response as a structured `MovieScript` object.

1"""
Image To Structured Output
==========================

Send an image to an agent and get the response back as a structured
``MovieScript`` object.
6"""
7
8from typing import List
9
10from kern.agent import Agent
11from kern.media import Image
12from kern.models.openai import OpenAIResponses
13from pydantic import BaseModel, Field
14from rich.pretty import pprint
15
16
class MovieScript(BaseModel):
    # Schema handed to the Agent as ``output_schema``.
    # NOTE: kept as ``#`` comments on purpose -- a class docstring on a
    # pydantic model becomes part of the generated JSON schema.

    # Title of the movie.
    name: str = Field(
        ...,
        description="Give a name to this movie",
    )
    # Where the story takes place.
    setting: str = Field(
        ...,
        description="Provide a nice setting for a blockbuster movie.",
    )
    # Names of the characters appearing in the movie.
    characters: List[str] = Field(
        ...,
        description="Name of characters for this movie.",
    )
    # Short plot summary.
    storyline: str = Field(
        ...,
        description="3 sentence storyline for the movie. Make it exciting!",
    )
26
27
# ---------------------------------------------------------------------------
# Create Agent
# ---------------------------------------------------------------------------
# MovieScript is supplied as the output schema for this agent's responses.
agent = Agent(
    model=OpenAIResponses(id="gpt-5.2"),
    output_schema=MovieScript,
)
32
# ---------------------------------------------------------------------------
# Run Agent
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Remote image passed to the agent together with the text prompt.
    bridge_photo = Image(
        url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg"
    )

    # With stream=True the run result is consumed event by event below.
    event_stream = agent.run(
        "Write a movie about this image",
        images=[bridge_photo],
        stream=True,
    )

    for run_event in event_stream:
        pprint(run_event.content)

Run the Example

# Clone and set up the repo
git clone https://github.com/kern-ai/kern.git
cd kern/cookbook/02_agents/12_multimodal

# Create and activate a virtual environment
./scripts/demo_setup.sh
source .venvs/demo/bin/activate

python image_to_structured_output.py