Image To Structured Output
Extract structured data from images.
Image To Structured Output.
"""
Image To Structured Output
==========================

Extract structured data from an image.

An agent is configured with ``MovieScript`` as its output schema, is given a
prompt plus one image URL, and is run in streaming mode. The content of each
streamed event is pretty-printed as it arrives.
"""

from typing import List

from kern.agent import Agent
from kern.media import Image
from kern.models.openai import OpenAIResponses
from pydantic import BaseModel, Field
from rich.pretty import pprint


# Schema handed to the agent as ``output_schema``. Every field is required
# (``...`` default); each ``description`` is part of the field metadata.
# NOTE(review): documented with comments rather than a class docstring on
# purpose -- pydantic would include a docstring in the generated JSON schema.
class MovieScript(BaseModel):
    # Title of the movie.
    name: str = Field(
        ...,
        description="Give a name to this movie",
    )
    # Where the movie takes place.
    setting: str = Field(
        ...,
        description="Provide a nice setting for a blockbuster movie.",
    )
    # Names of the characters.
    characters: List[str] = Field(
        ...,
        description="Name of characters for this movie.",
    )
    # Short plot summary.
    storyline: str = Field(
        ...,
        description="3 sentence storyline for the movie. Make it exciting!",
    )


# ---------------------------------------------------------------------------
# Create Agent
# ---------------------------------------------------------------------------
# presumably ``output_schema`` makes the agent answer in the MovieScript
# shape -- confirm against the kern Agent documentation.
agent = Agent(
    model=OpenAIResponses(id="gpt-5.2"),
    output_schema=MovieScript,
)

# ---------------------------------------------------------------------------
# Run Agent
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # The image the agent is asked to write a movie about.
    bridge_image = Image(
        url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg"
    )

    # With stream=True the run result is consumed as an iterable of events.
    event_stream = agent.run(
        "Write a movie about this image",
        images=[bridge_image],
        stream=True,
    )

    # Pretty-print the content carried by each event.
    for run_event in event_stream:
        pprint(run_event.content)

# Run the Example
1# Clone and setup repo2git clone https://github.com/kern-ai/kern.git3cd kern/cookbook/02_agents/12_multimodal45# Create and activate virtual environment6./scripts/demo_setup.sh7source .venvs/demo/bin/activate89python image_to_structured_output.py