Image Editing Agent

Code

"""Image editing example: ask a Gemini image model to modify a local photo.

The script sends a text prompt together with ``tmp/test_photo.png`` to the
agent, then reads the generated images back from the agent's last run output
and displays each one with Pillow.
"""

from io import BytesIO

from kern.agent import Agent, RunOutput  # noqa
from kern.media import Image
from kern.models.google import Gemini
from PIL import Image as PILImage

# No system message should be provided (Gemini requires only the image)
agent = Agent(
    model=Gemini(
        id="gemini-2.0-flash-exp-image-generation",
        # Request both text and image parts in the model response.
        response_modalities=["Text", "Image"],
    )
)

# Run the agent with the edit instruction and the source image.
# The return value is not used below; the generated images are read back
# through get_last_run_output() instead.
response = agent.run(
    "Can you add a Llama in the background of this image?",
    images=[Image(filepath="tmp/test_photo.png")],
)

# Retrieve and display generated images using get_last_run_output
run_response = agent.get_last_run_output()
if run_response and isinstance(run_response, RunOutput) and run_response.images:
    for image_response in run_response.images:
        image_bytes = image_response.content
        # Entries without content are skipped rather than passed to Pillow.
        if image_bytes:
            image = PILImage.open(BytesIO(image_bytes))
            image.show()
            # Save the image to a file
            # image.save("generated_image.png")
else:
    print("No images found in run response")

Usage

Set up your virtual environment

Mac/Linux:

uv venv --python 3.12
source .venv/bin/activate

Windows:

uv venv --python 3.12
.venv\Scripts\activate

Prepare your image

Place an image file at tmp/test_photo.png or update the filepath in the code to point to your image.

Set your API key

export GOOGLE_API_KEY=xxx

Install dependencies

uv pip install -U google-genai pillow kern-ai

Run Agent

python cookbook/11_models/google/gemini/image_editing.py