Agent as Judge with Guidelines

Using additional guidelines for more detailed evaluation criteria

This example demonstrates using additional guidelines to provide more specific evaluation criteria.

Add the following code to a Python file named `agent_as_judge_with_guidelines.py`:

"""Agent-as-judge evaluation with additional guidelines.

Runs an agent on a product question, then scores the answer with
``AgentAsJudgeEval`` using extra guidelines, and stores the eval run in SQLite.
"""

from typing import Optional

from kern.agent import Agent
from kern.db.sqlite import SqliteDb
from kern.eval.agent_as_judge import AgentAsJudgeEval, AgentAsJudgeResult
from kern.models.openai import OpenAIResponses

# Setup database to persist eval results
db = SqliteDb(db_file="tmp/agent_as_judge_guidelines.db")

agent = Agent(
    model=OpenAIResponses(id="gpt-5.2"),
    instructions="You are a Tesla Model 3 product specialist. Provide detailed and helpful specifications.",
    db=db,
)

# Keep the prompt in one place so the judge is shown exactly the question
# the agent answered, rather than a shortened variant of it.
question = "What is the maximum speed of the Tesla Model 3?"
response = agent.run(question)

evaluation = AgentAsJudgeEval(
    name="Product Info Quality",
    model=OpenAIResponses(id="gpt-5.2"),
    criteria="Response should be informative, well-formatted, and accurate for product specifications",
    scoring_strategy="numeric",
    threshold=8,
    # Extra, more specific criteria the judge applies on top of `criteria`.
    additional_guidelines=[
        "Must include specific numbers with proper units (mph, km/h, etc.)",
        "Should provide context for different model variants if applicable",
        "Information should be technically accurate and complete",
    ],
    db=db,
)

# NOTE(review): the annotation says `run` may return None; this example does
# not inspect `result` and relies on print_results=True for output.
result: Optional[AgentAsJudgeResult] = evaluation.run(
    input=question,
    output=str(response.content),
    print_results=True,
)

# Query database for stored results
print("Database Results:")
eval_runs = db.get_eval_runs()
print(f"Total evaluations stored: {len(eval_runs)}")
if eval_runs:
    # Assumes get_eval_runs() returns runs oldest-first, so the last entry is
    # the one just stored -- TODO confirm against the library's ordering.
    latest = eval_runs[-1]
    print(f"Eval ID: {latest.run_id}")
    print(f"Additional guidelines used: {len(evaluation.additional_guidelines)}")

Set up your virtual environment

Mac/Linux:
uv venv --python 3.12
source .venv/bin/activate

Windows:
uv venv --python 3.12
.venv\Scripts\activate

Install dependencies

uv pip install -U kern-ai openai

Export your OpenAI API key

Mac/Linux:
export OPENAI_API_KEY="your_openai_api_key_here"

Windows (PowerShell):
$Env:OPENAI_API_KEY="your_openai_api_key_here"

Run the example

python agent_as_judge_with_guidelines.py