Agent as Judge with Teams

Evaluating team outputs with Agent as Judge

This example demonstrates evaluating team outputs using Agent as Judge evaluation.

Add the following code to a Python file named agent_as_judge_team.py:

"""Evaluate a team's output with an Agent-as-Judge evaluation.

The script builds a two-member team (a researcher and a writer), runs it on a
prompt, scores the team's response with ``AgentAsJudgeEval``, and then reads
the stored evaluation runs back from the SQLite database.
"""

from typing import Optional

from kern.agent import Agent
from kern.db.sqlite import SqliteDb
from kern.eval.agent_as_judge import AgentAsJudgeEval, AgentAsJudgeResult
from kern.models.openai import OpenAIResponses
from kern.team.team import Team

# Setup database to persist eval results
db = SqliteDb(db_file="tmp/agent_as_judge_team.db")

# Setup a team with researcher and writer
researcher = Agent(
    name="Researcher",
    role="Research and gather information",
    model=OpenAIResponses(id="gpt-5.2"),
)

writer = Agent(
    name="Writer",
    role="Write clear and concise summaries",
    model=OpenAIResponses(id="gpt-5.2"),
)

research_team = Team(
    name="Research Team",
    model=OpenAIResponses(id="gpt-5.2"),
    members=[researcher, writer],
    instructions=["First research the topic thoroughly, then write a clear summary."],
    db=db,
)

# Produce the team output that the judge will score.
response = research_team.run("Explain quantum computing")

# Configure the judge: the criteria describe what a good response looks like,
# and the same database is passed so the eval run is stored alongside the team.
evaluation = AgentAsJudgeEval(
    name="Team Response Quality",
    model=OpenAIResponses(id="gpt-5.2"),
    criteria="Response should be well-researched, clear, and comprehensive with good flow",
    scoring_strategy="binary",
    db=db,
)

# The judge receives the original prompt and the team's response as plain text.
result: Optional[AgentAsJudgeResult] = evaluation.run(
    input="Explain quantum computing",
    output=str(response.content),
    print_results=True,
    print_summary=True,
)

# Query database for stored results
print("Database Results:")
eval_runs = db.get_eval_runs()
print(f"Total evaluations stored: {len(eval_runs)}")
if eval_runs:
    # NOTE(review): assumes get_eval_runs() returns runs oldest-first, so the
    # last element is the most recent one - TODO confirm.
    latest = eval_runs[-1]
    print(f"Eval ID: {latest.run_id}")
    print(f"Team: {research_team.name}")

Set up your virtual environment

macOS/Linux:

uv venv --python 3.12
source .venv/bin/activate

Windows:

uv venv --python 3.12
.venv\Scripts\activate

Install dependencies

uv pip install -U kern-ai openai

Export your OpenAI API key

macOS/Linux:

export OPENAI_API_KEY="your_openai_api_key_here"

Windows (PowerShell):

$Env:OPENAI_API_KEY="your_openai_api_key_here"

Run the example

python agent_as_judge_team.py