Async Agent as Judge

Asynchronous evaluation with Agent as Judge

This example demonstrates an asynchronous Agent as Judge evaluation with async callbacks.

Create a Python file named agent_as_judge_async.py and add the following code to it:

"""Asynchronous Agent-as-Judge evaluation with an async failure callback."""

import asyncio

from kern.agent import Agent
from kern.db.sqlite import AsyncSqliteDb
from kern.eval.agent_as_judge import AgentAsJudgeEval, AgentAsJudgeEvaluation
from kern.models.openai import OpenAIResponses


async def on_evaluation_failure(evaluation: AgentAsJudgeEvaluation) -> None:
    """Async callback triggered when evaluation fails (score < threshold).

    Args:
        evaluation: The evaluation handed to the callback; its ``score`` and
            ``reason`` attributes are printed.
    """
    print(f"Evaluation failed - Score: {evaluation.score}/10")
    print(f"Reason: {evaluation.reason}")


async def main() -> None:
    """Run the agent once, then judge its answer asynchronously."""
    # Setup database to persist eval results
    db = AsyncSqliteDb(db_file="tmp/agent_as_judge_async.db")

    agent = Agent(
        model=OpenAIResponses(id="gpt-5.2"),
        instructions="Provide helpful and informative answers.",
        db=db,
    )

    # Keep the prompt in one place so the judged input always matches what
    # the agent was actually asked.
    question = "Explain machine learning in simple terms"
    response = await agent.arun(question)

    evaluation = AgentAsJudgeEval(
        name="ML Explanation Quality",
        model=OpenAIResponses(id="gpt-5.2"),
        criteria="Explanation should be clear, beginner-friendly, and avoid jargon",
        scoring_strategy="numeric",
        threshold=9,
        on_fail=on_evaluation_failure,
        db=db,
    )

    # The return value is not used here: the call is asked to print the
    # results and summary itself.
    await evaluation.arun(
        input=question,
        output=str(response.content),
        print_results=True,
        print_summary=True,
    )


if __name__ == "__main__":
    asyncio.run(main())

Set up your virtual environment

macOS/Linux:

uv venv --python 3.12
source .venv/bin/activate

Windows:

uv venv --python 3.12
.venv\Scripts\activate

Install dependencies

uv pip install -U kern-ai openai

Export your OpenAI API key

macOS/Linux:

export OPENAI_API_KEY="your_openai_api_key_here"

Windows (PowerShell):

$Env:OPENAI_API_KEY="your_openai_api_key_here"

Run the example

python agent_as_judge_async.py