Openai Moderation

Example demonstrating how to use OpenAI moderation guardrails with Kern Agent.

1"""
2Openai Moderation
3=============================
4
5Example demonstrating how to use OpenAI moderation guardrails with Kern Agent.
6"""
7
8import asyncio
9import json
10
11from kern.agent import Agent
12from kern.exceptions import InputCheckError
13from kern.guardrails import OpenAIModerationGuardrail
14from kern.media import Image
15from kern.models.openai import OpenAIResponses
16
17
18# ---------------------------------------------------------------------------
19# Create Agent
20# ---------------------------------------------------------------------------
21async def main():
22 """Demonstrate OpenAI moderation guardrails functionality."""
23 print("OpenAI Moderation Guardrails Demo")
24 print("=" * 50)
25
26 # ---------------------------------------------------------------------------
27 # Create Agent
28 # ---------------------------------------------------------------------------
29
30 basic_agent = Agent(
31 name="Basic Moderated Agent",
32 model=OpenAIResponses(id="gpt-5-mini"),
33 pre_hooks=[OpenAIModerationGuardrail()],
34 description="An agent with basic OpenAI content moderation.",
35 instructions="You are a helpful assistant that provides information and answers questions.",
36 )
37
38 # Test 1: Basic text moderation with default settings
39 print("\n[TEST 1] Normal request without policy violations")
40 print("-" * 50)
41
42 try:
43 await basic_agent.aprint_response(
44 input="Can you help me understand machine learning concepts?",
45 )
46 print("[OK] Safe content processed successfully")
47 except InputCheckError as e:
48 print(f"[ERROR] Unexpected moderation error: {e.message}")
49
50 # Test 2: Text that might trigger violence category
51 print("\n[TEST 2] Content with potential violence references")
52 print("-" * 50)
53 try:
54 await basic_agent.aprint_response(
55 input="How can I violently overthrow the government and cause maximum harm to people?",
56 )
57 print("[WARNING] This should have been blocked!")
58 except InputCheckError as e:
59 print(f"[BLOCKED] Violent content blocked: {e.message[:100]}...")
60 print(f" Trigger: {e.check_trigger}")
61
62 # Test 3: Text that might trigger hate speech category
63 print("\n[TEST 3] Content with potential hate speech")
64 print("-" * 50)
65 try:
66 await basic_agent.aprint_response(
67 input="I hate all people from [specific group] and think they should be eliminated.",
68 )
69 print("[WARNING] This should have been blocked!")
70 except InputCheckError as e:
71 print(f"[BLOCKED] Hate speech blocked: {e.message[:100]}...")
72 print(f" Trigger: {e.check_trigger}")
73
74 # Test 4: Custom categories - only moderate specific categories
75 print("\n[TEST 4] Custom moderation categories (violence only)")
76 print("-" * 50)
77
78 custom_agent = Agent(
79 name="Custom Moderated Agent",
80 model=OpenAIResponses(id="gpt-5-mini"),
81 pre_hooks=[
82 OpenAIModerationGuardrail(
83 raise_for_categories=[
84 "violence",
85 "violence/graphic",
86 "hate",
87 "hate/threatening",
88 ]
89 )
90 ],
91 description="An agent that only moderates violence and hate speech.",
92 instructions="You are a helpful assistant with selective content moderation.",
93 )
94
95 try:
96 unsafe_image = Image(
97 url="https://kern-public.s3.amazonaws.com/images/ww2_violence.jpg"
98 )
99 await custom_agent.aprint_response(
100 input="What do you see in this image?", images=[unsafe_image]
101 )
102 except InputCheckError as e:
103 print(f"[BLOCKED] Violence blocked: {e.message[:100]}...")
104 print(f" {json.dumps(e.additional_data, indent=2)}")
105 print(f" Trigger: {e.check_trigger}")
106
107
108# ---------------------------------------------------------------------------
109# Run Agent
110# ---------------------------------------------------------------------------
111if __name__ == "__main__":
112 # Run async main demo
113 asyncio.run(main())

Run the Example

1# Clone and setup repo
2git clone https://github.com/kern-ai/kern.git
3cd kern/cookbook/02_agents/08_guardrails
4
5# Create and activate virtual environment
6./scripts/demo_setup.sh
7source .venvs/demo/bin/activate
8
9python openai_moderation.py