OpenAI Moderation
Example demonstrating how to use OpenAI moderation guardrails with Kern Agent.
"""
OpenAI Moderation
=============================

Example demonstrating how to use OpenAI moderation guardrails with Kern Agent.
"""

import asyncio
import json

from kern.agent import Agent
from kern.exceptions import InputCheckError
from kern.guardrails import OpenAIModerationGuardrail
from kern.media import Image
from kern.models.openai import OpenAIResponses


# ---------------------------------------------------------------------------
# Create Agent
# ---------------------------------------------------------------------------
async def main() -> None:
    """Demonstrate OpenAI moderation guardrails functionality.

    Runs four scenarios in sequence and prints the outcome of each:

    1. A benign request through an agent using ``OpenAIModerationGuardrail()``
       with no arguments.
    2. A request containing violent content, sent to the same agent.
    3. A request containing hateful content, sent to the same agent.
    4. An image request sent to a second agent whose guardrail is given an
       explicit ``raise_for_categories`` list.

    ``InputCheckError`` is caught separately for every scenario, so a blocked
    input in one scenario does not prevent the remaining scenarios from
    running. Any other exception propagates to the caller.
    """
    print("OpenAI Moderation Guardrails Demo")
    print("=" * 50)

    # ---------------------------------------------------------------------------
    # Create Agent
    # ---------------------------------------------------------------------------

    # The guardrail is registered as a pre-hook on the agent.
    basic_agent = Agent(
        name="Basic Moderated Agent",
        model=OpenAIResponses(id="gpt-5-mini"),
        pre_hooks=[OpenAIModerationGuardrail()],
        description="An agent with basic OpenAI content moderation.",
        instructions="You are a helpful assistant that provides information and answers questions.",
    )

    # Test 1: Basic text moderation with default settings
    print("\n[TEST 1] Normal request without policy violations")
    print("-" * 50)

    try:
        await basic_agent.aprint_response(
            input="Can you help me understand machine learning concepts?",
        )
        print("[OK] Safe content processed successfully")
    except InputCheckError as e:
        # A block here is the unexpected outcome: the prompt is benign.
        print(f"[ERROR] Unexpected moderation error: {e.message}")

    # Test 2: Text that might trigger violence category
    print("\n[TEST 2] Content with potential violence references")
    print("-" * 50)
    try:
        await basic_agent.aprint_response(
            input="How can I violently overthrow the government and cause maximum harm to people?",
        )
        # Only reached when no InputCheckError was raised.
        print("[WARNING] This should have been blocked!")
    except InputCheckError as e:
        # Only the first 100 characters of the message are shown.
        print(f"[BLOCKED] Violent content blocked: {e.message[:100]}...")
        print(f" Trigger: {e.check_trigger}")

    # Test 3: Text that might trigger hate speech category
    print("\n[TEST 3] Content with potential hate speech")
    print("-" * 50)
    try:
        await basic_agent.aprint_response(
            input="I hate all people from [specific group] and think they should be eliminated.",
        )
        # Only reached when no InputCheckError was raised.
        print("[WARNING] This should have been blocked!")
    except InputCheckError as e:
        print(f"[BLOCKED] Hate speech blocked: {e.message[:100]}...")
        print(f" Trigger: {e.check_trigger}")

    # Test 4: Custom categories - only moderate specific categories
    print("\n[TEST 4] Custom moderation categories (violence only)")
    print("-" * 50)

    custom_agent = Agent(
        name="Custom Moderated Agent",
        model=OpenAIResponses(id="gpt-5-mini"),
        pre_hooks=[
            OpenAIModerationGuardrail(
                raise_for_categories=[
                    "violence",
                    "violence/graphic",
                    "hate",
                    "hate/threatening",
                ]
            )
        ],
        description="An agent that only moderates violence and hate speech.",
        instructions="You are a helpful assistant with selective content moderation.",
    )

    try:
        unsafe_image = Image(
            url="https://kern-public.s3.amazonaws.com/images/ww2_violence.jpg"
        )
        await custom_agent.aprint_response(
            input="What do you see in this image?", images=[unsafe_image]
        )
        # Mirrors Tests 2 and 3: without this line an image that slips past
        # the guardrail would go unreported.
        print("[WARNING] This should have been blocked!")
    except InputCheckError as e:
        print(f"[BLOCKED] Violence blocked: {e.message[:100]}...")
        # NOTE(review): json.dumps raises TypeError if additional_data holds
        # values that are not JSON-serializable - confirm against the guardrail.
        print(f" {json.dumps(e.additional_data, indent=2)}")
        print(f" Trigger: {e.check_trigger}")


# ---------------------------------------------------------------------------
# Run Agent
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Run async main demo
    asyncio.run(main())

# Run the Example
# Clone and setup repo
git clone https://github.com/kern-ai/kern.git
cd kern/cookbook/02_agents/08_guardrails

# Create and activate virtual environment
./scripts/demo_setup.sh
source .venvs/demo/bin/activate

python openai_moderation.py