1import pathlib
2from kern.agent import Agent
3from kern.knowledge.knowledge import Knowledge
4from kern.vectordb.langchaindb import LangChainVectorDb
5from langchain.text_splitter import CharacterTextSplitter
6from langchain_chroma import Chroma
7from langchain_community.document_loaders import TextLoader
8from langchain_openai import OpenAIEmbeddings
9
# Directory holding the on-disk Chroma database; it persists between runs.
chroma_db_dir = pathlib.Path("./chroma_db")

# Document to load into the knowledge base. The path is relative to the
# current working directory.
state_of_the_union = pathlib.Path(
    "cookbook/08_knowledge/testing_resources/state_of_the_union.txt"
)

# Load the document
raw_documents = TextLoader(str(state_of_the_union), encoding="utf-8").load()

# Split the document into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)

# One embedding client, shared by indexing and by query-time retrieval.
embeddings = OpenAIEmbeddings()

# Give every chunk a stable id derived from the file name and its position.
# Without explicit ids each run stores the chunks under new random ids, so
# re-running this script would keep adding duplicate copies of the same text
# to the persisted collection.
document_ids = [
    f"{state_of_the_union.name}-{index}" for index in range(len(documents))
]

# Embed each chunk and load it into the vector store. `from_documents`
# returns the populated store, so it is used directly instead of re-opening
# the same persist directory with a second `Chroma(...)` instance.
db = Chroma.from_documents(
    documents,
    embeddings,
    ids=document_ids,
    persist_directory=str(chroma_db_dir),
)

# Create a knowledge retriever from the vector store
knowledge_retriever = db.as_retriever()

# Wrap the LangChain retriever so the agent can use it as its knowledge base.
knowledge = Knowledge(
    vector_db=LangChainVectorDb(knowledge_retriever=knowledge_retriever)
)

agent = Agent(knowledge=knowledge)

agent.print_response("What did the president say?", markdown=True)