1import asyncio
2import pathlib
3
4from kern.agent import Agent
5from kern.knowledge.knowledge import Knowledge
6from kern.vectordb.langchaindb import LangChainVectorDb
7from langchain.text_splitter import CharacterTextSplitter
8from langchain_chroma import Chroma
9from langchain_community.document_loaders import TextLoader
10from langchain_openai import OpenAIEmbeddings
11
# Directory where the Chroma database is persisted
chroma_db_dir = pathlib.Path("./chroma_db")

# Path to the document to be loaded into the knowledge base
state_of_the_union = pathlib.Path(
    "cookbook/08_knowledge/testing_resources/state_of_the_union.txt"
)

# Load the document (read as UTF-8)
raw_documents = TextLoader(str(state_of_the_union), encoding="utf-8").load()

# Split the document into chunks (chunk_size=1000, no overlap between chunks)
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)

# One embedding model instance, shared by ingestion and retrieval
embeddings = OpenAIEmbeddings()

# Embed each chunk and load it into the vector store. `from_documents`
# hands back the populated store, so keep that instance as `db` instead of
# discarding it and opening the same persist directory a second time with a
# second embeddings object.
# NOTE(review): this runs on every execution; presumably re-running against an
# existing ./chroma_db re-embeds and re-adds the same chunks - confirm.
db = Chroma.from_documents(
    documents, embeddings, persist_directory=str(chroma_db_dir)
)

# Create a knowledge retriever from the vector store
knowledge_retriever = db.as_retriever()

# Wrap the LangChain retriever so the agent can use it as its knowledge base
knowledge = Knowledge(
    vector_db=LangChainVectorDb(knowledge_retriever=knowledge_retriever)
)

agent = Agent(knowledge=knowledge)
43
if __name__ == "__main__":
    # Ask the agent a question about the loaded document; the reply is
    # requested with markdown=True.
    question = "What did the president say?"
    response_coro = agent.aprint_response(question, markdown=True)
    asyncio.run(response_coro)