langchain-github-bot/langchain_bot_simple.py at main · petehunt/langchain-github-bot

History

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

# This is the first simple example from the blog post that processes data

# from Wikipedia and does not use orchestration

from langchain.llms import OpenAI

from langchain.chains.qa_with_sources import load_qa_with_sources_chain

from langchain.docstore.document import Document

import requests

from langchain.embeddings.openai import OpenAIEmbeddings

from langchain.vectorstores.faiss import FAISS

from langchain.text_splitter import CharacterTextSplitter

def get_wiki_data(title, first_paragraph_only):
    """Fetch the plain-text extract of an English Wikipedia article.

    Args:
        title: Article title as it appears in the wiki URL, e.g.
            ``"Monty_Python"``.
        first_paragraph_only: When true, ``exintro=1`` is added to the query
            so the API returns only the article's intro rather than the
            whole text.

    Returns:
        A ``Document`` whose ``page_content`` is the extract text and whose
        ``metadata["source"]`` is the article's ``/wiki/`` URL.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            API answers with an HTTP error status.
        KeyError: If the response carries no extract for ``title`` (for
            example because the page does not exist).
    """
    # Pass the query as a dict so requests percent-encodes the title; titles
    # containing "&", "#", "+" etc. would otherwise corrupt the query string.
    params = {
        "format": "json",
        "action": "query",
        "prop": "extracts",
        "explaintext": 1,
        "titles": title,
    }
    if first_paragraph_only:
        params["exintro"] = 1

    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(
        "https://en.wikipedia.org/w/api.php", params=params, timeout=30
    )
    response.raise_for_status()
    data = response.json()

    # The API keys "pages" by page id; a single title yields a single entry.
    page = next(iter(data["query"]["pages"].values()), {})
    if "extract" not in page:
        raise KeyError(f"Wikipedia returned no extract for title {title!r}")

    return Document(
        page_content=page["extract"],
        metadata={"source": f"https://en.wikipedia.org/wiki/{title}"},
    )

# Full-text Wikipedia articles the bot answers from, fetched one after the
# other in the order listed (first_paragraph_only is False for all of them).
sources = [
    get_wiki_data(article_title, False)
    for article_title in (
        "Unix",
        "Microsoft_Windows",
        "Linux",
        "Seinfeld",
        "Matchbox_Twenty",
        "Roman_Empire",
        "London",
        "Python_(programming_language)",
        "Monty_Python",
    )
]

# Split every article on spaces with chunk_size=1024 and no overlap, wrapping
# each piece in its own Document that shares the parent article's metadata so
# an answer can still cite the originating page.
splitter = CharacterTextSplitter(separator=" ", chunk_size=1024, chunk_overlap=0)
source_chunks = [
    Document(page_content=piece, metadata=article.metadata)
    for article in sources
    for piece in splitter.split_text(article.page_content)
]

# Build a FAISS vector index over the chunks using OpenAI embeddings.
embeddings = OpenAIEmbeddings()
search_index = FAISS.from_documents(source_chunks, embeddings)

# Question-answering-with-sources chain driven by an OpenAI LLM at temperature 0.
llm = OpenAI(temperature=0)
chain = load_qa_with_sources_chain(llm)

def print_answer(question):
    """Print the chain's answer to ``question``.

    The four chunks from ``search_index`` most similar to the question are
    handed to ``chain`` together with the question itself, and the
    ``"output_text"`` field of the chain's result is printed.

    Args:
        question: The question to look up and answer.
    """
    relevant_chunks = search_index.similarity_search(question, k=4)
    chain_inputs = {
        "input_documents": relevant_chunks,
        "question": question,
    }
    result = chain(chain_inputs, return_only_outputs=True)
    print(result["output_text"])

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

langchain_bot_simple.py

langchain_bot_simple.py

Files

langchain_bot_simple.py

Latest commit

History

langchain_bot_simple.py

File metadata and controls