1: Start the vLLM OpenAI-compatible server:
export VLLM_USE_MODELSCOPE=True
python -m vllm.entrypoints.openai.api_server --model 'qwen/Qwen-7B-Chat-Int4' --trust-remote-code -q gptq --dtype float16 --gpu-memory-utilization 0.6
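Once the server reports it is listening (port 8000 by default), you can sanity-check the endpoint with the official openai Python client before wiring up LangChain. A minimal sketch, assuming openai>=1.0 is installed and the server is running on localhost:

from openai import OpenAI

# vLLM does not validate the API key, but the client requires a non-empty value
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
resp = client.chat.completions.create(
    model="qwen/Qwen-7B-Chat-Int4",
    messages=[{"role": "user", "content": "Hello"}],
)
print(resp.choices[0].message.content)

If this returns a normal completion, the server side is ready and the rest of the tutorial only needs the client configuration shown in step 3.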
2: Build the vector database
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import ModelScopeEmbeddings
from langchain_community.vectorstores import FAISS

# Parse the PDF and split it into chunks
pdf_loader = PyPDFLoader('LLM.pdf', extract_images=True)  # use OCR to extract text from images inside the PDF
chunks = pdf_loader.load_and_split(text_splitter=RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10))

# Load the embedding model used to vectorize each chunk
embeddings = ModelScopeEmbeddings(model_id='iic/nlp_corom_sentence-embedding_chinese-base')

# Insert the chunks into a local FAISS vector store
vector_db = FAISS.from_documents(chunks, embeddings)
vector_db.save_local('/kaggle/working/LLM.faiss')
print('faiss saved!')
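As a quick check that the index round-trips correctly, you can reload it and run a raw similarity search before moving on. A minimal sketch reusing the embeddings object defined above; the sample query is illustrative:

# Reload the saved index and confirm retrieval works
db = FAISS.load_local('/kaggle/working/LLM.faiss', embeddings, allow_dangerous_deserialization=True)
for doc in db.similarity_search('What is a large language model?', k=3):
    print(doc.page_content[:80])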
3: Start the conversation
from langchain_community.embeddings import ModelScopeEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.prompts.chat import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder
from langchain.schema import HumanMessage
from langchain_openai import ChatOpenAI
from operator import itemgetter
import os

# Load the embedding model used to vectorize the query
embeddings = ModelScopeEmbeddings(model_id='iic/nlp_corom_sentence-embedding_chinese-base')

# Load the local FAISS index for knowledge retrieval
vector_db = FAISS.load_local('/kaggle/working/LLM.faiss', embeddings, allow_dangerous_deserialization=True)
retriever = vector_db.as_retriever(search_kwargs={"k": 5})

# vLLM exposes an OpenAI-compatible endpoint, so we call it through the ChatOpenAI client
os.environ['VLLM_USE_MODELSCOPE'] = 'True'
chat = ChatOpenAI(
    model="qwen/Qwen-7B-Chat-Int4",
    openai_api_key="EMPTY",
    openai_api_base='http://localhost:8000/v1',
    stop=['<|im_end|>'],
)

# Prompt template
system_prompt = SystemMessagePromptTemplate.from_template('You are a helpful assistant.')
user_prompt = HumanMessagePromptTemplate.from_template('''
Answer the question based only on the following context:

{context}

Question: {query}
''')
full_chat_prompt = ChatPromptTemplate.from_messages([
    system_prompt,
    MessagesPlaceholder(variable_name="chat_history"),
    user_prompt,
])

# Chat chain: retrieve context for the query, fill in the prompt, then call the model
chat_chain = {
    "context": itemgetter("query") | retriever,
    "query": itemgetter("query"),
    "chat_history": itemgetter("chat_history"),
} | full_chat_prompt | chat

# Start the conversation
chat_history = []
while True:
    query = input('query: ')
    response = chat_chain.invoke({'query': query, 'chat_history': chat_history})
    chat_history.extend((HumanMessage(content=query), response))
    print(response.content)
    chat_history = chat_history[-20:]  # keep only the latest 10 rounds of dialogue
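One subtlety: the chain above injects the retriever's raw list of Document objects into {context}, which the template stringifies as a Python list. If you prefer clean plain-text context, you can pipe the retrieved documents through a small formatting step. A hedged variant, where format_docs is a hypothetical helper and not part of the original code:

def format_docs(docs):
    # Join the retrieved chunks into one plain-text block
    return '\n\n'.join(doc.page_content for doc in docs)

chat_chain = {
    "context": itemgetter("query") | retriever | format_docs,
    "query": itemgetter("query"),
    "chat_history": itemgetter("chat_history"),
} | full_chat_prompt | chat

In LCEL, piping a runnable into a plain function coerces the function into a RunnableLambda, so no extra wrapper is needed; the rest of the conversation loop works unchanged.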