一、安裝langchain
安裝依賴:
python -m venv env
.\env\Scripts\activate
pip3 install langchain
pip3 install langchain-core
pip3 install langchain-openai
pip3 install langchain-community
pip3 install dashscope
pip3 install langchain_postgres
pip3 install "psycopg[binary]"
導入庫函數:
from uuid import uuid4

from langchain import hub
from langchain_community.chat_models.tongyi import ChatTongyi
from langchain_community.embeddings import DashScopeEmbeddings
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_postgres import PGVector
二、知識庫轉換向量
使用通義千問的向量模型,將私域知識庫的數據轉化為指定維度的向量,並將向量存入向量數據庫 pgvector 中。
# Name of the collection that will hold the knowledge-base vectors.
COLLECTION_NAME = "t_rag"
# Database connection URL (psycopg driver).
CONNECTION = "postgresql+psycopg://postgres:12346@server200:5432/postgres_db"

# Instantiate the Tongyi Qianwen (DashScope) embedding model.
embedding = DashScopeEmbeddings(
    model="text-embedding-v3",
    dashscope_api_key="sk-xxx",
)

# Create the vector store on top of the connection and embedding model above.
vector_store = PGVector(
    connection=CONNECTION,
    collection_name=COLLECTION_NAME,
    embeddings=embedding,
)
準備文本,轉化為向量並存儲:
def save_vector():
    """Embed the sample knowledge-base passages and add them to ``vector_store``.

    Prints a start message, then either a success message or the failure
    reason. An exception raised while adding the documents is reported on
    stdout and not re-raised.
    """
    print("save_vector start .....")

    passages = [
        "段一凡出生東川縣一個農民工家庭,畢業于江南大學,成績優異",
        "現任吉南市環保局局長,曾任市共青團副書記,此前還擔任過回龍鄉鄉長、黨委書記等職務",
        "肖素素、王雪瑩、吳曉恙、劉淼淼這四個大美女與他都有著千絲萬縷的關系",
        "肖素素某國開國將軍的曾孫女,美麗智慧,某國企總經理,與段一凡生死患難,互生情愫,但二人身份差距懸殊",
        "王雪瑩省戰略策劃室副主任王慶支之女,段一凡學妹,喜歡段一凡",
        "吳曉恙商人之家,典型富二代,喜歡段一凡",
        "劉淼淼冰冷美女,前縣委書記劉海龍之女,劉海龍整治過段一凡,劉淼淼自殺,被段一凡救過,對段一凡產生愛意",
    ]
    # Every passage carries the same "brief" source tag and a 1-based id.
    documents = [
        Document(page_content=text, metadata={"source": "brief"}, id=number)
        for number, text in enumerate(passages, start=1)
    ]

    # One fresh random id per document, passed explicitly to add_documents.
    uuids = [str(uuid4()) for _ in documents]

    try:
        vector_store.add_documents(documents=documents, ids=uuids)
    except Exception as e:
        # Best-effort demo step: report the failure instead of crashing.
        print(f"save_vector failed: {e}")
    else:
        print("save_vector successful.")
def main():
    """Run the ingestion step: store the sample passages as vectors."""
    save_vector()
文本數據已成功存入向量數據庫。
三、檢索增強
將提示詞和匹配到的向量內容一起發給大模型進行提問:
def rag_vector(query):
    """Answer ``query`` with retrieval-augmented generation and print the reply.

    Retrieves passages from ``vector_store`` through an MMR retriever (k=5),
    fills the ``rlm/rag-prompt`` template pulled from the hub with the
    question and the retrieved text, sends the result to the ``qwen-plus``
    chat model, and prints the model's message.

    Args:
        query: The question to ask.
    """
    print("query_vector start .....")

    # Alternative to the retriever below (plain similarity search with a
    # metadata filter):
    #   vector_store.similarity_search(query=query, k=5, filter={"source": "brief"})

    # Build a retriever from the vector store.
    retriever = vector_store.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 5},
    )
    retrieved_docs = retriever.invoke(query)

    # Pass the passage text to the prompt, not the Document objects themselves;
    # otherwise the template is filled with their repr, metadata included.
    context = "\n\n".join(doc.page_content for doc in retrieved_docs)

    prompt = hub.pull("rlm/rag-prompt")
    messages = prompt.invoke({"question": query, "context": context})

    llm = ChatTongyi(
        streaming=False,
        model="qwen-plus",
        api_key="sk-xxxx",
    )
    ai_message = llm.invoke(messages)
    print("AI answer :----------", ai_message)
def main():
    """Run the retrieval-augmented query step with a sample question."""
    rag_vector("段一凡是誰?")
大模型的回答是基於檢索到的上下文向量信息生成的。