# Build a local knowledge base from the .txt files under ./data using Alibaba Bailian (DashScope) text-embedding models.
import os

from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.dashscope import DashScope, DashScopeGenerationModels
from llama_index.embeddings.dashscope import DashScopeEmbedding, DashScopeTextEmbeddingModels

# LLM used for generation; the DashScope API key is read from the environment
# so no secret is hard-coded in the script.
Settings.llm = DashScope(
    model_name=DashScopeGenerationModels.QWEN_MAX,
    api_key=os.getenv("DASHSCOPE_API_KEY"),
)
# Embedding model used to vectorize document chunks.
Settings.embed_model = DashScopeEmbedding(
    model_name=DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2
)

# Load every .txt file under ./data, split it into 256-token chunks,
# embed the chunks, and build an in-memory vector index.
documents = SimpleDirectoryReader("./data", required_exts=[".txt"]).load_data()
index = VectorStoreIndex.from_documents(
    documents,
    transformations=[SentenceSplitter(chunk_size=256)],
)
# Persist the index to disk so the query script can reload it later.
index.storage_context.persist(persist_dir='./doc_embeddings')
# Retrieve relevant chunks from the local knowledge base and send them to the LLM along with the question.
import os

from llama_index.core import Settings, load_index_from_storage, StorageContext
from llama_index.llms.dashscope import DashScope, DashScopeGenerationModels
from llama_index.embeddings.dashscope import DashScopeEmbedding, DashScopeTextEmbeddingModels

# LLM used for answering; API key comes from the environment.
Settings.llm = DashScope(
    model_name=DashScopeGenerationModels.QWEN_MAX,
    api_key=os.getenv("DASHSCOPE_API_KEY"),
)
# Must match the embedding model used when the index was built,
# otherwise query vectors and stored vectors are incompatible.
Settings.embed_model = DashScopeEmbedding(
    model_name=DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2
)

# Reload the vector index persisted by the build script.
storage_context = StorageContext.from_defaults(persist_dir="./doc_embeddings")
index = load_index_from_storage(storage_context=storage_context)

# Retrieve the top-5 most similar chunks and stream the LLM's answer.
query_engine = index.as_query_engine(streaming=True, similarity_top_k=5)
response = query_engine.query("你的問題?")
print(response)