Preparation
pip install "fastapi>=0.68.0"
pip install "uvicorn[standard]>=0.15.0"
pip install "gensim>=4.0.0"
pip install "jieba>=0.42.1"
pip install "numpy>=1.21.0"
pip install "scikit-learn>=1.0.0"
If anything is missing, just install it the same way (a quick environment check is sketched below).
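As a quick way to confirm the environment is complete, the snippet below (a hypothetical helper, not part of the original setup) imports each dependency and prints its version:

# check_env.py -- hypothetical helper; verifies the dependencies import cleanly
import fastapi, gensim, jieba, numpy, sklearn, uvicorn

for pkg in (fastapi, gensim, jieba, numpy, sklearn, uvicorn):
    # Most of these packages expose __version__; fall back to "unknown" if not
    print(pkg.__name__, getattr(pkg, "__version__", "unknown"))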
Code
from fastapi import FastAPI, HTTPException
from gensim.models import KeyedVectors
import jieba
import numpy as np
import os
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)

app = FastAPI(title="Text Embedding API")

# Model path configuration
MODEL_PATH = os.path.abspath("../light_Tencent_AILab_ChineseEmbedding.bin")

# Model handle, populated at startup
model = None

# Load and validate the model before the service starts serving requests
@app.on_event("startup")
async def load_model():
    global model
    try:
        if not os.path.exists(MODEL_PATH):
            raise FileNotFoundError(f"Model file not found: {MODEL_PATH}")
        model = KeyedVectors.load_word2vec_format(MODEL_PATH, binary=True)
        logging.info(f"Model loaded | vocabulary size: {len(model.key_to_index)}")
        logging.info(f"Vector dimension: {model.vector_size}")  # confirm this prints 200
    except Exception as e:
        logging.error(f"Initialization failed: {e}")
        raise RuntimeError("Service initialization failed")


def text_to_vector(text: str) -> np.ndarray:
    """Return the 200-dimensional sentence vector (average of word vectors)."""
    words = jieba.lcut(text)
    vectors = []
    for word in words:
        if word in model.key_to_index:
            vec = model[word]
            # Dimension check for each word vector
            assert vec.shape == (200,), f"Unexpected word vector shape: {vec.shape}"
            vectors.append(vec)
    if not vectors:
        # No known words: fall back to a zero vector
        return np.zeros(model.vector_size)
    avg_vector = np.mean(vectors, axis=0)
    assert avg_vector.shape == (200,), f"Unexpected averaged vector shape: {avg_vector.shape}"
    return avg_vector


@app.get("/vector")
async def get_vector(sentence: str):
    if model is None:
        raise HTTPException(503, "Service not ready")
    if len(sentence.strip()) < 2:
        raise HTTPException(400, "Input text is too short")
    try:
        vector = text_to_vector(sentence)
        return {"dimension": vector.size, "vector": vector.tolist()}
    except Exception as e:
        logging.error(f"Processing failed: {e}")
        raise HTTPException(500, "Internal error")


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
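Once the service is running, the endpoint can be exercised with a plain GET request. Below is a minimal client sketch using only the standard library, assuming the service listens on 127.0.0.1:8000 as in the uvicorn.run() call above:

# client_example.py -- usage sketch; assumes the service above is running locally
import json
from urllib.parse import urlencode
from urllib.request import urlopen

query = urlencode({"sentence": "今天天氣很好"})
with urlopen(f"http://127.0.0.1:8000/vector?{query}") as resp:
    payload = json.load(resp)

print(payload["dimension"])   # expected: 200
print(payload["vector"][:5])  # first few components of the averaged vector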
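scikit-learn appears in the dependency list but is not used by the service itself; one typical downstream use of the returned vectors is measuring sentence similarity. The sketch below makes the same assumption that the service is running locally, and fetch_vector is a hypothetical helper wrapping the call shown above:

# similarity_example.py -- one possible downstream use of the /vector endpoint
import json
from urllib.parse import urlencode
from urllib.request import urlopen

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


def fetch_vector(sentence: str) -> np.ndarray:
    """Call the /vector endpoint and return the embedding as a NumPy array."""
    query = urlencode({"sentence": sentence})
    with urlopen(f"http://127.0.0.1:8000/vector?{query}") as resp:
        return np.array(json.load(resp)["vector"])


a = fetch_vector("我喜歡吃蘋果")
b = fetch_vector("我愛吃水果")
# cosine_similarity expects 2-D arrays: one row per sample
score = cosine_similarity(a.reshape(1, -1), b.reshape(1, -1))[0][0]
print(f"cosine similarity: {score:.4f}")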