# multiprocessing — process-based parallelism: https://docs.python.org/zh-cn/3.11/library/multiprocessing.html
# Standard library
import sys
import uuid

# Third-party
import uvicorn
from fastapi import FastAPI

# FastChat services
from fastchat.serve.controller import Controller
from fastchat.serve.model_worker import ModelWorker
from fastchat.serve.openai_api_server import app, CORSMiddleware, app_settings

# One dedicated FastAPI application instance per FastChat service.
controller_app = FastAPI(title="FastChat Controller Service")
worker_app = FastAPI(title="FastChat Model Worker Service")
api_app = FastAPI(title="FastChat OpenAI API Service")def start_controller():"""啟動 FastChat Controller 服務分布式系統設計中常見的一種優化策略,決定如何分配任務給不同的model_worker(或服務器)- LOTTERY:這種方法系統會隨機選擇一個worker。不考慮worker的當前負載或任何其他因素。- SHORTEST_QUEUE:這種方法會選擇當前隊列長度最短的worker,也就是當前負載最小的工人。"""controller = Controller(dispatch_method="shortest_queue")# sys.modules 是一個字典,它存儲了已經加載的模塊。每個鍵是一個模塊名,每個值是一個模塊對象。# 這種機制使得Python在導入模塊時可以檢查模塊是否已經在sys.modules中,如果是,就直接使用已經加載的模塊,避免重復加載。sys.modules["fastchat.serve.controller"].controller = controllercontroller_app.title = "FastChat Controller"controller_app._controller = controlleruvicorn.run(controller_app, host="192.168.110.131", port=21001)def start_model_worker():"""啟動 Model Worker 服務"""worker_id = str(uuid.uuid4())[:8]worker = ModelWorker(controller_addr="http://192.168.110.131:21001",worker_addr="http://192.168.110.131:21002",worker_id=worker_id,limit_worker_concurrency=5,no_register=False,# no_register=True,model_path="/home/00_rag/model/ZhipuAI/chatglm3-6b",num_gpus=4,model_names=["chatglm3-6b"],device="cuda",max_gpu_memory="22GiB",)worker_app.title = f"FastChat LLM Server ChaGLM3-6b"worker_app._worker = workeruvicorn.run(worker_app, host="192.168.110.131", port=21002)def start_openai_api_server():"""啟動 OpenAI API 服務"""api_app.add_middleware(CORSMiddleware,allow_credentials=True, # 允許前端請求攜帶認證信息(如 cookiesallow_origins=["*"], # 允許所有域名的請求,星號表示不限制任何域。allow_methods=["*"], # 允許所有的 HTTP 方法。allow_headers=["*"], # 允許所有的 HTTP 頭)app_settings.controller_address = "http://192.168.110.131:21001"app_settings.api_keys = []api_app.title = "FastChat OpenAI API Server"uvicorn.run(api_app, host="192.168.110.131", port=8000)from multiprocessing import Process
import time


def start_services_in_processes():
    """Launch the model worker and the OpenAI API server, each in its own process.

    The controller lines are commented out: the controller is presumably
    started elsewhere — uncomment them to manage it from this script too.
    """
    # Create one child process per service.
    # controller_process = Process(target=start_controller)
    worker_process = Process(target=start_model_worker)
    api_server_process = Process(target=start_openai_api_server)

    # Start the processes.
    # controller_process.start()
    worker_process.start()
    api_server_process.start()

    # Block until every child process exits.
    # controller_process.join()
    worker_process.join()
    api_server_process.join()


if __name__ == '__main__':
    start_services_in_processes()