一、環境信息
1.1、硬件信息
Atlas 800I A2
1.2、環境信息
注意:這裡驅動固件最好用商業版,我這裡用的社區版有點小問題
操作系統:openEuler 22.03 LTS
NPU驅動:Ascend-hdk-910b-npu-driver_24.1.rc3_linux-aarch64.run
NPU固件:Ascend-hdk-910b-npu-firmware_7.5.0.1.129.run
MindIE鏡像:2.0.T3-800I-A2-py311-openeuler24.03-lts
二、安裝驅動固件
2.1、創建運行用戶和所屬組
groupadd HwHiAiUser
useradd -g HwHiAiUser -d /home/HwHiAiUser -m HwHiAiUser -s /bin/bash
2.2、安裝驅動(根據提示選擇是否重啟)
./Ascend-hdk-910b-npu-driver_24.1.rc3_linux-aarch64.run --full --install-for-all
2.3、安裝固件(根據提示選擇是否重啟)
./Ascend-hdk-910b-npu-firmware_7.5.0.1.129.run --full
三、創建容器
docker run -itd --privileged --name=deepseek-70b --net=host \
  --shm-size 500g \
  --device=/dev/davinci0 \
  --device=/dev/davinci1 \
  --device=/dev/davinci2 \
  --device=/dev/davinci3 \
  --device=/dev/davinci4 \
  --device=/dev/davinci5 \
  --device=/dev/davinci6 \
  --device=/dev/davinci7 \
  --device=/dev/davinci_manager \
  --device=/dev/hisi_hdc \
  --device=/dev/devmm_svm \
  -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
  -v /usr/local/Ascend/firmware:/usr/local/Ascend/firmware \
  -v /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi \
  -v /usr/local/sbin:/usr/local/sbin \
  -v /etc/hccn.conf:/etc/hccn.conf \
  -v /data:/data \
  swr.cn-south-1.myhuaweicloud.com/ascendhub/mindie:2.0.T3-800I-A2-py311-openeuler24.03-lts \
  bash
四、配置、啟動MindIE服務
4.1、配置環境變量
vim ~/.bashrc
# 在 ~/.bashrc 末尾追加以下內容
source /usr/local/Ascend/ascend-toolkit/set_env.sh
source /usr/local/Ascend/nnal/atb/set_env.sh
source /usr/local/Ascend/atb-models/set_env.sh
source /usr/local/Ascend/mindie/set_env.sh
# 日誌打印(默認Error級別)
export MINDIE_LOG_TO_STDOUT=1
# 虛擬內存
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
# ATB WorkSpace 使能
export ATB_WORKSPACE_MEM_ALLOC_ALG_TYPE=3
export ATB_WORKSPACE_MEM_ALLOC_GLOBAL=1
# 單線程加速權重加載
export OMP_NUM_THREADS=1
# 設置精度飽和模式,防止fp16引起的上下溢出
export INF_NAN_MODE_ENABLE=0
# 關閉確定性計算
export HCCL_DETERMINISTIC=false
# 使能AIV,暫時還有一些問題,不建議線上業務使用,性能復現必須開啟
export HCCL_OP_EXPANSION_MODE="AIV"
# 使能內存復用
export ATB_LAYER_INTERNAL_TENSOR_REUSE=1
export ATB_OPERATION_EXECUTE_ASYNC=1
export ATB_CONVERT_NCHW_TO_ND=1
export ATB_WORKSPACE_MEM_ALLOC_GLOBAL=1
export ATB_WORKSPACE_MEM_ALLOC_ALG_TYPE=3
export ATB_CONTEXT_WORKSPACE_SIZE=0
export ATB_LAUNCH_KERNEL_WITH_TILING=1
export ATB_LLM_ENABLE_AUTO_TRANSPOSE=0
# CPU親和性調度
export CPU_AFFINITY_CONF=1
export TASK_QUEUE_ENABLE=1
unset ASCEND_LAUNCH_BLOCKING
4.2、配置mindie配置文件
# 修改MindIE配置文件
cd /usr/local/Ascend/mindie/latest/mindie-service/
vim conf/config.json
修改點標紅項:
{
    "Version" : "1.0.0",
    "LogConfig" :
    {
        "logLevel" : "Info",
        "logFileSize" : 20,
        "logFileNum" : 20,
        "logPath" : "logs/mindie-server.log"
    },

    "ServerConfig" :
    {
        "ipAddress" : "0.0.0.0",
        "managementIpAddress" : "127.0.0.2",
        "port" : 1025,
        "managementPort" : 1026,
        "metricsPort" : 1027,
        "allowAllZeroIpListening" : true,
        "maxLinkNum" : 1000,
        "httpsEnabled" : false,
        "fullTextEnabled" : false,
        "tlsCaPath" : "security/ca/",
        "tlsCaFile" : ["ca.pem"],
        "tlsCert" : "security/certs/server.pem",
        "tlsPk" : "security/keys/server.key.pem",
        "tlsPkPwd" : "security/pass/key_pwd.txt",
        "tlsCrlPath" : "security/certs/",
        "tlsCrlFiles" : ["server_crl.pem"],
        "managementTlsCaFile" : ["management_ca.pem"],
        "managementTlsCert" : "security/certs/management/server.pem",
        "managementTlsPk" : "security/keys/management/server.key.pem",
        "managementTlsPkPwd" : "security/pass/management/key_pwd.txt",
        "managementTlsCrlPath" : "security/management/certs/",
        "managementTlsCrlFiles" : ["server_crl.pem"],
        "kmcKsfMaster" : "tools/pmt/master/ksfa",
        "kmcKsfStandby" : "tools/pmt/standby/ksfb",
        "inferMode" : "standard",
        "interCommTLSEnabled" : false,
        "interCommPort" : 1121,
        "interCommTlsCaPath" : "security/grpc/ca/",
        "interCommTlsCaFiles" : ["ca.pem"],
        "interCommTlsCert" : "security/grpc/certs/server.pem",
        "interCommPk" : "security/grpc/keys/server.key.pem",
        "interCommPkPwd" : "security/grpc/pass/key_pwd.txt",
        "interCommTlsCrlPath" : "security/grpc/certs/",
        "interCommTlsCrlFiles" : ["server_crl.pem"],
        "openAiSupport" : "vllm"
    },

    "BackendConfig" : {
        "backendName" : "mindieservice_llm_engine",
        "modelInstanceNumber" : 1,
        "npuDeviceIds" : [[0,1,2,3,4,5,6,7]],
        "tokenizerProcessNumber" : 8,
        "multiNodesInferEnabled" : false,
        "multiNodesInferPort" : 1120,
        "interNodeTLSEnabled" : false,
        "interNodeTlsCaPath" : "security/grpc/ca/",
        "interNodeTlsCaFiles" : ["ca.pem"],
        "interNodeTlsCert" : "security/grpc/certs/server.pem",
        "interNodeTlsPk" : "security/grpc/keys/server.key.pem",
        "interNodeTlsPkPwd" : "security/grpc/pass/mindie_server_key_pwd.txt",
        "interNodeTlsCrlPath" : "security/grpc/certs/",
        "interNodeTlsCrlFiles" : ["server_crl.pem"],
        "interNodeKmcKsfMaster" : "tools/pmt/master/ksfa",
        "interNodeKmcKsfStandby" : "tools/pmt/standby/ksfb",
        "ModelDeployConfig" :
        {
            "maxSeqLen" : 2560,
            "maxInputTokenLen" : 2048,
            "truncation" : false,
            "ModelConfig" : [
                {
                    "modelInstanceType" : "Standard",
                    "modelName" : "deepseek-70b",
                    "modelWeightPath" : "/data/70b",
                    "worldSize" : 8,
                    "cpuMemSize" : 5,
                    "npuMemSize" : -1,
                    "backendType" : "atb",
                    "trustRemoteCode" : false
                }
            ]
        },

        "ScheduleConfig" :
        {
            "templateType" : "Standard",
            "templateName" : "Standard_LLM",
            "cacheBlockSize" : 128,

            "maxPrefillBatchSize" : 50,
            "maxPrefillTokens" : 8192,
            "prefillTimeMsPerReq" : 150,
            "prefillPolicyType" : 0,

            "decodeTimeMsPerReq" : 50,
            "decodePolicyType" : 0,

            "maxBatchSize" : 200,
            "maxIterTimes" : 512,
            "maxPreemptCount" : 0,
            "supportSelectBatch" : false,
            "maxQueueDelayMicroseconds" : 5000
        }
    }
}
# 設置權重的配置文件的權限
chmod 750 {/path-to-weights/config.json}
# 拉起服務
nohup ./bin/mindieservice_daemon > output.log 2>&1 &
# 停止服務
ps -ef | grep mindieservice
pkill -9 mindieservice
五、測試
curl http://IP:1025/v1/chat/completions -d '{"model": "改為mindie配置文件中設置的名稱","messages": [{"role": "user", "content": "請告訴我關于人工智能的一些信息。"}],"stream": true}'
六、部署open-webui
# 修改點:
# OPENAI_API_BASE_URLS:改為自己的推理服務接口IP
# /data/open-webui:open-webui的數據存儲目錄
docker run -itd --name open-webui \
--net host \
-e PORT=3006 \
-e OPENAI_API_BASE_URLS=http://192.168.50.1:1025/v1 \
-v /data/open-webui:/app/backend/data \
022ecf5a33b8 bash