With the rapid development of large-model technology, more and more LLM service providers have appeared on the market, including OpenAI, Anthropic, Google, Baidu, and Alibaba Cloud. As developers, we often need to switch between models, or use several models at once to cover different business needs. This article walks through how to call multiple vendors' LLM APIs through a unified interface in Python, with complete code examples.
Why unify API calls?
In real-world development, you are likely to run into the following scenarios:
- Model comparison testing: comparing output quality across different models
- Cost optimization: choosing models at different price points based on task complexity
- Availability guarantees: quickly switching to a backup model when the primary service is down
- Specific capability needs: different models excel at different tasks
Option 1: LiteLLM - a lightweight unified interface
LiteLLM is a lightweight library built specifically for unified API calls. It supports 100+ LLM providers and exposes them all through a single OpenAI-style interface.
Installation and basic usage
```bash
pip install litellm
```
Basic call example
```python
from litellm import completion
import os

# Set API keys for each vendor
os.environ["OPENAI_API_KEY"] = "your_openai_key"
os.environ["ANTHROPIC_API_KEY"] = "your_anthropic_key"
os.environ["GOOGLE_API_KEY"] = "your_google_key"

def test_multiple_models():
    messages = [{"role": "user", "content": "Please introduce machine learning in Chinese"}]

    # Call OpenAI GPT-4
    response1 = completion(model="gpt-4", messages=messages, temperature=0.7)
    print("GPT-4 reply:", response1.choices[0].message.content)

    # Call Anthropic Claude
    response2 = completion(model="claude-3-sonnet-20240229", messages=messages, temperature=0.7)
    print("Claude reply:", response2.choices[0].message.content)

    # Call Google Gemini
    response3 = completion(model="gemini-pro", messages=messages, temperature=0.7)
    print("Gemini reply:", response3.choices[0].message.content)

if __name__ == "__main__":
    test_multiple_models()
```
Advanced features: streaming responses and error handling
```python
from litellm import completion

def stream_chat_with_fallback(messages, models=["gpt-4", "claude-3-sonnet-20240229", "gemini-pro"]):
    """Streaming chat with automatic fallback across models"""
    for model in models:
        try:
            print(f"Trying model: {model}")
            response = completion(
                model=model,
                messages=messages,
                stream=True,
                temperature=0.7
            )
            print(f"Reply from {model}:")
            for chunk in response:
                if chunk.choices[0].delta.content:
                    print(chunk.choices[0].delta.content, end="")
            print("\n")
            return  # Exit after the first success
        except Exception as e:
            print(f"Model {model} call failed: {e}")
            continue
    print("All models failed")

# Usage example
messages = [{"role": "user", "content": "Please explain the Transformer architecture in detail"}]
stream_chat_with_fallback(messages)
```
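Besides a hand-rolled fallback loop, LiteLLM also ships a built-in Router with its own fallback support. The following is a minimal sketch, not a full configuration reference; the aliases "primary" and "backup" are illustrative names chosen here, and the models are assumed to have valid keys set in the environment:

```python
from litellm import Router

# Each entry maps an alias (model_name) to the real litellm call parameters.
router = Router(
    model_list=[
        {"model_name": "primary", "litellm_params": {"model": "gpt-4"}},
        {"model_name": "backup", "litellm_params": {"model": "claude-3-sonnet-20240229"}},
    ],
    # If "primary" fails, the router retries the same request against "backup".
    fallbacks=[{"primary": ["backup"]}],
)

messages = [{"role": "user", "content": "Please explain the Transformer architecture in detail"}]
response = router.completion(model="primary", messages=messages)
print(response.choices[0].message.content)
```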
Option 2: LangChain - a full-featured framework
LangChain goes beyond API calls: it is a complete framework for building LLM applications.
Installation and setup
```bash
pip install langchain langchain-openai langchain-anthropic langchain-google-genai
```
Multi-model comparison example
```python
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
import asyncio

class MultiModelComparison:
    def __init__(self):
        self.models = {
            "gpt-4": ChatOpenAI(model="gpt-4", temperature=0.7),
            "claude-3-sonnet": ChatAnthropic(model="claude-3-sonnet-20240229", temperature=0.7),
            "gemini-pro": ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.7),
        }

    async def compare_models(self, question):
        """Query multiple models concurrently and compare their replies"""
        tasks = []
        for model_name, model in self.models.items():
            task = self.get_model_response(model_name, model, question)
            tasks.append(task)
        results = await asyncio.gather(*tasks, return_exceptions=True)
        return results

    async def get_model_response(self, model_name, model, question):
        """Get a single model's reply"""
        try:
            message = HumanMessage(content=question)
            response = await model.ainvoke([message])
            return {"model": model_name, "response": response.content, "success": True}
        except Exception as e:
            return {"model": model_name, "error": str(e), "success": False}

# Usage example
async def main():
    comparator = MultiModelComparison()
    question = "Please explain the backpropagation algorithm in deep learning"
    results = await comparator.compare_models(question)

    for result in results:
        print(f"\n{'='*50}")
        print(f"Model: {result['model']}")
        if result['success']:
            print(f"Reply: {result['response'][:200]}...")
        else:
            print(f"Error: {result['error']}")

# Run the async entry point
if __name__ == "__main__":
    asyncio.run(main())
```
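If you would rather not manage asyncio.gather yourself, LCEL's RunnableParallel fans a single input out to several models. A minimal sketch under the same assumption that API keys for both providers are configured:

```python
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langchain_core.messages import HumanMessage
from langchain_core.runnables import RunnableParallel

# One input, several branches; invoke() runs the branches concurrently.
compare = RunnableParallel(
    gpt4=ChatOpenAI(model="gpt-4", temperature=0.7),
    claude=ChatAnthropic(model="claude-3-sonnet-20240229", temperature=0.7),
)

replies = compare.invoke([HumanMessage(content="Please explain backpropagation")])
for name, message in replies.items():
    print(f"{name}: {message.content[:200]}")
```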
LangChain chained-call example
```python
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

def create_translation_chain():
    """Create a translation pipeline: GPT-4 translates, Claude proofreads"""
    # GPT-4 handles translation
    translator = ChatOpenAI(model="gpt-4", temperature=0.3)
    translation_prompt = ChatPromptTemplate.from_template(
        "Please translate the following Chinese text into accurate English:\n{text}"
    )

    # Claude handles proofreading
    proofreader = ChatAnthropic(model="claude-3-sonnet-20240229", temperature=0.3)
    proofread_prompt = ChatPromptTemplate.from_template(
        "Please proofread the following English translation and correct any issues:\n{translation}"
    )

    # Build the chains
    translation_chain = translation_prompt | translator | StrOutputParser()
    proofread_chain = proofread_prompt | proofreader | StrOutputParser()
    return translation_chain, proofread_chain

def translate_and_proofread(text):
    """Translate, then proofread"""
    translation_chain, proofread_chain = create_translation_chain()

    # Step 1: translate
    translation = translation_chain.invoke({"text": text})
    print(f"GPT-4 translation: {translation}")

    # Step 2: proofread
    final_result = proofread_chain.invoke({"translation": translation})
    print(f"Claude proofread result: {final_result}")
    return final_result

# Usage example
chinese_text = "人工智能正在深刻改變我們的生活方式和工作方式"
result = translate_and_proofread(chinese_text)
```
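Because both stages are LCEL runnables, the translate-then-proofread flow can also be composed into a single chain. A short sketch reusing the helpers defined above; the dict wraps the translation output under the "translation" key the proofreading prompt expects:

```python
# Compose both stages into one runnable pipeline.
translation_chain, proofread_chain = create_translation_chain()
full_chain = {"translation": translation_chain} | proofread_chain

result = full_chain.invoke({"text": "人工智能正在深刻改變我們的生活方式和工作方式"})
print(result)
```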
Option 3: a custom SDK wrapper
If you need finer-grained control, you can build your own unified wrapper classes.
Basic wrapper architecture
```python
from abc import ABC, abstractmethod
import openai
import anthropic
from google.generativeai import GenerativeModel
import google.generativeai as genai
from typing import List, Dict, Any, Optional
import time

class LLMProvider(ABC):
    """Abstract base class for an LLM provider"""

    @abstractmethod
    def chat(self, messages: List[Dict], **kwargs) -> str:
        pass

    @abstractmethod
    def stream_chat(self, messages: List[Dict], **kwargs):
        pass

class OpenAIProvider(LLMProvider):
    def __init__(self, api_key: str, model: str = "gpt-4"):
        self.client = openai.OpenAI(api_key=api_key)
        self.model = model

    def chat(self, messages: List[Dict], **kwargs) -> str:
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=kwargs.get('temperature', 0.7),
            max_tokens=kwargs.get('max_tokens', 1000)
        )
        return response.choices[0].message.content

    def stream_chat(self, messages: List[Dict], **kwargs):
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            stream=True,
            temperature=kwargs.get('temperature', 0.7),
            max_tokens=kwargs.get('max_tokens', 1000)
        )
        for chunk in response:
            if chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content

class AnthropicProvider(LLMProvider):
    def __init__(self, api_key: str, model: str = "claude-3-sonnet-20240229"):
        self.client = anthropic.Anthropic(api_key=api_key)
        self.model = model

    def chat(self, messages: List[Dict], **kwargs) -> str:
        response = self.client.messages.create(
            model=self.model,
            messages=messages,
            temperature=kwargs.get('temperature', 0.7),
            max_tokens=kwargs.get('max_tokens', 1000)
        )
        return response.content[0].text

    def stream_chat(self, messages: List[Dict], **kwargs):
        with self.client.messages.stream(
            model=self.model,
            messages=messages,
            temperature=kwargs.get('temperature', 0.7),
            max_tokens=kwargs.get('max_tokens', 1000)
        ) as stream:
            for text in stream.text_stream:
                yield text

class GoogleProvider(LLMProvider):
    def __init__(self, api_key: str, model: str = "gemini-pro"):
        genai.configure(api_key=api_key)
        self.model = GenerativeModel(model)

    def chat(self, messages: List[Dict], **kwargs) -> str:
        # Convert the message format
        prompt = self._convert_messages(messages)
        response = self.model.generate_content(prompt)
        return response.text

    def stream_chat(self, messages: List[Dict], **kwargs):
        prompt = self._convert_messages(messages)
        response = self.model.generate_content(prompt, stream=True)
        for chunk in response:
            if chunk.text:
                yield chunk.text

    def _convert_messages(self, messages: List[Dict]) -> str:
        """Convert OpenAI-style messages into a single Google-style prompt"""
        prompt = ""
        for msg in messages:
            if msg["role"] == "user":
                prompt += f"User: {msg['content']}\n"
            elif msg["role"] == "assistant":
                prompt += f"Assistant: {msg['content']}\n"
        return prompt

class UnifiedLLMClient:
    """Unified LLM client"""

    def __init__(self):
        self.providers: Dict[str, LLMProvider] = {}
        self.default_provider = None
        self.retry_count = 3
        self.retry_delay = 1

    def add_provider(self, name: str, provider: LLMProvider, is_default: bool = False):
        """Register an LLM provider"""
        self.providers[name] = provider
        if is_default or self.default_provider is None:
            self.default_provider = name

    def chat(self, messages: List[Dict], provider: Optional[str] = None, **kwargs) -> str:
        """Unified chat interface"""
        provider_name = provider or self.default_provider
        if provider_name not in self.providers:
            raise ValueError(f"Provider {provider_name} not found")
        return self._execute_with_retry(
            self.providers[provider_name].chat,
            messages,
            **kwargs
        )

    def stream_chat(self, messages: List[Dict], provider: Optional[str] = None, **kwargs):
        """Unified streaming chat interface"""
        provider_name = provider or self.default_provider
        if provider_name not in self.providers:
            raise ValueError(f"Provider {provider_name} not found")
        return self.providers[provider_name].stream_chat(messages, **kwargs)

    def chat_with_fallback(self, messages: List[Dict], providers: Optional[List[str]] = None, **kwargs) -> Dict[str, Any]:
        """Chat with automatic failover across providers"""
        providers = providers or list(self.providers.keys())
        for provider_name in providers:
            try:
                start_time = time.time()
                response = self.chat(messages, provider=provider_name, **kwargs)
                end_time = time.time()
                return {
                    "provider": provider_name,
                    "response": response,
                    "success": True,
                    "response_time": end_time - start_time
                }
            except Exception as e:
                print(f"Provider {provider_name} failed: {e}")
                continue
        raise Exception("All providers failed")

    def _execute_with_retry(self, func, *args, **kwargs):
        """Execute a call with retries"""
        for attempt in range(self.retry_count):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                if attempt == self.retry_count - 1:
                    raise e
                time.sleep(self.retry_delay * (2 ** attempt))  # Exponential backoff
```
A complete example using the custom wrapper
```python
import os

def setup_unified_client():
    """Configure the unified client from environment variables"""
    client = UnifiedLLMClient()

    # Add the OpenAI provider
    if os.getenv("OPENAI_API_KEY"):
        openai_provider = OpenAIProvider(
            api_key=os.getenv("OPENAI_API_KEY"),
            model="gpt-4"
        )
        client.add_provider("openai", openai_provider, is_default=True)

    # Add the Anthropic provider
    if os.getenv("ANTHROPIC_API_KEY"):
        anthropic_provider = AnthropicProvider(
            api_key=os.getenv("ANTHROPIC_API_KEY"),
            model="claude-3-sonnet-20240229"
        )
        client.add_provider("anthropic", anthropic_provider)

    # Add the Google provider
    if os.getenv("GOOGLE_API_KEY"):
        google_provider = GoogleProvider(
            api_key=os.getenv("GOOGLE_API_KEY"),
            model="gemini-pro"
        )
        client.add_provider("google", google_provider)

    return client

def demo_unified_client():
    """Demonstrate the unified client"""
    client = setup_unified_client()
    messages = [{"role": "user", "content": "Please explain what a large language model is and give a real-world use case"}]

    # 1. Use the default provider
    print("=== Default provider ===")
    response = client.chat(messages)
    print(f"Reply: {response}\n")

    # 2. Use a specific provider
    print("=== Explicitly using Anthropic ===")
    try:
        response = client.chat(messages, provider="anthropic")
        print(f"Reply: {response}\n")
    except Exception as e:
        print(f"Call failed: {e}\n")

    # 3. Call with failover
    print("=== Call with failover ===")
    try:
        result = client.chat_with_fallback(messages, providers=["anthropic", "openai", "google"])
        print(f"Provider used: {result['provider']}")
        print(f"Response time: {result['response_time']:.2f}s")
        print(f"Reply: {result['response'][:100]}...\n")
    except Exception as e:
        print(f"All providers failed: {e}\n")

    # 4. Streaming call
    print("=== Streaming call ===")
    try:
        stream = client.stream_chat(messages, provider="openai")
        for chunk in stream:
            print(chunk, end="", flush=True)
        print("\n")
    except Exception as e:
        print(f"Streaming call failed: {e}\n")

if __name__ == "__main__":
    demo_unified_client()
```
Option 4: an open-source gateway solution
For enterprise applications, an open-source API gateway such as One-API is recommended.
Deploying and using One-API
```bash
# Deploy One-API with Docker
docker run -d \
  --name one-api \
  -p 3000:3000 \
  -e SQL_DSN="root:password@tcp(localhost:3306)/oneapi" \
  -e SESSION_SECRET="your-secret-key" \
  -e INITIAL_ROOT_TOKEN="your-initial-token" \
  justsong/one-api:latest
```
Calling models through One-API
```python
import requests
from typing import List, Dict

class OneAPIClient:
    def __init__(self, base_url: str, api_key: str):
        self.base_url = base_url.rstrip('/')
        self.api_key = api_key
        self.headers = {
            'Authorization': f'Bearer {api_key}',
            'Content-Type': 'application/json'
        }

    def chat(self, messages: List[Dict], model: str = "gpt-3.5-turbo", **kwargs):
        """Call any configured model through One-API's unified endpoint"""
        url = f"{self.base_url}/v1/chat/completions"
        payload = {
            "model": model,
            "messages": messages,
            "temperature": kwargs.get('temperature', 0.7),
            "max_tokens": kwargs.get('max_tokens', 1000),
            "stream": kwargs.get('stream', False)
        }
        response = requests.post(url, headers=self.headers, json=payload)
        if response.status_code == 200:
            return response.json()
        else:
            raise Exception(f"API call failed: {response.status_code}, {response.text}")

    def list_models(self):
        """Fetch the list of available models"""
        url = f"{self.base_url}/v1/models"
        response = requests.get(url, headers=self.headers)
        if response.status_code == 200:
            return response.json()
        else:
            raise Exception(f"Failed to get models: {response.status_code}")

# Usage example
def demo_one_api():
    client = OneAPIClient(
        base_url="http://localhost:3000",
        api_key="your-one-api-key"
    )

    # List the available models
    models = client.list_models()
    print("Available models:", [model['id'] for model in models['data']])

    # Call different models
    messages = [{"role": "user", "content": "Hello, please introduce yourself"}]
    for model in ["gpt-4", "claude-3-sonnet", "gemini-pro"]:
        try:
            response = client.chat(messages, model=model)
            print(f"\n{model} reply:")
            print(response['choices'][0]['message']['content'])
        except Exception as e:
            print(f"{model} call failed: {e}")

if __name__ == "__main__":
    demo_one_api()
```
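Because One-API exposes an OpenAI-compatible endpoint, you do not have to hand-write a requests wrapper at all: the official openai SDK can be pointed straight at the gateway via its base_url parameter. A sketch, assuming the gateway runs at localhost:3000 and you hold a valid One-API token:

```python
from openai import OpenAI

# Point the official OpenAI SDK at the One-API gateway instead of api.openai.com.
client = OpenAI(
    base_url="http://localhost:3000/v1",  # One-API's OpenAI-compatible endpoint
    api_key="your-one-api-key",           # a token issued by One-API
)

response = client.chat.completions.create(
    model="claude-3-sonnet",  # any model name configured as a One-API channel
    messages=[{"role": "user", "content": "Hello, please introduce yourself"}],
)
print(response.choices[0].message.content)
```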
Performance monitoring and logging
In production, multi-model calls need monitoring and logging.
```python
import logging
import time
from functools import wraps
from typing import Dict, Any, List
import json

class LLMMonitor:
    def __init__(self):
        self.setup_logging()
        self.metrics = {
            'total_requests': 0,
            'successful_requests': 0,
            'failed_requests': 0,
            'response_times': [],
            'provider_usage': {}
        }

    def setup_logging(self):
        """Configure logging to a file and the console"""
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler('llm_requests.log'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger('LLMMonitor')

    def log_request(self, provider: str, model: str, messages: List[Dict], **kwargs):
        """Log an outgoing request"""
        self.logger.info(f"Request - Provider: {provider}, Model: {model}, Messages: {len(messages)}")
        self.metrics['total_requests'] += 1
        self.metrics['provider_usage'][provider] = self.metrics['provider_usage'].get(provider, 0) + 1

    def log_response(self, provider: str, model: str, response: str, response_time: float, success: bool):
        """Log a response or a failure"""
        if success:
            self.metrics['successful_requests'] += 1
            self.metrics['response_times'].append(response_time)
            self.logger.info(f"Success - Provider: {provider}, Model: {model}, Time: {response_time:.2f}s")
        else:
            self.metrics['failed_requests'] += 1
            self.logger.error(f"Failed - Provider: {provider}, Model: {model}")

    def get_metrics(self) -> Dict[str, Any]:
        """Return aggregated performance metrics"""
        avg_response_time = (
            sum(self.metrics['response_times']) / len(self.metrics['response_times'])
            if self.metrics['response_times'] else 0
        )
        return {
            'total_requests': self.metrics['total_requests'],
            'successful_requests': self.metrics['successful_requests'],
            'failed_requests': self.metrics['failed_requests'],
            'success_rate': (
                self.metrics['successful_requests'] / self.metrics['total_requests']
                if self.metrics['total_requests'] > 0 else 0
            ),
            'average_response_time': avg_response_time,
            'provider_usage': self.metrics['provider_usage']
        }

def monitor_llm_call(monitor: LLMMonitor):
    """Decorator: monitor an LLM call"""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            start_time = time.time()
            provider = kwargs.get('provider', 'unknown')
            model = kwargs.get('model', 'unknown')
            messages = args[1] if len(args) > 1 else []

            monitor.log_request(provider, model, messages, **kwargs)
            try:
                result = func(*args, **kwargs)
                response_time = time.time() - start_time
                monitor.log_response(provider, model, str(result), response_time, True)
                return result
            except Exception as e:
                response_time = time.time() - start_time
                monitor.log_response(provider, model, str(e), response_time, False)
                raise
        return wrapper
    return decorator

# Example: using the monitoring decorator
monitor = LLMMonitor()

@monitor_llm_call(monitor)
def monitored_chat(client: UnifiedLLMClient, messages: List[Dict], **kwargs):
    return client.chat(messages, **kwargs)

# Usage example
def demo_monitoring():
    client = setup_unified_client()
    messages = [{"role": "user", "content": "Please explain the basic concepts of machine learning"}]

    # Make several calls to generate monitoring data
    for i in range(5):
        try:
            response = monitored_chat(client, messages, provider="openai")
            print(f"Call {i+1} succeeded")
        except Exception as e:
            print(f"Call {i+1} failed: {e}")

    # Inspect the metrics
    metrics = monitor.get_metrics()
    print("\n=== Performance metrics ===")
    print(json.dumps(metrics, indent=2, ensure_ascii=False))

if __name__ == "__main__":
    demo_monitoring()
```
Best-practice recommendations
1. Error handling and retry mechanisms
```python
import random
import time
from typing import Any, Dict, List, Optional

class RobustLLMClient:
    def __init__(self, client: UnifiedLLMClient):
        self.client = client
        self.max_retries = 3
        self.base_delay = 1
        self.max_delay = 60

    def exponential_backoff(self, attempt: int) -> float:
        """Exponential backoff with jitter"""
        delay = self.base_delay * (2 ** attempt)
        jitter = random.uniform(0, 0.1) * delay
        return min(delay + jitter, self.max_delay)

    def robust_chat(self, messages: List[Dict], providers: Optional[List[str]] = None, **kwargs) -> Dict[str, Any]:
        """Chat call with per-provider retries and failover"""
        providers = providers or list(self.client.providers.keys())

        for provider in providers:
            for attempt in range(self.max_retries):
                try:
                    start_time = time.time()
                    response = self.client.chat(messages, provider=provider, **kwargs)
                    end_time = time.time()
                    return {
                        "provider": provider,
                        "response": response,
                        "success": True,
                        "response_time": end_time - start_time,
                        "attempt": attempt + 1
                    }
                except Exception as e:
                    if attempt < self.max_retries - 1:
                        delay = self.exponential_backoff(attempt)
                        print(f"Provider {provider} attempt {attempt + 1} failed, retrying in {delay:.2f}s: {e}")
                        time.sleep(delay)
                    else:
                        print(f"Provider {provider} failed after {self.max_retries} attempts: {e}")
                        break
        raise Exception("All providers and retries exhausted")
```
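Usage mirrors chat_with_fallback, with the retry layer added on top. A short sketch reusing the setup_unified_client() helper from earlier; the prompt text is just a placeholder:

```python
# Wrap the unified client from setup_unified_client() with retry behavior.
client = setup_unified_client()
robust_client = RobustLLMClient(client)

messages = [{"role": "user", "content": "Summarize the key ideas behind retrieval-augmented generation"}]
result = robust_client.robust_chat(messages, providers=["openai", "anthropic"])
print(f"Provider: {result['provider']}, attempts: {result['attempt']}")
print(result["response"])
```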
2. Config-driven model selection
```python
import yaml
from typing import Dict, List

class ConfigurableLLMClient:
    def __init__(self, config_path: str):
        with open(config_path, 'r', encoding='utf-8') as f:
            self.config = yaml.safe_load(f)
        self.client = self.setup_client()

    def setup_client(self) -> UnifiedLLMClient:
        """Build the client from the config file"""
        client = UnifiedLLMClient()
        for provider_config in self.config['providers']:
            provider_name = provider_config['name']
            provider_type = provider_config['type']

            if provider_type == 'openai':
                provider = OpenAIProvider(
                    api_key=provider_config['api_key'],
                    model=provider_config['model']
                )
            elif provider_type == 'anthropic':
                provider = AnthropicProvider(
                    api_key=provider_config['api_key'],
                    model=provider_config['model']
                )
            elif provider_type == 'google':
                provider = GoogleProvider(
                    api_key=provider_config['api_key'],
                    model=provider_config['model']
                )

            client.add_provider(
                provider_name,
                provider,
                is_default=provider_config.get('is_default', False)
            )
        return client

    def get_provider_for_task(self, task_type: str) -> str:
        """Pick a provider based on the task type"""
        task_mapping = self.config.get('task_mapping', {})
        return task_mapping.get(task_type, self.client.default_provider)

    def smart_chat(self, messages: List[Dict], task_type: str = "general", **kwargs) -> str:
        """Smart chat: route to the most suitable model for the task type"""
        provider = self.get_provider_for_task(task_type)
        return self.client.chat(messages, provider=provider, **kwargs)
```
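For reference, here is a minimal sketch of a config file that ConfigurableLLMClient could consume, plus a usage example. The file name config.yaml, the concrete provider entries, and the task_mapping keys are all illustrative assumptions for this demo, not a fixed schema:

```python
# Illustrative only: write a sample config, then drive ConfigurableLLMClient with it.
sample_config = """
providers:
  - name: openai
    type: openai
    api_key: your_openai_key
    model: gpt-4
    is_default: true
  - name: anthropic
    type: anthropic
    api_key: your_anthropic_key
    model: claude-3-sonnet-20240229

task_mapping:
  translation: anthropic
  code_generation: openai
"""

with open("config.yaml", "w", encoding="utf-8") as f:
    f.write(sample_config)

client = ConfigurableLLMClient("config.yaml")
messages = [{"role": "user", "content": "Please translate this sentence into English: 你好,世界"}]
print(client.smart_chat(messages, task_type="translation"))
```

Keeping provider choice in a config file means routing decisions (for example, sending translation tasks to one vendor and code generation to another) can change without touching application code.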