旅游信息檢索
旅游信息檢索是系統中實現數據獲取和處理的關鍵環節,負責根據用戶輸入的目的地城市和出游天數,動態獲取並生成高質量的旅游數據。
模塊的工作流程分為以下幾個階段:首先,對用戶輸入的信息進行標準化處理,將城市名稱和時間信息改寫為適合搜索引擎的查詢模板(query)。隨後,系統調用 Google Search API 進行景點和美食信息的文本檢索,包括景點描述、距離、推薦美食等詳細內容;同時,為了補充圖片資源,模塊還調用 DuckDuckGo 搜索引擎,專注於獲取高質量的景點和美食圖片鏈接。
在數據檢索完成後,模塊進一步利用大語言模型(LLM)對初步搜索結果進行智能解析與重排序(rerank),從相關性和用戶需求角度優化數據質量,確保信息全面、準確、優先級清晰。經過優化後的數據會以結構化的形式存儲到數據庫中(本示例以本地 JSON 文件代替),包含每個景點和美食的名稱、詳細描述、推薦理由以及圖片 URL。
from camel.toolkits import SearchToolkit
from camel.agents import ChatAgent
from camel.models import ModelFactory
from camel.types import ModelPlatformType
from camel.loaders import Firecrawl
from typing import List, Dict, Any
from flask import Flask, request, jsonify
import json
import os
import re
from dotenv import load_dotenv

# Load variables from a local .env file (if any) before reading them below.
load_dotenv()

# Re-export the API keys into the process environment. os.environ only accepts
# str values, so assigning the None returned by os.getenv() for a missing
# variable would raise TypeError at import time; warn and carry on instead.
for _env_key in ("GOOGLE_API_KEY", "SEARCH_ENGINE_ID", "FIRECRAWL_API_KEY"):
    _env_value = os.getenv(_env_key)
    if _env_value is None:
        print(f"警告: 環境變量 {_env_key} 未設置")
    else:
        os.environ[_env_key] = _env_value
os.environ["QWEN_API_KEY"] = os.getenv("QWEN_API_KEY")app = Flask(__name__)class TravelPlanner:def __init__(self, city: str, days: int):#定義地點和時間,設置默認值self.city = cityself.days = daysself.res = None # 初始化模型和智能體self.model = ModelFactory.create(model_platform=ModelPlatformType.OPENAI_COMPATIBLE_MODEL,model_type="Qwen/Qwen2.5-72B-Instruct",url='https://api-inference.modelscope.cn/v1/',api_key=os.getenv('QWEN_API_KEY'))# 初始化各種工具#重排序模型self.reranker_agent = ChatAgent(system_message="你是一搜索質量打分專家,要從{搜索結果}里找出和{query}里最相關的2條結果,保存他們的結果,保留result_id、title、description、url,嚴格以json格式輸出",model=self.model,output_language='中文')#景點抓取agentself.attraction_agent = ChatAgent(system_message="你是一個旅游信息提取專家,要根據內容提取出景點信息并返回json格式,嚴格以json格式輸出",model=self.model,output_language='中文')#美食抓取agentself.food_agent = ChatAgent(system_message="你是一個旅游信息提取專家,要根據內容提取出美食信息并返回json格式,嚴格以json格式輸出",model=self.model,output_language='中文')#base攻略生成agentself.base_guide_agent = ChatAgent(system_message="你是一個旅游攻略生成專家,要根據內容生成一個旅游攻略,嚴格以json格式輸出",model=self.model,output_language='中文')# self.firecrawl = Firecrawl()#后續功能self.search_toolkit = SearchToolkit()def extract_json_from_response(self,response_content: str) -> List[Dict[str, Any]]:"""從LLM響應中提取JSON內容"""try:# 找到JSON內容的開始和結束位置start = response_content.find('```json\n') + 8end = response_content.find('\n```', start)if start == -1 or end == -1:print("未找到JSON內容的標記")return []json_str = response_content[start:end].strip()print(f"提取的JSON字符串: {json_str}") # 調試信息# 解析 JSON 字符串parsed = json.loads(json_str)# 處理不同的JSON結構if isinstance(parsed, dict) and "related_results" in parsed:return parsed["related_results"]elif isinstance(parsed, list):return parsedelse:print("未找到預期的JSON結構")return []except json.JSONDecodeError as e:print(f"解析JSON失敗: {str(e)}")print(f"原始內容: {response_content}")return []except Exception as e:print(f"發生錯誤: {str(e)}")return []def search_and_rerank(self) -> Dict[str, Any]:"""多次搜索并重排序,整合信息"""city = self.citydays = self.daysall_results = {}# 第一次搜索:旅游攻略try:query = 
f"{city}{days}天旅游攻略 最佳路線"search_results = self.search_toolkit.search_google(query=query, num_result_pages=5)prompt = f"請從以下搜索結果中篩選出最相關的{self.days}條{city}{days}天旅游攻略信息,并按照相關性排序:\n{json.dumps(search_results, ensure_ascii=False, indent=2)}"response = self.reranker_agent.step(prompt)all_results["guides"] = self.extract_json_from_response(response.msgs[0].content)except Exception as e:print(f"旅游攻略搜索失敗: {str(e)}")all_results["guides"] = []# 第二次搜索:必去景點try:query = f"{city} 必去景點 top10 著名景點"search_results = self.search_toolkit.search_google(query=query, num_result_pages=5)prompt = f"請從以下搜索結果中篩選出最多{self.days}條{city}最值得去的景點信息,并按照熱門程度排序:\n{json.dumps(search_results, ensure_ascii=False, indent=2)}"response = self.reranker_agent.step(prompt)all_results["attractions"] = self.extract_json_from_response(response.msgs[0].content)except Exception as e:print(f"景點搜索失敗: {str(e)}")all_results["attractions"] = []# 第三次搜索:必吃美食try:query = f"{city} 必吃美食 特色小吃 推薦"search_results = self.search_toolkit.search_google(query=query, num_result_pages=5)prompt = f"請從以下搜索結果中篩選出最多{self.days}條{city}最具特色的美食信息,并按照推薦度排序:\n{json.dumps(search_results, ensure_ascii=False, indent=2)}"response = self.reranker_agent.step(prompt)all_results["must_eat"] = self.extract_json_from_response(response.msgs[0].content)except Exception as e:print(f"必吃美食搜索失敗: {str(e)}")all_results["must_eat"] = []# 第四次搜索:特色美食try:query = f"{city} 特色美食 地方小吃 傳統美食"search_results = self.search_toolkit.search_google(query=query, num_result_pages=5)prompt = f"請從以下搜索結果中篩選出最多{self.days}條{city}獨特的地方特色美食信息,并按照特色程度排序:\n{json.dumps(search_results, ensure_ascii=False, indent=2)}"response = self.reranker_agent.step(prompt)all_results["local_food"] = self.extract_json_from_response(response.msgs[0].content)except Exception as e:print(f"特色美食搜索失敗: {str(e)}")all_results["local_food"] = []# 整合所有信息final_result = {"city": city,"days": days,"travel_info": {"guides": [{"result_id": item.get("result_id"),"title": item.get("title"),"description": 
item.get("description"),"long_description": item.get("long_description"),}for item in all_results["guides"]],"attractions": [{"result_id": item.get("result_id"),"title": item.get("title"),"description": item.get("description"),"long_description": item.get("long_description"),}for item in all_results["attractions"]],"must_eat": [{"result_id": item.get("result_id"),"title": item.get("title"),"description": item.get("description"),"long_description": item.get("long_description"),}for item in all_results["must_eat"]],"local_food": [{"result_id": item.get("result_id"),"title": item.get("title"),"description": item.get("description"),"long_description": item.get("long_description"),}for item in all_results["local_food"]]}}return final_resultdef extract_attractions_and_food(self) -> Dict:travel_info = self.search_and_rerank()# 提供一個base攻略路線,直接根據整個travel_info生成prompt = f"""參考以下信息,生成一個{self.city}{self.days}天攻略路線,直接根據整個travel_info生成{travel_info}【輸出格式】{{"base_guide": "攻略內容"}}"""base_guide = self.base_guide_agent.step(prompt)print(f"這是base攻略: {base_guide.msgs[0].content}")"""提取景點和美食信息"""# 從描述中提取具體的景點和美食attractions_text = " ".join([item["description"] for item in travel_info["travel_info"]["attractions"] + travel_info["travel_info"]["guides"]])print(f"這是景點信息: {attractions_text}")food_text = " ".join([item["description"] for item in travel_info["travel_info"]["must_eat"] + travel_info["travel_info"]["local_food"]])print(f"這是美食信息: {food_text}")# 使用LLM提取并整理信息attractions_prompt = f"""請從以下文本中提取出具體的景點名稱,注意不能遺漏景點信息,要盡量多提取景點信息,并為每個景點提供簡短描述:{attractions_text}請以JSON格式返回,格式如下:{{"attractions": [{{"name": "景點名稱", "description": "簡短描述"}}]}}"""food_prompt = f"""請從以下文本中提取出具體的美食名稱或者美食店鋪,注意不能遺漏美食信息,要盡量多提取美食信息,并為每個美食和店鋪提供簡短描述:{food_text}請以JSON格式返回,格式如下:{{"foods": [{{"name": "美食名稱", "description": "簡短描述"}}],"food_shop": [{{"name": "美食店鋪", "description": "簡短描述"}}]}}"""# 使用attraction_agent處理提取attractions_response = self.attraction_agent.step(attractions_prompt)foods_response = 
self.food_agent.step(food_prompt)print(f"這是景點信息: {attractions_response.msgs[0].content}")print(f"這是美食信息: {foods_response.msgs[0].content}")return {"base_guide": base_guide.msgs[0].content,"attractions": attractions_response.msgs[0].content,"foods": foods_response.msgs[0].content}def process_attractions_and_food(self) -> Dict:def clean_json_string(json_str: str) -> str:"""清理JSON字符串,移除markdown代碼塊標記"""# 移除 ```json 開頭if '```json' in json_str:json_str = json_str.split('```json')[-1]# 移除 ```結尾if '```' in json_str:json_str = json_str.split('```')[0]return json_str.strip()city = self.city"""處理景點和美食信息,添加圖片URL"""# 獲取原始數據results = self.extract_attractions_and_food()# 解析JSON字符串base_guide = json.loads(clean_json_string(results['base_guide']))attractions_data = json.loads(clean_json_string(results['attractions']))foods_data= json.loads(clean_json_string(results['foods']))foods_list = foods_data['foods']food_shops_list = foods_data['food_shop']# 創建結果字典result = {"city": city,"days": self.days,"base路線": base_guide,"景點": [],"美食": [],"美食店鋪": []}# 處理景點信息for attraction in attractions_data['attractions']:try:# 使用DuckDuckGo搜索圖片images = self.search_toolkit.search_duckduckgo(query=f"{city} {attraction['name']} 實景圖",source="images",max_results=1)# 添加圖片URLattraction_with_image = {"name": attraction['name'],"describe": attraction['description'],"圖片url": images[0]["image"] if images else "",}result['景點'].append(attraction_with_image)except Exception as e:print(f"搜索{attraction['name']}的圖片時出錯: {str(e)}")# 如果出錯,仍然添加景點信息,但不包含圖片URLresult['景點'].append({"name": attraction["name"],"describe": attraction["description"],"圖片url": "",})# 處理美食信息for food in foods_list:try:# 使用DuckDuckGo搜索圖片images = self.search_toolkit.search_duckduckgo(query=f"{city} {food['name']} 美食",source="images",max_results=1)# 添加圖片URLfood_with_image = {"name": food["name"],"describe": food["description"],"圖片url": images[0]["image"] if images else "",}result['美食'].append(food_with_image)except Exception as 
e:print(f"搜索{food['name']}的圖片時出錯: {str(e)}")# 如果出錯,仍然添加美食信息,但不包含圖片URLresult['美食'].append({"name": food["name"],"describe": food["description"],"圖片url": ""})# 處理美食店鋪信息for food_shop in food_shops_list:try:# 使用DuckDuckGo搜索圖片images = self.search_toolkit.search_duckduckgo(query=f"{city} {food_shop['name']} 美食店鋪",source="images",max_results=1)# 添加圖片URLfood_shop_with_image = {"name": food_shop["name"],"describe": food_shop["description"],"圖片url": images[0]["image"] if images else "",}result['美食店鋪'].append(food_shop_with_image)except Exception as e:print(f"搜索{food_shop['name']}的圖片時出錯: {str(e)}")# 如果出錯,仍然添加美食店鋪信息,但不包含圖片URLresult['美食店鋪'].append({"name": food_shop["name"],"describe": food_shop["description"],"圖片url": ""})try:# 獲取當前腳本所在目錄current_dir = os.path.dirname(os.path.abspath(__file__))# 創建storage目錄路徑storage_dir = os.path.join(current_dir, "storage")# 確保storage目錄存在os.makedirs(storage_dir, exist_ok=True)# 生成文件名(使用城市名和日期)filename = os.path.join(storage_dir, f"{self.city}{self.days}天旅游信息.json")# 將結果寫入JSON文件with open(filename, 'w', encoding='utf-8') as f:json.dump(result, f, ensure_ascii=False, indent=4)print(f"旅游攻略已保存到文件:{filename}")except Exception as e:print(f"保存JSON文件時出錯: {str(e)}")return result@app.route('/get_travel_plan', methods=['POST'])
def get_travel_plan():
    """Handle POST /get_travel_plan.

    Expects a JSON object with "city" (non-empty string) and "days" (positive
    integer). Responds with {"status": "success", "data": ...} on success,
    HTTP 400 with an error message for invalid input, and HTTP 500 when the
    planner itself fails.
    """
    # silent=True returns None for a missing, non-JSON or malformed body
    # instead of raising, so bad requests get a 400 rather than a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'city' not in data or 'days' not in data:
        return jsonify({
            'status': 'error',
            'message': '請求必須包含city和days參數'
        }), 400

    city = data['city']
    if not isinstance(city, str) or not city.strip():
        return jsonify({
            'status': 'error',
            'message': 'city參數必須為非空字符串'
        }), 400

    try:
        # int() raises TypeError (not ValueError) for None, lists and dicts.
        days = int(data['days'])
    except (TypeError, ValueError):
        return jsonify({
            'status': 'error',
            'message': 'days參數必須為整數'
        }), 400
    if days < 1:
        return jsonify({
            'status': 'error',
            'message': 'days參數必須為正整數'
        }), 400

    try:
        travel_planner = TravelPlanner(city=city.strip(), days=days)
        results = travel_planner.process_attractions_and_food()
    except Exception as e:
        # Top-level boundary: report any planner failure as a 500.
        return jsonify({
            'status': 'error',
            'message': f'處理請求時發生錯誤: {str(e)}'
        }), 500

    return jsonify({
        'status': 'success',
        'data': results
    })
if __name__ == '__main__':
    # The server binds to all interfaces, so the interactive debugger (which
    # allows arbitrary code execution) must not be on by default. Opt in for
    # local development with FLASK_DEBUG=1.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes")
    app.run(host='0.0.0.0', port=5002, debug=debug_enabled)
同樣的,我們在本地的5002端口啟動了一個服務,我們使用requests庫來調用測試一下效果:
import json

import requests

# API endpoint. The port must match the one the service listens on (5002).
url = "http://localhost:5002/get_travel_plan"

# Request payload: destination city and trip length in days.
data = {
    "city": "上海",
    "days": 3,
}

# Send the POST request and print either the plan or the error details.
try:
    response = requests.post(url, json=data)

    if response.status_code == 200:
        result = response.json()
        print("獲取到的旅游計劃:")
        print(json.dumps(result, ensure_ascii=False, indent=2))
    else:
        print(f"請求失敗: {response.status_code}")
        print(f"錯誤信息: {response.text}")
except requests.exceptions.RequestException as e:
    print(f"發送請求時發生錯誤: {e}")
這個模塊用於搜集和整理旅游信息。信息主要包括旅游的一些景點、美食信息及對應圖片的url,以便我們後面將它們轉成圖文攻略。
在大語言模型的應用開發中,我們常常使用JSON作為中間數據的邏輯保存格式,因為它交互方便,能很好地表示結構化的信息,且便於人類閱讀和理解。
以下是生成的三份參考結果
filename = os.path.join(storage_dir, f"{self.city}{self.days}天旅游信息.json")
命名邏輯是 {地點}{天數}天旅游信息.json,例如:
新疆7天旅游信息.json
成都3天旅游信息.json
上海3天旅游信息.json