農產品價格報告爬蟲使用說明
# **************************************************************************
# * *
# * 農產品價格報告爬蟲 *
# * *
# * 作者: xiaohai *
# * 版本: v1.0.0 *
# * 日期: 2024-12-05 *
# * *
# * 功能說明: *
# * 1. 日度報告 *
# * - 生成今日分析報告 *
# * - 生成指定日期報告 *
# * - 包含價格指數、分品類分析等 *
# * *
# * 2. 周度報告 *
# * - 生成本周分析報告 *
# * - 生成指定周報告 *
# * - 匯總周內價格變化 *
# * *
# * 3. 價格走勢 *
# * - 農產品價格200指數走勢 *
# * - 豬肉價格全國走勢 *
# * - 豬肉價格區域走勢 *
# * - 糧油價格指數走勢 *
# * *
# * 4. 數據導出 *
# * - 支持Excel格式導出 *
# * - 包含多個數據分類 *
# * - 支持時間范圍選擇 *
# * *
# * 數據來源: 農業農村部市場信息中心 *
# * 版權聲明: 僅用于學習交流 *
# * *
# **************************************************************************
import json
import logging
import os
import re
import subprocess
import sys
import time
import warnings
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import pandas as pd
import pkg_resources
import requests
import urllib3
from bs4 import BeautifulSoup

# Disable SSL warnings
# Suppress urllib3's InsecureRequestWarning; requests in this file are sent with verify=False.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Ignore every warning raised through the `warnings` module for the whole process.
warnings.filterwarnings('ignore')# Configuration constants
VERSION = "v1.0.0"
AUTHOR = "xiaohai"
DATA_SOURCE = "農業農村部市場信息中心"

# API configuration: endpoint paths are appended to API_BASE_URL.
API_BASE_URL = "https://ncpscxx.moa.gov.cn"
API_ENDPOINTS = dict(
    price_index="/product/common-price-index/getIndexList",
    variety_list="/product/sys-variety/selectList",
    price_trend="/product/price-info/getPriceInfoList",
    market_list="/product/sys-market/selectList",
    daily_price="/product/price-info/getDailyPrice",
    analysis_report="/product/analysis-report/pageList",
)

# Output directory configuration
OUTPUT_DIRS = dict(
    base="reports",
    daily="reports/daily",
    weekly="reports/weekly",
)

# Chart style configuration
CHART_STYLE = {
    'figure': {'figsize': (12, 6), 'facecolor': '#f8fcfa'},
    'grid': {'linestyle': '--', 'alpha': 0.3, 'color': 'gray'},
    'line': {'marker': 'o', 'markersize': 4, 'linewidth': 2},
    'colors': {
        'blue': '#40a9ff',
        'green': '#73d13d',
        'orange': '#ffa940',
        'red': '#ff4d4f',
        'purple': '#9254de',
        'cyan': '#36cfc9',
    },
}


def check_and_install_packages():
    """Check the required third-party packages and pip-install any that are missing.

    Prints one status line per package. If any installation fails, the user
    is asked whether to continue; answering anything but ``y`` exits the
    process with status 1.
    """
    required_packages = (
        'requests',    # HTTP requests
        'pandas',      # data handling
        'matplotlib',  # plotting
        'urllib3',     # HTTP client
        'openpyxl',    # Excel support
        'colorama',    # console colours
    )

    print("\n" + "=" * 50)
    print("檢查并安裝依賴包...")
    print("=" * 50)

    # Distinct marks so success and failure stay distinguishable even
    # without colour support.
    try:
        import colorama
        colorama.init()
        success_mark = colorama.Fore.GREEN + "√" + colorama.Style.RESET_ALL
        error_mark = colorama.Fore.RED + "×" + colorama.Style.RESET_ALL
    except ImportError:
        success_mark = "√"
        error_mark = "×"

    all_success = True
    for package in required_packages:
        try:
            pkg_resources.require(package)
            print(f"{success_mark} {package:15} 已安裝")
        except (pkg_resources.DistributionNotFound, pkg_resources.VersionConflict):
            print(f"{error_mark} {package:15} 未安裝,正在安裝...")
            try:
                subprocess.check_call(
                    [
                        sys.executable, "-m", "pip", "install",
                        "--disable-pip-version-check",
                        "--no-cache-dir",
                        package,
                    ],
                    stdout=subprocess.DEVNULL,
                )
                print(f"{success_mark} {package:15} 安裝成功")
            except (subprocess.CalledProcessError, OSError) as e:
                print(f"{error_mark} {package:15} 安裝失敗: {str(e)}")
                all_success = False

    print("\n依賴包檢查" + ("全部完成" if all_success else "存在問題"))
    print("=" * 50 + "\n")

    if not all_success:
        print("某些依賴包安裝失敗,程序可能無法正常運行!")
        if input("是否繼續運行?(y/n): ").strip().lower() != 'y':
            sys.exit(1)


class ReportCrawler:
    """Interactive crawler that builds agricultural price reports, charts and exports.

    Constructing an instance creates the ``reports``/``logs`` directories,
    configures logging and prepares the HTTP headers. ``run()`` starts the
    text menu; the ``fetch_*``, ``generate_*``, ``plot_*`` and ``export_*``
    methods can also be called directly.
    """

    # Export dataset key -> label used for Excel sheet names / CSV file names.
    _EXPORT_LABELS = {'index': '價格指數', 'pig': '豬肉價格', 'grain': '糧油價格'}

    # Output column -> response field for the pork price series.
    _PIG_FIELDS = {
        "全國": "national",
        "東北": "northEast",
        "華北": "northChina",
        "華東": "eastChina",
        "華中": "centralChina",
        "華南": "southChina",
        "西南": "southWest",
    }

    # Output column -> response field for the grain/oil price series.
    _GRAIN_FIELDS = {
        "通義糧價指數": "grainPriceIndex",
        "通義糧市指數": "grainMarketIndex",
        "通義糧市第1號": "grainMarketNo1",
        "通義糧天指數": "grainDayIndex",
        "通義糧指": "grainIndex",
        "通義糧天指數(干糧)": "grainDayDryIndex",
    }

    # Report fields that carry the per-category daily conclusions.
    _CONCLUSION_FIELDS = (
        "indexConclusion",
        "animalConclusion",
        "aquaticConclusion",
        "vegetablesConclusion",
        "fruitsConclusion",
    )

    def __init__(self):
        warnings.filterwarnings('ignore')
        self._setup_directories()
        self._setup_logger()
        self._setup_api()
        self._setup_plotting()

    # ------------------------------------------------------------------
    # Setup
    # ------------------------------------------------------------------
    def _setup_directories(self):
        """Create the output directories (``reports``, ``reports/daily``, ``reports/weekly``)."""
        self.output_dir = "reports"
        self.daily_dir = os.path.join(self.output_dir, "daily")
        self.weekly_dir = os.path.join(self.output_dir, "weekly")
        for directory in (self.output_dir, self.daily_dir, self.weekly_dir):
            os.makedirs(directory, exist_ok=True)

    def _setup_logger(self):
        """Configure ``self.logger`` with a dated log file and a console handler."""
        os.makedirs("logs", exist_ok=True)
        log_file = os.path.join("logs", f"crawler_{datetime.now().strftime('%Y%m%d')}.log")

        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)
        # The logger is shared per module name; adding handlers again for a
        # second instance would duplicate every log line.
        if self.logger.handlers:
            return

        formatter = logging.Formatter(
            '%(asctime)s - %(levelname)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S',
        )
        file_handler = logging.FileHandler(log_file, encoding='utf-8')
        file_handler.setFormatter(formatter)
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(formatter)
        self.logger.addHandler(file_handler)
        self.logger.addHandler(console_handler)

    def _setup_api(self):
        """Prepare the browser-like headers used by ``fetch_index_data``."""
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
            'Origin': 'https://ncpscxx.moa.gov.cn',
            'Referer': 'https://ncpscxx.moa.gov.cn/',
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Content-Type': 'application/json;charset=UTF-8',
        }

    def _setup_plotting(self):
        """Prefer CJK-capable fonts so the Chinese chart titles/labels can render."""
        cjk_fonts = ['SimHei', 'Microsoft YaHei', 'PingFang SC',
                     'Noto Sans CJK SC', 'WenQuanYi Micro Hei']
        plt.rcParams['font.sans-serif'] = cjk_fonts + list(plt.rcParams['font.sans-serif'])
        # Several CJK fonts lack the Unicode minus glyph; use the ASCII hyphen.
        plt.rcParams['axes.unicode_minus'] = False

    # ------------------------------------------------------------------
    # Small helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _cn_date(day, with_year=True):
        """Format *day* as ``YYYY年MM月DD日`` (or ``MM月DD日``).

        Built with f-strings so the non-ASCII literals never pass through
        the platform ``strftime``.
        """
        short = f"{day.month:02d}月{day.day:02d}日"
        return f"{day.year}年{short}" if with_year else short

    @staticmethod
    def _date_in_range(date_str, start_date, end_date):
        """Return True when the ``YYYY-MM-DD`` prefix of *date_str* lies in [start, end].

        Only calendar dates are compared, so a start bound carrying a time of
        day still includes its own day. A bound of ``None`` is unbounded.
        Raises ``ValueError`` if *date_str* does not start with a valid date.
        """
        day = datetime.strptime(str(date_str)[:10], "%Y-%m-%d").date()
        lower = start_date.date() if isinstance(start_date, datetime) else start_date
        upper = end_date.date() if isinstance(end_date, datetime) else end_date
        if lower is not None and day < lower:
            return False
        if upper is not None and day > upper:
            return False
        return True

    @staticmethod
    def _prompt_int(prompt):
        """Read an integer from stdin; print a hint and return None on invalid input."""
        try:
            return int(input(prompt).strip())
        except ValueError:
            print("輸入無效,請輸入整數!")
            return None

    # ------------------------------------------------------------------
    # Menu
    # ------------------------------------------------------------------
    def show_menu(self):
        """Print the menu, read the user's choice and return it as typed."""
        menu_lines = (
            "農產品價格報告爬蟲系統",
            "====================",
            "1. 生成今日分析報告",
            "2. 生成本周分析報告",
            "3. 生成指定日期報告",
            "4. 生成指定周報告",
            "5. 生成價格指數走勢圖",
            "6. 生成豬肉價格走勢圖",
            "7. 生成區域價格走勢圖",
            "8. 生成糧油價格走勢圖",
            "9. 導出Excel數據",
            "0. 退出系統",
        )
        menu = "\n" + "\n".join(menu_lines) + "\n\n請輸入功能編號(0-9): "
        print("\n" + "=" * 50)
        choice = input(menu)
        print("=" * 50 + "\n")
        return choice

    def run(self):
        """Run the interactive menu loop until the user chooses ``0``."""
        # Choices that only need "the last N days": prompt text + handler.
        range_actions = {
            "5": ("請輸入要查看的天數: ", self.plot_index_trend),
            "6": ("請輸入要查看的天數: ", self.plot_pig_price_trend),
            "7": ("請輸入要查看的天數: ", self.plot_pig_price_region_trend),
            "8": ("請輸入要查看的天數: ", self.plot_grain_price_trend),
            "9": ("請輸入要導出的天數: ", self.export_data),
        }
        while True:
            choice = self.show_menu().strip()
            if choice == "0":
                print("感謝使用,再見!")
                break
            if choice == "1":
                print("正在生成今日分析報告...")
                self.generate_daily_report(datetime.now())
            elif choice == "2":
                print("正在生成本周分析報告...")
                today = datetime.now()
                self.generate_weekly_report(today.year, int(today.strftime("%W")))
            elif choice == "3":
                date_str = input("請輸入日期(格式:YYYY-MM-DD): ")
                try:
                    date = datetime.strptime(date_str.strip(), "%Y-%m-%d")
                except ValueError:
                    print("日期格式錯誤!")
                else:
                    self.generate_daily_report(date)
            elif choice == "4":
                year = self._prompt_int("請輸入年份: ")
                week = self._prompt_int("請輸入周數(1-52): ") if year is not None else None
                if week is not None:
                    self.generate_weekly_report(year, week)
            elif choice in range_actions:
                prompt, action = range_actions[choice]
                days = self._prompt_int(prompt)
                if days is not None:
                    end = datetime.now()
                    action(end - timedelta(days=days), end)
            else:
                print("無效的選擇,請重試!")
            input("\n按回車鍵繼續...")

    # ------------------------------------------------------------------
    # HTTP
    # ------------------------------------------------------------------
    def _make_request(self, url, method='get', params=None, data=None):
        """Send an HTTP request.

        Args:
            url: Request URL.
            method: ``'get'`` or anything else for POST (case-insensitive).
            params: Query-string parameters.
            data: JSON body, sent for POST requests only.

        Returns:
            The ``requests`` response, or ``None`` when the request fails or
            the server answers with an error status.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        try:
            if method.lower() == 'get':
                response = requests.get(
                    url, params=params, headers=headers, verify=False, timeout=10
                )
            else:
                response = requests.post(
                    url, params=params, json=data, headers=headers,
                    verify=False, timeout=10,
                )
            response.raise_for_status()
            return response
        except requests.exceptions.RequestException as e:
            self.logger.error(f"請求失敗: {str(e)}")
            return None

    # ------------------------------------------------------------------
    # Fetching
    # ------------------------------------------------------------------
    def fetch_daily_report(self, date):
        """Fetch the daily price report for *date*.

        Returns:
            A dict with the report's conclusions plus ``daylyDate`` and
            ``source`` (both are read by the report generators), or ``None``
            if the request fails or no report matches the date.
        """
        try:
            url = f"{API_BASE_URL}/api/FarmDaily/list"
            target_date = date.strftime("%Y-%m-%d")
            response = self._make_request(url, method='post', data={"daylyDate": target_date})
            if not response:
                return None

            data = response.json()
            report_list = (data.get("content") or {}).get("list")
            if data.get("code") == 200 and report_list:
                for report in report_list:
                    if not str(report.get("daylyDate", "")).startswith(target_date):
                        continue
                    # Field names ("counclesion", "countent", "daylyDate") are
                    # the keys this code has always read from the response.
                    return {
                        "daylyDate": str(report.get("daylyDate") or target_date),
                        "source": report.get("source") or DATA_SOURCE,
                        "conclusion": report.get("counclesion", ""),
                        "indexConclusion": report.get("indexConclusion", ""),
                        "animalConclusion": report.get("animalConclusion", ""),
                        "aquaticConclusion": report.get("aquaticConclusion", ""),
                        "vegetablesConclusion": report.get("vegetablesConclusion", ""),
                        "fruitsConclusion": report.get("fruitsConclusion", ""),
                        "content": report.get("countent", ""),
                        "incOrReduRange": report.get("incOrReduRange", ""),
                    }
                self.logger.warning(f"未找到{target_date}的報告")
                return None

            self.logger.warning(f"獲取數據失敗: {data.get('message', '未知錯誤')}")
            return None
        except Exception as e:
            self.logger.error(f"獲取日度報告出錯: {str(e)}")
            return None

    def _extract_conclusions(self, report):
        """Return the per-category conclusions of *report* under short keys."""
        try:
            return {
                "index": report.get("indexConclusion", ""),
                "animal": report.get("animalConclusion", ""),
                "aquatic": report.get("aquaticConclusion", ""),
                "vegetables": report.get("vegetablesConclusion", ""),
                "fruits": report.get("fruitsConclusion", ""),
                "range": report.get("incOrReduRange", ""),
            }
        except Exception as e:
            self.logger.error(f"提取結論出錯: {str(e)}")
            return {}

    def fetch_index_data(self, start_date, end_date):
        """Fetch the price index rows published between *start_date* and *end_date*.

        Returns:
            A list of row dicts (possibly empty), or ``None`` on failure.
        """
        try:
            url = "https://pfsc.agri.cn/price_portal/pi-info-day/getPortalPiInfoDay"
            response = requests.post(url, headers=self.headers, verify=False, timeout=10)
            data = response.json()
            if data.get("code") != 200:
                return None
            # Column names must match the series plotted by plot_index_trend.
            return [
                {
                    "日期": item["publishDate"],
                    "農產品批發價格200指數": item["agriculture"],
                    "糧油指數": item["grainAndOil"],
                    "菜籃子指數": item["vegetableBasket"],
                }
                for item in data.get("content") or []
                if self._date_in_range(item["publishDate"], start_date, end_date)
            ]
        except Exception as e:
            self.logger.error(f"獲取指數數據失敗: {str(e)}")
            return None

    def _fetch_variety_series(self, pid, fields, start_date, end_date):
        """Fetch rows for category *pid* and map response fields to output columns.

        *fields* maps output column name -> response field; every mapped
        value is converted with ``float``. Exceptions propagate to the caller.
        """
        url = f"{API_BASE_URL}{API_ENDPOINTS['variety_list']}"
        response = self._make_request(url, method='post', params={'pid': pid})
        if not response:
            return None

        data = response.json()
        if data.get("code") == 200 and data.get("data"):
            rows = []
            for item in data["data"]:
                if not self._date_in_range(item["date"], start_date, end_date):
                    continue
                row = {"日期": item["date"]}
                row.update({column: float(item[key]) for column, key in fields.items()})
                rows.append(row)
            return rows

        self.logger.warning(f"獲取數據失敗: {data.get('message', '未知錯誤')}")
        return None

    def fetch_pig_price_data(self, start_date, end_date):
        """Fetch national and regional pork prices; ``None`` on failure."""
        try:
            return self._fetch_variety_series('MH', self._PIG_FIELDS, start_date, end_date)
        except Exception as e:
            self.logger.error(f"獲取豬肉價格數據失敗: {str(e)}")
            return None

    def fetch_grain_price_data(self, start_date, end_date):
        """Fetch the grain/oil price indices; ``None`` on failure."""
        try:
            return self._fetch_variety_series('TL', self._GRAIN_FIELDS, start_date, end_date)
        except Exception as e:
            self.logger.error(f"獲取糧油價格數據失敗: {str(e)}")
            return None

    # ------------------------------------------------------------------
    # Reports
    # ------------------------------------------------------------------
    def generate_daily_report(self, date):
        """Write the Markdown analysis report for *date* into the daily directory."""
        try:
            report_data = self.fetch_daily_report(date)
            if not report_data:
                self.logger.warning(f"未獲取到 {date.strftime('%Y-%m-%d')} 的報告數據")
                return

            # Fills missing conclusion fields with a placeholder (and logs them).
            self._validate_report_data(report_data)

            day_label = self._cn_date(date)
            now = datetime.now()
            report_file = os.path.join(self.daily_dir, f"{day_label}_價格分析報告.md")
            lines = [
                f"# {day_label} 農產品價格分析報告",
                "",
                "## 一、價格指數變化",
                report_data["indexConclusion"],
                "",
                "## 二、分品類分析",
                "",
                "### 1. 畜禽產品",
                report_data["animalConclusion"],
                "",
                "### 2. 水產品",
                report_data["aquaticConclusion"],
                "",
                "### 3. 蔬菜",
                report_data["vegetablesConclusion"],
                "",
                "### 4. 水果",
                report_data["fruitsConclusion"],
                "",
                "## 三、價格波動情況",
                str(report_data.get("incOrReduRange", "")),
                "",
                "## 四、數據說明",
                f"- 數據來源: {report_data.get('source', DATA_SOURCE)}",
                f"- 生成時間: {self._cn_date(now)} {now:%H:%M:%S}",
                "- 價格單位: 元/斤",
                "- 漲跌幅: 與上一交易日相比",
                "",
                "---",
                "*注: 本報告由系統自動生成,僅供參考。*",
            ]
            with open(report_file, "w", encoding="utf-8") as f:
                f.write("\n".join(lines) + "\n")
            self.logger.info(f"分析報告已生成: {report_file}")
        except Exception as e:
            self.logger.error(f"生成分析報告失敗: {str(e)}")

    def generate_weekly_report(self, year, week):
        """Write the Markdown summary report for week *week* of *year*.

        The week is parsed with ``%W`` (Monday as first day of the week),
        matching how ``run`` computes the current week.
        """
        try:
            start_date = datetime.strptime(f'{year}-W{week:02d}-1', '%Y-W%W-%w')
            end_date = start_date + timedelta(days=6)

            print(f"\n正在生成第{week}周報告...")
            print(f"時間范圍: {start_date.strftime('%Y-%m-%d')} 至 {end_date.strftime('%Y-%m-%d')}")
            print("=" * 50)

            # Collect every daily report available within the week.
            reports = []
            total_days = (end_date - start_date).days + 1
            for offset in range(total_days):
                print(f"\r進度: {offset + 1}/{total_days} ", end="")
                report = self.fetch_daily_report(start_date + timedelta(days=offset))
                if report:
                    reports.append(report)

            if not reports:
                self.logger.warning("本周無可用數據")
                return

            weekly_summary = self._calculate_weekly_summary(reports)
            now = datetime.now()
            report_file = os.path.join(
                self.weekly_dir,
                f"{year}年第{week:02d}周_{self._cn_date(start_date, with_year=False)}"
                f"-{self._cn_date(end_date, with_year=False)}_價格分析報告.md",
            )

            lines = [
                f"# {year}年第{week:02d}周農產品價格分析報告",
                f"({self._cn_date(start_date)} 至 {self._cn_date(end_date)})",
                "",
                "## 一、本周價格概況",
                weekly_summary['overview'],
                "",
                "## 二、價格指數變化",
                f"- 周初: {weekly_summary['index_start']}",
                f"- 周末: {weekly_summary['index_end']}",
                f"- 周度變化: {weekly_summary['index_change']}",
                "",
                "## 三、分品類周度分析",
                "### 1. 畜禽產品",
                weekly_summary['animal_summary'],
                "",
                "### 2. 水產品",
                weekly_summary['aquatic_summary'],
                "",
                "### 3. 蔬菜",
                weekly_summary['vegetables_summary'],
                "",
                "### 4. 水果",
                weekly_summary['fruits_summary'],
                "",
                "## 四、日度價格詳情",
                "",
            ]
            for report in reports:
                pub_date = datetime.strptime(report['daylyDate'][:10], '%Y-%m-%d')
                lines += [
                    f"### {self._cn_date(pub_date)}",
                    f"1. 價格指數: {report.get('indexConclusion') or '暫無數據'}",
                    f"2. 畜禽產品: {report.get('animalConclusion') or '暫無數據'}",
                    f"3. 水產品: {report.get('aquaticConclusion') or '暫無數據'}",
                    f"4. 蔬菜: {report.get('vegetablesConclusion') or '暫無數據'}",
                    f"5. 水果: {report.get('fruitsConclusion') or '暫無數據'}",
                    "",
                ]
            lines += [
                "## 五、數據說明",
                f"- 數據來源: {reports[0].get('source', DATA_SOURCE)}",
                f"- 生成時間: {self._cn_date(now)} {now:%H:%M:%S}",
                "- 價格單位: 元/公斤",
                "- 漲跌幅: 與上期相比",
                "",
                "---",
                "*注: 本報告由系統自動生成,僅供參考。*",
            ]
            with open(report_file, "w", encoding="utf-8") as f:
                f.write("\n".join(lines) + "\n")

            print("\n報告生成完成!")
            self.logger.info(f"周度報告已生成: {report_file}")
        except Exception as e:
            self.logger.error(f"生成周度報告失敗: {str(e)}")

    def _calculate_weekly_summary(self, reports):
        """Build the weekly summary dict from a non-empty list of daily reports.

        ``index_change`` is the difference between the first number that
        follows ``為``/``为`` in the last and first ``indexConclusion``; it is
        ``'數據異常'`` when either number cannot be found. Each category
        summary is the week's last non-empty conclusion for that category.
        """
        def index_value(report):
            # Accept both Traditional and Simplified forms of the marker.
            match = re.search(r'[為为]\s*(-?\d+(?:\.\d+)?)', str(report.get('indexConclusion', '')))
            if match is None:
                raise ValueError("index value not found")
            return float(match.group(1))

        def last_conclusion(field):
            for report in reversed(reports):
                if report.get(field):
                    return report[field]
            return '暫無數據'

        summary = {
            'overview': '',
            'index_start': reports[0].get('indexConclusion', '暫無數據'),
            'index_end': reports[-1].get('indexConclusion', '暫無數據'),
            'index_change': '',
            'animal_summary': last_conclusion('animalConclusion'),
            'aquatic_summary': last_conclusion('aquaticConclusion'),
            'vegetables_summary': last_conclusion('vegetablesConclusion'),
            'fruits_summary': last_conclusion('fruitsConclusion'),
        }

        try:
            change = index_value(reports[-1]) - index_value(reports[0])
            summary['index_change'] = f"{'上升' if change >= 0 else '下降'}{abs(change):.2f}個點"
        except ValueError:
            summary['index_change'] = '數據異常'

        summary['overview'] = (
            f"本周農產品批發價格200指數從{summary['index_start']},"
            f"到{summary['index_end']},整體{summary['index_change']}。"
        )
        return summary

    # ------------------------------------------------------------------
    # Charts
    # ------------------------------------------------------------------
    def _plot_trend(self, fetch, start_date, end_date, series, title, filename,
                    label, fill=False, legend=True):
        """Fetch rows with *fetch* and save a line chart of *series* to *filename*.

        *series* is a sequence of ``(column, colour)`` pairs; every column
        must exist in the fetched rows. *label* is the chart name used in the
        log messages. Nothing is drawn when no rows are fetched.
        """
        try:
            data = fetch(start_date, end_date)
            if not data:
                return

            face = CHART_STYLE['figure']['facecolor']
            plt.figure(figsize=CHART_STYLE['figure']['figsize'], facecolor=face)
            try:
                plt.gca().set_facecolor(face)
                dates = [item["日期"] for item in data]
                for name, color in series:
                    values = [item[name] for item in data]
                    plt.plot(dates, values, color=color, label=name, **CHART_STYLE['line'])
                    if fill:
                        plt.fill_between(dates, values, color='#e6f7ff', alpha=0.5)
                plt.title(title, pad=20, fontsize=12, loc='left')
                plt.grid(True, linestyle='--', alpha=0.3)
                plt.xticks(rotation=45)
                if legend:
                    plt.legend(loc='upper right', frameon=False)
                plt.tight_layout()
                plt.savefig(os.path.join(self.output_dir, filename),
                            dpi=300, bbox_inches='tight')
            finally:
                # Always release the figure, including when drawing fails.
                plt.close()
            self.logger.info(f"{label}已生成")
        except Exception as e:
            self.logger.error(f"生成{label}失敗: {str(e)}")

    def plot_index_trend(self, start_date, end_date):
        """Plot the three price indices returned by ``fetch_index_data``."""
        colors = CHART_STYLE['colors']
        self._plot_trend(
            self.fetch_index_data, start_date, end_date,
            series=[
                ("農產品批發價格200指數", colors['orange']),
                ("菜籃子指數", colors['green']),
                ("糧油指數", colors['blue']),
            ],
            title='農業農村部"農產品批發價格200指數"日度走勢圖',
            filename="價格指數走勢圖.png",
            label="價格指數走勢圖",
        )

    def plot_pig_price_trend(self, start_date, end_date):
        """Plot the national pork price as a filled line, without a legend."""
        self._plot_trend(
            self.fetch_pig_price_data, start_date, end_date,
            series=[("全國", CHART_STYLE['colors']['blue'])],
            title='"瘦肉型白條豬肉出廠價格指數"全國走勢圖',
            filename="豬肉價格走勢圖.png",
            label="豬肉價格走勢圖",
            fill=True,
            legend=False,
        )

    def plot_pig_price_region_trend(self, start_date, end_date):
        """Plot the pork price of each region as one line per region."""
        colors = CHART_STYLE['colors']
        self._plot_trend(
            self.fetch_pig_price_data, start_date, end_date,
            series=[
                ("東北", colors['blue']),
                ("華南", colors['green']),
                ("華北", colors['orange']),
                ("華中", colors['red']),
                ("華東", colors['purple']),
                ("西南", colors['cyan']),
            ],
            title='"瘦肉型白條豬肉出廠價格指數"區域走勢圖',
            filename="豬肉價格區域走勢圖.png",
            label="豬肉價格區域走勢圖",
        )

    def plot_grain_price_trend(self, start_date, end_date):
        """Plot every grain/oil index returned by ``fetch_grain_price_data``."""
        palette = CHART_STYLE['colors']
        order = ('blue', 'green', 'orange', 'red', 'purple', 'cyan')
        self._plot_trend(
            self.fetch_grain_price_data, start_date, end_date,
            series=[(name, palette[key]) for name, key in zip(self._GRAIN_FIELDS, order)],
            title='中國通義糧油批發價格指數走勢圖',
            filename="糧油價格指數走勢圖.png",
            label="糧油價格指數走勢圖",
        )

    # ------------------------------------------------------------------
    # Export
    # ------------------------------------------------------------------
    def export_data(self, start_date, end_date, format='excel'):
        """Export index, pork and grain data for the given range.

        Args:
            start_date: Range start.
            end_date: Range end.
            format: ``'excel'``, ``'csv'`` or ``'json'``.
        """
        try:
            data = {
                'index': self.fetch_index_data(start_date, end_date),
                'pig': self.fetch_pig_price_data(start_date, end_date),
                'grain': self.fetch_grain_price_data(start_date, end_date),
            }
            if not any(data.values()):
                return

            exporters = {
                'excel': self._export_excel,
                'csv': self._export_csv,
                'json': self._export_json,
            }
            exporter = exporters.get(format)
            if exporter is None:
                self.logger.error(f"不支持的導出格式: {format}")
                return
            exporter(data, start_date, end_date)
        except Exception as e:
            self.logger.error(f"導出數據失敗: {str(e)}")

    def _clean_text(self, text):
        """Collapse whitespace and repair doubled/misplaced sentence punctuation."""
        if not text:
            return ""
        text = ' '.join(text.split())
        for broken in ('。。', ',。', ',。', ';。', ';。'):
            text = text.replace(broken, '。')
        # Drop characters that cannot be encoded as UTF-8 (e.g. lone
        # surrogates) instead of raising on them.
        return text.encode('utf-8', 'ignore').decode('utf-8', 'ignore')

    def _validate_report_data(self, report):
        """Check the conclusion fields of *report*, filling gaps in place.

        Returns:
            True when every field is present and non-empty; otherwise False,
            after replacing each missing field with ``"暫無數據"``.
        """
        is_valid = True
        for field in self._CONCLUSION_FIELDS:
            if not report.get(field):
                self.logger.warning(f"報告缺少 {field} 數據")
                is_valid = False
                report[field] = "暫無數據"
        return is_valid

    def _export_stem(self, start_date, end_date):
        """Return the shared ``價格數據_<start>_<end>`` file-name stem."""
        return f"價格數據_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}"

    def _export_excel(self, data, start_date, end_date):
        """Write each non-empty dataset to its own sheet of one ``.xlsx`` file."""
        try:
            filepath = os.path.join(self.output_dir, self._export_stem(start_date, end_date) + ".xlsx")
            with pd.ExcelWriter(filepath) as writer:
                for key, sheet_name in self._EXPORT_LABELS.items():
                    if data.get(key):
                        pd.DataFrame(data[key]).to_excel(writer, sheet_name=sheet_name, index=False)
            self.logger.info(f"數據已導出至: {filepath}")
            return True
        except Exception as e:
            self.logger.error(f"導出Excel失敗: {str(e)}")
            return False

    def _export_csv(self, data, start_date, end_date):
        """Write each non-empty dataset to its own ``.csv`` file."""
        try:
            stem = self._export_stem(start_date, end_date)
            for key, label in self._EXPORT_LABELS.items():
                if not data.get(key):
                    continue
                filepath = os.path.join(self.output_dir, f"{stem}_{label}.csv")
                # utf-8-sig writes a BOM so spreadsheet tools detect the encoding.
                pd.DataFrame(data[key]).to_csv(filepath, index=False, encoding='utf-8-sig')
                self.logger.info(f"數據已導出至: {filepath}")
            return True
        except Exception as e:
            self.logger.error(f"導出CSV失敗: {str(e)}")
            return False

    def _export_json(self, data, start_date, end_date):
        """Write all datasets to a single ``.json`` file."""
        try:
            filepath = os.path.join(self.output_dir, self._export_stem(start_date, end_date) + ".json")
            with open(filepath, 'w', encoding='utf-8') as f:
                json.dump(data, f, ensure_ascii=False, indent=2)
            self.logger.info(f"數據已導出至: {filepath}")
            return True
        except Exception as e:
            self.logger.error(f"導出JSON失敗: {str(e)}")
            return False

    # ------------------------------------------------------------------
    # Bulk crawling
    # ------------------------------------------------------------------
    def fetch_all_categories(self):
        """Fetch the variety list of every known category, keyed by category name."""
        # NOTE(review): the original literal listed 'SC' twice ('蔬菜', then
        # '水產'); the later entry silently won, so vegetables were never
        # requested. The effective mapping is kept here -- confirm the real
        # vegetable/aquatic codes and add the missing one.
        categories = {
            'MH': '豬肉',
            'SC': '水產',
            'SG': '水果',
            'TL': '糧油',
            'DJ': '蛋雞',
            'NR': '牛肉',
            'YR': '羊肉',
        }
        url = f"{API_BASE_URL}{API_ENDPOINTS['variety_list']}"
        result = {}
        for code, name in categories.items():
            try:
                response = self._make_request(url, method='post', params={'pid': code})
                if not response:
                    continue
                payload = response.json()
                if payload.get("code") == 200:
                    result[name] = payload.get("data", [])
            except Exception as e:
                self.logger.error(f"獲取{name}品類數據失敗: {str(e)}")
        return result

    def fetch_market_prices(self, market_id=None, variety_id=None, start_date=None, end_date=None):
        """Fetch daily market prices, optionally filtered; ``None`` on failure."""
        try:
            url = f"{API_BASE_URL}{API_ENDPOINTS['daily_price']}"
            params = {
                'marketId': market_id,
                'varietyId': variety_id,
                'startDate': start_date.strftime("%Y-%m-%d") if start_date else None,
                'endDate': end_date.strftime("%Y-%m-%d") if end_date else None,
            }
            response = self._make_request(url, method='post', params=params)
            if not response:
                return None
            payload = response.json()
            if payload.get("code") == 200:
                return payload.get("data", [])
            return None
        except Exception as e:
            self.logger.error(f"獲取市場價格數據失敗: {str(e)}")
            return None

    def fetch_analysis_reports(self, page=1, page_size=10):
        """Fetch one page of the analysis report list; ``None`` on failure."""
        try:
            url = f"{API_BASE_URL}{API_ENDPOINTS['analysis_report']}"
            params = {'pageNum': page, 'pageSize': page_size}
            response = self._make_request(url, method='post', params=params)
            if not response:
                return None
            payload = response.json()
            if payload.get("code") == 200:
                return (payload.get("data") or {}).get("list", [])
            return None
        except Exception as e:
            self.logger.error(f"獲取分析報告失敗: {str(e)}")
            return None

    def _iter_report_pages(self, max_pages):
        """Yield report-list pages until one is empty/failed or *max_pages* is reached."""
        for page in range(1, max_pages + 1):
            page_reports = self.fetch_analysis_reports(page=page)
            if not page_reports:
                return
            yield page_reports

    def crawl_all_data(self, start_date, end_date, max_pages=100):
        """Crawl categories, markets, per-variety prices and the report list.

        *max_pages* bounds the report pagination so a server that never
        returns an empty page cannot keep the loop running forever.

        Returns:
            A dict with ``categories``, ``markets``, ``prices`` and
            ``reports``, or ``None`` on failure.
        """
        try:
            categories = self.fetch_all_categories()

            markets_response = self._make_request(
                f"{API_BASE_URL}{API_ENDPOINTS['market_list']}", method='post'
            )
            markets = markets_response.json().get("data", []) if markets_response else []

            results = {
                'categories': categories,
                'markets': markets,
                'prices': {},
                'reports': [],
            }

            for category, varieties in categories.items():
                category_prices = results['prices'].setdefault(category, {})
                for variety in varieties:
                    variety_id = variety.get('id')
                    if variety_id:
                        category_prices[variety.get('name')] = self.fetch_market_prices(
                            variety_id=variety_id,
                            start_date=start_date,
                            end_date=end_date,
                        )

            for page_reports in self._iter_report_pages(max_pages):
                results['reports'].extend(page_reports)
            return results
        except Exception as e:
            self.logger.error(f"爬取所有數據失敗: {str(e)}")
            return None

    def fetch_weekly_report_content(self, report_id=None):
        """Fetch one report page and extract its title, date, source, text and tables.

        Returns:
            A dict with ``title``, ``date``, ``source`` and ``content``
            (``text`` paragraphs and ``tables`` as lists of records), or
            ``None`` if the request fails or the page has no report body.
        """
        # Local import: the file-level import block lies outside this block.
        from io import StringIO

        try:
            url = f"{API_BASE_URL}/product/analysis-report/getReportContent"
            params = {'id': report_id} if report_id else None
            response = self._make_request(url, method='post', params=params)
            if not response:
                return None

            soup = BeautifulSoup(response.text, 'html.parser')
            body = soup.find('div', class_='report-content')
            if body is None:
                self.logger.warning("報告頁面缺少正文內容")
                return None

            def text_of(tag, css_class):
                node = soup.find(tag, class_=css_class)
                return node.text.strip() if node else ""

            # read_html expects a file-like object for literal HTML.
            tables = [
                pd.read_html(StringIO(str(table)))[0].to_dict('records')
                for table in body.find_all('table')
            ]
            paragraphs = [p.text.strip() for p in body.find_all('p') if p.text.strip()]

            return {
                'title': text_of('h1', 'report-title'),
                'date': text_of('div', 'report-date'),
                'source': text_of('div', 'report-source'),
                'content': {'text': paragraphs, 'tables': tables},
            }
        except Exception as e:
            self.logger.error(f"獲取報告內容失敗: {str(e)}")
            return None

    def crawl_all_reports(self, start_date=None, end_date=None, max_pages=100):
        """Crawl report details, optionally restricted to a publish-date range.

        Sleeps one second after each detail request to avoid hammering the
        server. *max_pages* bounds the pagination.

        Returns:
            A list of ``{'meta': ..., 'content': ...}`` dicts, or ``None`` on
            failure.
        """
        try:
            reports = []
            for report_list in self._iter_report_pages(max_pages):
                if start_date or end_date:
                    report_list = [
                        report for report in report_list
                        if self._date_in_range(report['publishDate'], start_date, end_date)
                    ]
                for report in report_list:
                    report_content = self.fetch_weekly_report_content(report['id'])
                    if report_content:
                        reports.append({'meta': report, 'content': report_content})
                    time.sleep(1)
            return reports
        except Exception as e:
            self.logger.error(f"爬取報告失敗: {str(e)}")
            return None

    def save_reports(self, reports, output_dir='reports'):
        """Save each crawled report as ``<YYYYMMDD>_<id>.json`` in *output_dir*.

        Returns:
            True on success, False if any report could not be written.
        """
        try:
            os.makedirs(output_dir, exist_ok=True)
            for report in reports:
                meta = report['meta']
                date = datetime.strptime(str(meta['publishDate'])[:10], '%Y-%m-%d')
                filepath = os.path.join(output_dir, f"{date.strftime('%Y%m%d')}_{meta['id']}.json")
                with open(filepath, 'w', encoding='utf-8') as f:
                    json.dump(report, f, ensure_ascii=False, indent=2)
            return True
        except Exception as e:
            self.logger.error(f"保存報告失敗: {str(e)}")
            return False


if __name__ == "__main__":
    try:
        # Check/install dependencies, then start the interactive crawler.
        check_and_install_packages()
        crawler = ReportCrawler()
        crawler.run()
    except KeyboardInterrupt:
        print("\n程序已被用戶中斷")
        sys.exit(0)
    except Exception as e:
        print(f"\n程序運行出錯: {str(e)}")
        sys.exit(1)
一、功能介紹
本程序用于爬取農業農村部發布的農產品價格監測報告,包括以下功能:
1. 日度報告
- 生成今日分析報告
- 生成指定日期報告
- 包含價格指數、分品類分析等
2. 周度報告
- 生成本周分析報告
- 生成指定周報告
- 匯總周內價格變化
3. 價格走勢
- 農產品價格200指數走勢
- 豬肉價格全國走勢
- 豬肉價格區域走勢
- 糧油價格指數走勢
4. 數據導出
- 支持Excel格式導出
- 包含多個數據分類
- 支持時間范圍選擇
二、使用說明
1. 環境要求
- Python 3.7+
- 依賴包會自動安裝:
- requests: HTTP請求
- pandas: 數據處理
- matplotlib: 繪圖支持
- urllib3: HTTP客戶端
- openpyxl: Excel支持
- colorama: 控制臺顏色
2. 運行方法
```bash
# 直接運行程序
python report_crawler.py
```
3. 功能菜單
農產品價格報告爬蟲系統
1. 生成今日分析報告
2. 生成本周分析報告
3. 生成指定日期報告
4. 生成指定周報告
5. 生成價格指數走勢圖
6. 生成豬肉價格走勢圖
7. 生成區域價格走勢圖
8. 生成糧油價格走勢圖
9. 導出Excel數據
0. 退出系統
4. 輸出文件
- reports/daily: 日度分析報告
- reports/weekly: 周度分析報告
- reports: 價格走勢圖和Excel數據
三、數據來源
- 農業農村部市場信息中心
- 數據更新頻率: 每日14:00
四、注意事項
- 首次運行會自動檢查并安裝依賴包
- 所有數據僅供學習交流使用
- 建議使用時設置合理的時間范圍
- 如遇到問題可查看日志文件
五、更新記錄
v1.0.0 (2024-12-05)
- 實現基礎數據爬取功能
- 支持生成分析報告
- 支持繪制價格走勢圖
- 支持導出Excel數據
六、聯系方式
作者: xiaohai
僅用于學習交流