輕鬆獲取全網熱點,Python助你掌握實時輿情
在信息爆炸的時代,抖音、小紅書、微博、今日頭條和百度熱點等平臺每天都會產生海量熱門內容。無論是品牌營銷、競品分析還是輿情監控,掌握這些熱點新聞都至關重要。利用Python的強大爬蟲技術,可以自動化抓取各平臺的熱點數據,實時追蹤話題趨勢,挖掘用戶關注焦點。
通過Python的高效爬取和數據分析能力,不僅能快速獲取結構化熱點信息,還能結合自然語言處理(NLP)進行情感分析,洞察公眾情緒變化。無論是企業決策、內容創作還是市場研究,這一技術都能提供精準的數據支持,讓你在信息戰中搶佔先機!
代碼
# pip install requests beautifulsoup4
import requests
import random
from typing import Optional, List, Dict
from bs4 import BeautifulSoup# 平臺名稱映射
# Platform registry: maps the display name used by callers to the identifier
# each upstream source expects. ``None`` means that source does not cover the
# platform.
PLATFORMS = [
    {"name": "微博", "zhiwei_id": "weibo", "tophub_id": "s.weibo.com"},
    {"name": "抖音", "zhiwei_id": "douyin", "tophub_id": "douyin.com"},
    {"name": "嗶哩嗶哩", "zhiwei_id": "bilibili", "tophub_id": "bilibili.com"},
    {"name": "今日頭條", "zhiwei_id": "toutiao", "tophub_id": "toutiao.com"},
    {"name": "百度熱點", "zhiwei_id": "baidu", "tophub_id": "baidu.com"},
    {"name": "小紅書", "zhiwei_id": "little-red-book", "tophub_id": None},
    {"name": "快手", "zhiwei_id": "kuaishou", "tophub_id": None},
    {"name": "虎撲", "zhiwei_id": None, "tophub_id": "hupu.com"},
    {"name": "豆瓣小組", "zhiwei_id": None, "tophub_id": "douban.com"},
    {"name": "澎湃新聞", "zhiwei_id": None, "tophub_id": "thepaper.cn"},
    {"name": "知乎熱榜", "zhiwei_id": "zhihu", "tophub_id": "zhihu.com"},
]

# Identifiers of the platforms served by the zhiweidata API.
ZHIWEI_PLATFORMS = [p["zhiwei_id"] for p in PLATFORMS if p["zhiwei_id"]]

# Identifiers of the platforms served by tophub.today.
TOPHUB_PLATFORMS = [p["tophub_id"] for p in PLATFORMS if p["tophub_id"]]

# Browser-like User-Agent sent to the sources that are requested with headers.
_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"  # noqa 501

# Per-request timeout in seconds, shared by every source.
_REQUEST_TIMEOUT = 10


def get_zhiwei_hotnews(platform: str) -> Optional[List[Dict]]:
    """Fetch the trending list for one platform from the zhiweidata API.

    Args:
        platform: zhiweidata platform identifier (weibo, douyin, bilibili,
            toutiao, baidu, little-red-book, kuaishou, zhihu).

    Returns:
        A list of dicts with the keys ``name``, ``rank``, ``lastCount`` and
        ``url``, or ``None`` when the request fails or the payload does not
        have the expected shape. Never raises.
    """
    api_url = "https://trends.zhiweidata.com/hotSearchTrend/search/longTimeInListSearch"  # noqa 501
    params = {"type": platform, "sortType": "realTime"}
    headers = {
        "User-Agent": _USER_AGENT,
        "Referer": "https://trends.zhiweidata.com/",
    }
    try:
        response = requests.get(
            api_url, params=params, headers=headers, timeout=_REQUEST_TIMEOUT
        )
        response.raise_for_status()
        data = response.json()
    except Exception:
        # Deliberate best-effort boundary: any transport or decoding failure
        # means "this source is unavailable" so the caller can fall back.
        return None

    if not isinstance(data, dict) or not data.get("state"):
        return None
    entries = data.get("data")
    if not isinstance(entries, list):
        return None

    return [
        {
            "name": item.get("name", ""),
            "rank": item.get("rank", 0),
            "lastCount": item.get("lastCount", 0),
            "url": item.get("url", ""),
        }
        for item in entries
        if isinstance(item, dict)  # skip malformed rows instead of failing
    ]


def _parse_tophub_item(item, position: int) -> Optional[Dict]:
    """Convert one tophub list row into an entry dict, or None if it has no title."""
    title_span = item.find("span", class_="t")
    if title_span is None:
        return None

    rank_span = item.find("span", class_="s")
    try:
        rank = int(rank_span.text.strip())
    except (AttributeError, ValueError):
        # Missing or non-numeric rank label: fall back to the row position so
        # a usable title is not thrown away.
        rank = position

    engagement = item.find("span", class_="e")
    link = item.find("a")
    return {
        "name": title_span.text.strip(),
        "rank": rank,
        "lastCount": engagement.text.strip() if engagement is not None else "0",
        "url": link.get("href", "") if link is not None else "",
    }


def get_tophub_hotnews(platform: str, cnt: int = 10) -> Optional[List[Dict]]:
    """Scrape the trending list for one platform from the tophub.today home page.

    Args:
        platform: Platform display name exactly as it should match the card
            label on the page (e.g. "微博").
        cnt: Maximum number of rows to read from the matching card.

    Returns:
        A list of dicts with the keys ``name``, ``rank``, ``lastCount`` and
        ``url`` for the first card whose label equals ``platform``; ``None``
        when the page cannot be fetched or no card matches. Never raises.
    """
    api_url = "https://tophub.today/"
    headers = {"User-Agent": _USER_AGENT}
    try:
        response = requests.get(
            api_url, headers=headers, timeout=_REQUEST_TIMEOUT
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        for card in soup.find_all("div", class_="cc-cd"):
            label = card.find("div", class_="cc-cd-lb")
            # A card without a label must not abort the search for later cards.
            platform_span = label.find("span") if label is not None else None
            if platform_span is None:
                continue
            if platform_span.text.strip() != platform:
                continue

            rows = card.find_all("div", class_="cc-cd-cb-ll")[:cnt]
            parsed = (
                _parse_tophub_item(row, position)
                for position, row in enumerate(rows, start=1)
            )
            return [entry for entry in parsed if entry is not None]
        return None
    except Exception:
        # Deliberate best-effort boundary: treat any failure as "source
        # unavailable" so the caller can fall back to the next source.
        return None


def get_vvhan_hotnews() -> Optional[List[Dict]]:
    """Fetch the combined trending lists from the vvhan API (last-resort source).

    Returns:
        The ``data`` list of the response when the payload reports success,
        otherwise ``None``. The caller in this file reads each element as
        ``{"name": <platform name>, "data": [...]}``. Never raises.
    """
    api_url = "https://api.vvhan.com/api/hotlist/all"
    try:
        response = requests.get(api_url, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()
        data = response.json()
    except Exception:
        # Deliberate best-effort boundary, same policy as the other sources.
        return None

    if (
        isinstance(data, dict)
        and data.get("success")
        and isinstance(data.get("data"), list)
    ):
        return data["data"]
    return None


def _entry_names(hotnews: Optional[List[Dict]], cnt: int) -> List[str]:
    """Return the non-empty ``name`` values of the first ``cnt`` entries."""
    if not hotnews:
        return []
    return [item.get("name", "") for item in hotnews[:cnt] if item.get("name")]


def get_platform_news(platform: str, cnt: int = 10) -> List[str]:
    """Return trending titles for a platform, trying each source in order.

    Sources are tried as zhiweidata, then tophub.today, then vvhan. The next
    source is used whenever the previous one yields no usable titles.

    Args:
        platform: Platform display name as listed in ``PLATFORMS``
            (e.g. "微博").
        cnt: Maximum number of titles to return.

    Returns:
        A list of title strings; empty when the platform is unknown or every
        source fails.
    """
    platform_info = next((p for p in PLATFORMS if p["name"] == platform), None)
    if not platform_info:
        return []

    # 1. Preferred source: zhiweidata.
    if platform_info["zhiwei_id"] in ZHIWEI_PLATFORMS:
        titles = _entry_names(get_zhiwei_hotnews(platform_info["zhiwei_id"]), cnt)
        if titles:
            return titles

    # 2. Fallback: tophub.today (matched by display name, not by tophub_id).
    if platform_info["tophub_id"] in TOPHUB_PLATFORMS:
        titles = _entry_names(get_tophub_hotnews(platform, cnt), cnt)
        if titles:
            return titles

    # 3. Last resort: vvhan. The payload is read defensively because this
    #    code sits outside the fetchers' try/except.
    hotnews = get_vvhan_hotnews()
    if not hotnews:
        return []
    platform_data = next(
        (
            pf.get("data")
            for pf in hotnews
            if isinstance(pf, dict) and pf.get("name") == platform
        ),
        None,
    )
    if not isinstance(platform_data, list):
        return []
    return [
        item["title"]
        for item in platform_data[:cnt]
        if isinstance(item, dict) and item.get("title")
    ]


def select_platform_topic(platform: str, cnt: int = 10) -> str:
    """Pick one trending topic for a platform, favouring higher-ranked ones.

    Topics are weighted by ``1 / rank**2`` (rank starting at 1). When no topic
    can be fetched, a notice is printed and a default topic is used.

    Args:
        platform: Platform display name as listed in ``PLATFORMS``
            (e.g. "微博").
        cnt: Maximum number of topics to choose from.

    Returns:
        The selected topic with every "|" replaced by "——".
    """
    topics = get_platform_news(platform, cnt)
    if not topics:
        topics = ["歷史上的今天"]
        print(f"平臺 {platform} 無法獲取到熱榜,接口暫時不可用,將使用默認話題。")

    # Inverse-square weighting: earlier (higher-ranked) topics are more likely.
    weights = [1 / rank ** 2 for rank in range(1, len(topics) + 1)]
    selected_topic = random.choices(topics, weights=weights, k=1)[0]

    # Replace "|" in the title with "——".
    return selected_topic.replace("|", "——")
# Example usage: fetch up to 50 trending titles for Xiaohongshu and show them.
# A bare ``topics`` expression only displays a value in a REPL/notebook; when
# run as a script it does nothing, so print explicitly.
topics = get_platform_news("小紅書", 50)
print(topics)