爬取網站鏈接:https://lishi.tianqi.com/xiamen/202312.html
爬取了廈門市2023年一整年的天氣數據,包括最高溫、最低溫、天氣、風力風向等。
爬蟲代碼:
import requests
import pandas as pd
import csv
from pyecharts.charts import Bar,Timeline
import pyecharts.options as opts
import parsel

# Request metadata captured from a browser session; the same cookies and
# headers are sent with every monthly request.
cookies = {
    'cityPy': 'xiamen',
    'cityPy_expire': '1721098187',
    'UserId': '17204933865319972',
    'Hm_lvt_ab6a683aa97a52202eab5b3a9042a8d2': '1720493387',
    'HMACCOUNT': '4A9167DA75AB7059',
    'Hm_lpvt_ab6a683aa97a52202eab5b3a9042a8d2': '1720493402',
    'Hm_lvt_7c50c7060f1f743bccf8c150a646e90a': '1720493523',
    'Hm_lvt_30606b57e40fddacb2c26d2b789efbcb': '1720493529',
    'Hm_lpvt_30606b57e40fddacb2c26d2b789efbcb': '1720493529',
    'Hm_lpvt_7c50c7060f1f743bccf8c150a646e90a': '1720493646',
}
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'no-cache',
    # No raw 'cookie' header here: the cookie values are passed through the
    # ``cookies`` argument of ``requests.get`` instead.
    'pragma': 'no-cache',
    'priority': 'u=0, i',
    'referer': 'https://lishi.tianqi.com/xiamen/202302.html',
    'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
}

# Scrape the twelve monthly history pages of 2023 and write one CSV row per
# day. The file is opened in a ``with`` block so it is flushed and closed as
# soon as scraping ends; the analysis step further down reads this same file.
with open('天氣數據.csv', mode='w', encoding='utf-8', newline='') as f:
    csv_writer = csv.DictWriter(
        f,
        fieldnames=['日期', '星期', '最高溫', '最低溫', '天氣', '風向', '風力'],
    )
    csv_writer.writeheader()

    for month_no in range(1, 13):
        # Zero-pad the month (01..12) to match the site's URL scheme.
        url = f'https://lishi.tianqi.com/xiamen/2023{month_no:02d}.html'
        # A timeout keeps a stalled connection from hanging the script.
        response = requests.get(
            url=url, cookies=cookies, headers=headers, timeout=10
        )
        # Fail loudly on an HTTP error instead of parsing an error page
        # and silently producing no rows for that month.
        response.raise_for_status()
        response.encoding = response.apparent_encoding
        html = response.text
        selector = parsel.Selector(html)
        li_list = selector.css('div.tian_three ul.thrui li')
        for li in li_list:
            date_text = li.css('div.th200::text').get()
            all_list = li.css('div.th140::text').getall()
            # "<date> <weekday>" and "<wind direction> <wind force>" are
            # both space-separated pairs.
            date_parts = date_text.split(' ') if date_text else []
            wind_parts = all_list[3].split(' ') if len(all_list) > 3 else []
            if len(date_parts) < 2 or len(wind_parts) < 2:
                # Skip (and report) an incomplete row rather than crashing
                # in the middle of the scrape.
                print(f'skipped malformed row in {url}')
                continue

            date_time, xingqi_time = date_parts[0], date_parts[1]
            max_temp, min_temp, weather = all_list[0], all_list[1], all_list[2]
            wind_orient, wind_rank = wind_parts[0], wind_parts[1]
            dit = {
                '日期': date_time,
                '星期': xingqi_time,
                '最高溫': max_temp,
                '最低溫': min_temp,
                '天氣': weather,
                '風向': wind_orient,
                '風力': wind_rank,
            }
            csv_writer.writerow(dit)
            print(date_time, xingqi_time, max_temp, min_temp, weather, wind_orient, wind_rank)
爬取結果保存到 csv 文件(天氣數據.csv):

接下來讀取文件,對文件格式做調整,以進行繪圖操作。
# Load the scraped CSV and count, per month, how many days had each weather type.
df = pd.read_csv('天氣數據.csv')
# info() prints its report itself and returns None, so it is not wrapped in print().
df.info()
df['日期'] = pd.to_datetime(df['日期'])
# Monthly period of each date; used as the grouping key below.
df['month_'] = df['日期'].dt.to_period('M')
new_data = df.groupby(['month_', '天氣']).size().reset_index()
new_data.columns = ['month', 'weather', 'count']  # rename the columns
print(df.head())  # call head() so the first rows are shown, not the bound method
print(new_data)
# Conditional filter: select January's rows and keep only the weather and
# count columns (the selection itself is still a DataFrame).
print(
    new_data[new_data['month'] == '2023-01'][['weather', 'count']]
    .sort_values(by='count', ascending=False)
    .values.tolist()
)
# .sort_values(by='count', ascending=False).values.tolist() sorts by count in
# descending order and converts the values to a list of [weather, count] pairs:
# [['多云', 14], ['小雨', 5], ['晴', 5], ['霧', 4], ['陰', 2], ['中雨', 1]]
new_data 數據格式如下(每行為月份 month、天氣類型 weather 及其出現天數 count):

現在進行繪圖操作:
# Create the Timeline object that holds one bar chart per month.
timeline = Timeline()
timeline.add_schema(play_interval=1000)  # playback interval, in milliseconds
for month in new_data['month'].unique():
    # Rows of this month only, ordered from the most to the least frequent weather.
    monthly_rows = new_data[new_data['month'] == month]
    ranked = monthly_rows[['weather', 'count']].sort_values(by='count', ascending=False)
    data = ranked.values.tolist()
    print(data)

    weather_names = [pair[0] for pair in data]
    day_counts = [pair[1] for pair in data]

    bar = Bar()
    bar.add_xaxis(weather_names)
    bar.add_yaxis('', day_counts)
    bar.reversal_axis()  # flip the x and y axes
    bar.set_global_opts(title_opts=opts.TitleOpts(title='廈門市2023年每月天氣變化'))
    bar.set_series_opts(label_opts=opts.LabelOpts(position='right'))

    timeline.add(bar, f'{month}')
# Write the assembled timeline to an HTML file once every month has been added.
timeline.render('天氣輪播圖.html')
點擊輪播圖下方的按鈕就可以進行播放了。