文章目錄
- 需求分析
- 技術要點
- 程序流程
- 一些細節
- 核心代碼
- 表格的一些操作
- 心得體會
- 代碼匯總
需求分析
請設計一個程序,要求能夠統計分析分散在不同表格中的數萬條信息,以信息中的身份證號碼
或生源地代碼
字段為目標字段,統計每一年全國各省份及本省各市州的人數,還需要統計全國各個省份總的人數信息,此外,將每一年本省各市州的人數情況繪制動態時間線地圖,實現數據交互。
技術要點
- Excel 表格的讀取、寫入、單元格框線設置、統計圖繪制、增加表單。(openpyxl 模塊)
- 動態地圖的繪制。(pyecharts 模塊)
程序流程
一些細節
核心代碼
整個項目代碼匯總后雖然有四百余行代碼,但是核心代碼只有十幾行。 該代碼實現了表格數據的遍歷,之所以使用try-except
結構,是因為有些數據并不符合字典所規定的鍵值,故會產生 KeyError
異常。
# 遍歷身份證號碼for i in range(sheet.min_row+1, sheet.max_row + 1):id_card_value = str(sheet.cell(i, sfzh_index).value)id_card_value = id_card_value.replace(".", "")try:PROVINCE_COUNT_dict[id_card_value[:2]] += 1TOTAL_PROVINCE_COUNT_dict[id_card_value[:2]] += 1if id_card_value[:2] == "62":GANSU_CITY_COUNT_dict[id_card_value[:4]] += 1TOTAL_GANSU_CITY_COUNT_dict[id_card_value[:4]] += 1except KeyError:print("【KeyError】", id_card_value)
表格的一些操作
# 加載工作簿
workbook = load_workbook(filename=year+'.xlsx')
# 選擇工作表
sheet = workbook.active
# 表格行數
sheet.max_row
# 新增表單并設置單元格寬度
ws = wb.create_sheet("string")
ws.column_dimensions['A'].width=20
# 定義邊框框線
# 定義邊框樣式
border_style = Side(border_style="thin", color="FF000000")
border = Border(left=border_style, right=border_style, top=border_style, bottom=border_style)
cell_range = ws["A1:C63"] # 選擇單元格范圍并應用邊框
for row in cell_range:for cell in row:cell.border = border
# 插入柱狀圖
bar1 = BarChart()
bar1.height = 15
bar1.width = 30
bar1.type = "col"
# bar1.style = 10
bar1.title = "全國各省份數據"
bar1.y_axis.title = "人數"
bar1.x_axis.title = "省份"
labels = Reference(ws, min_col=1, min_row=6, max_row=39)
data = Reference(ws, min_col=2, min_row=5, max_row=39)
bar1.add_data(data, titles_from_data=True)
bar1.set_categories(labels)
ws.add_chart(bar1, "F2")
# 保存表格
wb.save("output.xlsx")
心得體會
- 在預處數據時,統一的文件名稱、文件格式及表格格式會給代碼編寫帶來方便。例如,
workbook = load_workbook(filename=year+'.xlsx')
,可以在循環中直接使用年份名稱的方式循環讀取文件名,實現一個變量既指代了文件名,又表達了年份信息的目的。 - 嘗試使用多個字典進行連續索引,保證每個字典僅有一個值。例如,
PROVINCE = {"11": "北京市"}
,TOTAL_PROVINCE_COUNT_dict = {"11": 0}
,在最終需要呈現給用戶北京市:0
的信息時,通過字典的連續索引實現,相比較使用多個值的方式,程序的耦合度更低。
代碼匯總
# encoding: utf-8
from openpyxl import load_workbook, Workbook
from openpyxl.styles import Border, Side
from openpyxl.chart import BarChart, LineChart, PieChart, Reference, ProjectedPieChart
from openpyxl.chart.series import DataPointfrom pyecharts.charts import Map, Geo, Timeline
from pyecharts import options as opts
from pyecharts.globals import ThemeTypeYEAR_RANGE = [ "91", "92", "93", "94", "95","96", "97", "98", "99", "00","01", "02", "03", "04", "05","06", "07", "08", "09", "10","11", "12", "13", "14", "15","16", "17", "18", "19", "20","21", "22", "23", "24", "25"]PROVINCE = {"11": "北京市", "12": "天津市", "13": "河北省", "14": "山西省", "15": "內蒙古自治區", "21": "遼寧省", "22": "吉林省", "23": "黑龍江省", "31": "上海市", "32": "江蘇省", "33": "浙江省", "34": "安徽省", "35": "福建省", "36": "江西省", "37": "山東省", "41": "河南省", "42": "湖北省", "43": "湖南省", "44": "廣東省", "45": "廣西壯族自治區", "46": "海南省", "50": "重慶市", "51": "四川省", "52": "貴州省", "53": "云南省", "54": "西藏自治區", "61": "陜西省", "62": "甘肅省", "63": "青海省", "64": "寧夏回族自治區", "65": "新疆維吾爾自治區", "71": "臺灣省", "81": "香港特別行政區", "82": "澳門特別行政區"
}GANSU_CITY = {'6201': '蘭州市', '6202': '嘉峪關市', '6203': '金昌市', '6204': '白銀市', '6205': '天水市', '6206': '武威市', '6207': '張掖市', '6208': '平涼市', '6209': '酒泉市', '6210': '慶陽市', '6211': '定西市', '6212': '隴南市', '6221': '酒泉市', '6222': '張掖市', '6223': '武威市', '6224': '定西市', '6226': '隴南市', '6227': '平涼市', '6228': '慶陽市', '6229': '臨夏回族自治州', '6230': '甘南藏族自治州'
}TOTAL_PROVINCE_COUNT_dict = {"11": 0, "12": 0, "13": 0, "14": 0, "15": 0, "21": 0, "22": 0, "23": 0, "31": 0, "32": 0, "33": 0, "34": 0, "35": 0, "36": 0, "37": 0, "41": 0, "42": 0, "43": 0, "44": 0, "45": 0, "46": 0, "50": 0, "51": 0, "52": 0, "53": 0, "54": 0, "61": 0, "62": 0, "63": 0, "64": 0, "65": 0, "71": 0, "81": 0, "82": 0}TOTAL_GANSU_CITY_COUNT_dict = {'6201': 0, '6202': 0, '6203': 0, '6204': 0, '6205': 0, '6206': 0, '6207': 0, '6208': 0, '6209': 0, '6210': 0, '6211': 0, '6212': 0, '6221': 0, '6222': 0, '6223': 0, '6224': 0, '6226': 0, '6227': 0, '6228': 0, '6229': 0, '6230': 0}# 保存畢業生總數
count_stdudent_1 = 0
count_stdudent_1_list = []
# 明晰生源地的畢業生總數
count_stdudent_2 = 0
# 繪制MAP
t = Timeline(init_opts=opts.InitOpts(width="2000px", height="1000px"))# 保存輸出結果
wb = Workbook()# 將每年數據視為sheet,獨立統計
for year in YEAR_RANGE:print()print("【年份】:", year)print("===")# 加載工作簿workbook = load_workbook(filename=year+'.xlsx')# 選擇工作表sheet = workbook.activeprint("畢業生總數:{}\n".format(sheet.max_row-1))count_stdudent_1 += sheet.max_row-1count_stdudent_1_list.append(sheet.max_row-1)# 獲取最小和最大行數# print(year, sheet.min_column, sheet.max_column, sheet.min_row, sheet.max_row)# 跳過如下沒有身份證號碼的年份if year in ["91", "92", "93", "94", "95", "96", "97", "98", "99", "00", "01"]:continuecount_stdudent_2 += sheet.max_row-1header_list = []sfzh_index = -1# 遍歷表頭for j in range(sheet.min_column, sheet.max_column + 1):cell_value = sheet.cell(sheet.min_row, j).valueheader_list.append(cell_value)if cell_value == "sfzh" or cell_value == "Syszddm":sfzh_index = j# print("年份:{}\t, 表頭長度:{}\t, 表頭:{}".format(year, len(header_list), header_list))PROVINCE_COUNT_dict = {"11": 0, "12": 0, "13": 0, "14": 0, "15": 0, "21": 0, "22": 0, "23": 0, "31": 0, "32": 0, "33": 0, "34": 0, "35": 0, "36": 0, "37": 0, "41": 0, "42": 0, "43": 0, "44": 0, "45": 0, "46": 0, "50": 0, "51": 0, "52": 0, "53": 0, "54": 0, "61": 0, "62": 0, "63": 0, "64": 0, "65": 0, "71": 0, "81": 0, "82": 0}GANSU_CITY_COUNT_dict = {'6201': 0, '6202': 0, '6203': 0, '6204': 0, '6205': 0, '6206': 0, '6207': 0, '6208': 0, '6209': 0, '6210': 0, '6211': 0, '6212': 0, '6221': 0, '6222': 0, '6223': 0, '6224': 0, '6226': 0, '6227': 0, '6228': 0, '6229': 0, '6230': 0}# 遍歷身份證號碼for i in range(sheet.min_row+1, sheet.max_row + 1):id_card_value = str(sheet.cell(i, sfzh_index).value)id_card_value = id_card_value.replace(".", "")try:PROVINCE_COUNT_dict[id_card_value[:2]] += 1TOTAL_PROVINCE_COUNT_dict[id_card_value[:2]] += 1if id_card_value[:2] == "62":GANSU_CITY_COUNT_dict[id_card_value[:4]] += 1TOTAL_GANSU_CITY_COUNT_dict[id_card_value[:4]] += 1except KeyError:print("【KeyError】", id_card_value)ws = wb.create_sheet(year)ws.column_dimensions['A'].width=20ws.column_dimensions['B'].width=14ws.append(list(("年份:", year)))ws.append(list(("畢業生總數:", sheet.max_row-1)))ws.append(list(()))print("\n全國各省份數據:")ws.append(list(("全國各省份數據:", "")))ws.append(list(("省份名稱", "生源地人數", "人數占比")))print("---")for k, v in PROVINCE_COUNT_dict.items():print("{}\t{}\t{:.2f}".format(PROVINCE[k], v, v/(sheet.max_row-1)))ws.append(list((PROVINCE[k], v, v/(sheet.max_row-1))))ws.append(list(()))print("\n甘肅省內數據:")ws.append(list(("甘肅省內數據:", "")))ws.append(list(("市州名稱", "生源地人數", "人數占比")))print("---")MAP_GANSU_PROVINCE_COUNT_dict = {}for k, v in GANSU_CITY_COUNT_dict.items():print("{}\t{}\t{:.2f}".format(GANSU_CITY[k], v, v/PROVINCE_COUNT_dict["62"]))ws.append(list((GANSU_CITY[k], v, v/PROVINCE_COUNT_dict["62"])))MAP_GANSU_PROVINCE_COUNT_dict[GANSU_CITY[k]] = vmap0 = (Map().add("年份", list(MAP_GANSU_PROVINCE_COUNT_dict.items()), "甘肅").set_global_opts(title_opts=opts.TitleOpts(title="甘肅各市州生源地人數"),visualmap_opts=opts.VisualMapOpts(max_=400, min_=0)))t.add(map0, "{} 年".format(year))# 定義邊框樣式border_style = Side(border_style="thin", color="FF000000")border = Border(left=border_style, right=border_style, top=border_style, bottom=border_style)# 選擇單元格范圍并應用邊框cell_range = ws["A1:C63"]for row in cell_range:for cell in row:cell.border = borderbar1 = BarChart()bar1.height = 15bar1.width = 30bar1.type = "col"# bar1.style = 10bar1.title = "全國各省份數據"bar1.y_axis.title = "人數"bar1.x_axis.title = "省份"labels = Reference(ws, min_col=1, min_row=6, max_row=39)data = Reference(ws, min_col=2, min_row=5, max_row=39)bar1.add_data(data, titles_from_data=True)bar1.set_categories(labels)ws.add_chart(bar1, "F2")bar2 = BarChart()bar2.height = 15bar2.width = 30bar2.type = "col"# bar2.style = 10bar2.title = "甘肅省內數據"bar2.y_axis.title = "人數"bar2.x_axis.title = "市州"labels = Reference(ws, min_col=1, min_row=43, max_row=63)data = Reference(ws, min_col=2, min_row=42, max_row=63)bar2.add_data(data, titles_from_data=True)bar2.set_categories(labels)ws.add_chart(bar2, "F35")# 以下為匯總數據
ws = wb.create_sheet("匯總")
ws.column_dimensions['A'].width=26
ws.column_dimensions['B'].width=14
ws.append(list(("匯總", "")))
ws.append(list(("畢業生總數(02-25):", count_stdudent_2)))
ws.append(list(()))print("\n全國各省份數據(匯總)")
ws.append(list(("全國各省份畢業生總數:", count_stdudent_2-TOTAL_PROVINCE_COUNT_dict["62"])))
ws.append(list(("省份名稱", "生源地人數", "人數占比")))
print("---")
for k, v in TOTAL_PROVINCE_COUNT_dict.items():print("{}\t{}\t{:.2f}".format(PROVINCE[k], v, v/count_stdudent_2))ws.append(list((PROVINCE[k], v, v/count_stdudent_2)))
ws.append(list(()))print("\n甘肅省內數據(匯總)")
ws.append(list(("甘肅省內畢業生總數:", TOTAL_PROVINCE_COUNT_dict["62"])))
ws.append(list(("市州名稱", "生源地人數", "人數占比")))
print("---")
for k, v in TOTAL_GANSU_CITY_COUNT_dict.items():print("{}\t{}\t{:.2f}".format(GANSU_CITY[k], v, v/TOTAL_PROVINCE_COUNT_dict["62"]))ws.append(list((GANSU_CITY[k], v, v/TOTAL_PROVINCE_COUNT_dict["62"])))# 定義邊框樣式
border_style = Side(border_style="thin", color="FF000000")
border = Border(left=border_style, right=border_style, top=border_style, bottom=border_style)
# 選擇單元格范圍并應用邊框
cell_range = ws["A1:C63"]
for row in cell_range:for cell in row:cell.border = borderbar1 = BarChart()
bar1.height = 15
bar1.width = 30
bar1.type = "col"
# bar1.style = 10
bar1.title = "全國各省份數據"
bar1.y_axis.title = "人數"
bar1.x_axis.title = "省份"
labels = Reference(ws, min_col=1, min_row=6, max_row=39)
data = Reference(ws, min_col=2, min_row=5, max_row=39)
bar1.add_data(data, titles_from_data=True)
bar1.set_categories(labels)
ws.add_chart(bar1, "F2")bar2 = BarChart()
bar2.height = 15
bar2.width = 30
bar2.type = "col"
# bar2.style = 10
bar2.title = "甘肅省內數據"
bar2.y_axis.title = "人數"
bar2.x_axis.title = "市州"
labels = Reference(ws, min_col=1, min_row=43, max_row=63)
data = Reference(ws, min_col=2, min_row=42, max_row=63)
bar2.add_data(data, titles_from_data=True)
bar2.set_categories(labels)
ws.add_chart(bar2, "F35")# 逐年人數統計
ws = wb.create_sheet("逐年人數統計")
ws.append(list(("年份", "人數")))
for i in range(len(YEAR_RANGE)):ws.append((YEAR_RANGE[i], count_stdudent_1_list[i]))
line = LineChart() # 折線圖
line.x_axis.title = "年份"
line.y_axis.title = "人數"
line.title = "每年畢業生數量"
xlabel = Reference(ws, min_col=1, min_row=2, max_row=ws.max_row)
data = Reference(ws, min_col=2, min_row=1, max_row=ws.max_row)
line.add_data(data, titles_from_data=True)
line.set_categories(xlabel)
ws.add_chart(line, "D3")wb.save("output.xlsx")
print("畢業生總數: ", count_stdudent_1)
print("明晰生源地的畢業生總數: ", count_stdudent_2)
print("逐年人數:", count_stdudent_1_list)# data=[("廣東省",10430.03),("山東省",9579.31),("河南省",9402.36),("四川省",8041.82),("江蘇省",7865.99),("河北省",7185.42),("湖南省",6568.37),("安徽省",5950.1),("浙江省",5442),("湖北省",5723.77),("廣西壯族自治區",4602.66),("云南省",4596.6),("江西省",4456.74),("遼寧省",4374.63),("黑龍江省",3831.22),("陜西省",3732.74),("山西省",3571.21),("福建省",3552),("重慶市",2884),("貴州省",3476.65),("吉林省",2746.22),("甘肅省",2557.53),("內蒙古自治區",2470.63),("上海市",2301.391),("臺灣省",2316.2),("新疆維吾爾自治區",2181.33),("北京市",1961.2),("天津市",1293.82),("海南省",867.15),("香港特別行政區",709.76),("青海省",562.67),("寧夏回族自治區",630.14),("西藏自治區",300.21),("澳門特別行政區",55.23)]t.add_schema(is_auto_play=True, play_interval=1000) # 自動播放1000ms
t.render("gansu_map.html")