Python批量生成N天前的多個word文件，并根據excel統計數據，修改word模板，合并多個word文件

1，需求

根據word模板文件，生成多個帶日期后綴的word文件
根據excel-每日告警統計數量，逐個修改當日的文檔

2，實現

shell腳本：根據word模板文件，生成多個帶日期后綴的word文件

#!/bin/bash
# Copy the report template once per calendar day in [start_date, end_date],
# naming each copy "<prefix>-YYYYMMDD.docx", then list and count the days that
# are not in the holiday list.
# Requires GNU date (relies on `date -d`).

# Base name (without extension) of the template document to copy.
baogao_doc_prename="巡檢報告"
baogao_doc="${baogao_doc_prename}.docx"

# Directory that receives the generated copies.
dest_dir="."

# First and last day of the range (both inclusive), normalised to YYYYMMDD.
start_date=$(date -d "20250725" +%Y%m%d)
end_date=$(date -d "20250726" +%Y%m%d)

# Holidays to leave out of the listing and the count; fill in as needed.
# Format: ("YYYYMMDD" "YYYYMMDD" ...)
holidays=(
    "20240101"
    # "20250101" "20250201"   # examples: New Year's Day, Spring Festival
)

day_count=0
echo "近一年日期（排除節假日）:"

# YYYYMMDD values sort numerically in chronological order, so the loop
# compares the date strings directly and advances with "+ 1 day" rather than
# adding 86400 seconds, which drifts across a daylight-saving change and can
# drop the last day of the range.
current_date=$start_date
while [ "$current_date" -le "$end_date" ]; do
    # NOTE(review): a copy is made for every day, holidays included; only the
    # listing and the counter below skip holidays. Confirm this is intended.
    cp -- "$baogao_doc" "$dest_dir/${baogao_doc_prename}-${current_date}.docx"

    # Space-delimited membership test against the holiday list.
    if [[ ! " ${holidays[*]} " =~ " $current_date " ]]; then
        echo "$current_date"
        # Plain arithmetic assignment: ((day_count++)) returns a non-zero
        # status when the old value is 0, which aborts under `set -e`.
        day_count=$((day_count + 1))
    fi

    current_date=$(date -d "$current_date + 1 day" +%Y%m%d)
done

echo "生成完成！有效日期數: $day_count"

python腳本：根據excel-每日告警統計數量，逐個修改當日的文檔

import pandas as pd
from docx import Document
from docx.table import _Cell
from docx.text.paragraph import Paragraph
import os
import re
from datetime import datetime, timedelta


def get_previous_day_filename(filename):
    """Return *filename* with its embedded date moved back by one day.

    The first run of eight consecutive digits in *filename* is read as a
    ``YYYYMMDD`` date. Only that occurrence is rewritten; any other digit
    runs in the name or path are left untouched.

    :param filename: file name or path containing a ``YYYYMMDD`` date
    :return: the name with the date replaced by the previous day, or ``None``
        (after printing an error) when no date is found or it is not a valid
        calendar date
    """
    match = re.search(r'(\d{8})', filename)
    if not match:
        print("錯誤: 文件名中未找到日期部分!")
        return None

    date_str = match.group(1)
    try:
        date_obj = datetime.strptime(date_str, '%Y%m%d')
    except ValueError:
        print(f"錯誤: 日期格式不正確，應為YYYYMMDD，但得到了{date_str}")
        return None

    previous_day_str = (date_obj - timedelta(days=1)).strftime('%Y%m%d')

    # Splice the new date into the matched span only. A pattern-wide re.sub
    # would also overwrite every other 8-digit run in the name.
    return filename[:match.start(1)] + previous_day_str + filename[match.end(1):]


# Walk the paragraphs and replace placeholders
def replact_word_item(doc, replacements):
    """Replace placeholder text in the paragraphs of *doc*.

    :param doc: document object exposing ``paragraphs``; each paragraph
        exposes ``text`` and ``runs``, each run a writable ``text``
    :param replacements: mapping of placeholder text to replacement value
        (values are converted with ``str``)
    """
    for paragraph in doc.paragraphs:
        for key, value in replacements.items():
            if key not in paragraph.text:
                continue
            new_text = str(value)
            runs = paragraph.runs
            hits = [run for run in runs if key in run.text]
            if hits:
                # The placeholder sits inside individual runs: rewrite only
                # those runs so every other run keeps its content as is.
                for run in hits:
                    run.text = run.text.replace(key, new_text)
                continue
            # The placeholder is split across several runs, so no single run
            # contains it. Collapse the paragraph text into the first run and
            # do the replacement there; the paragraph then carries the first
            # run's formatting.
            joined = "".join(run.text for run in runs)
            if key in joined:
                runs[0].text = joined.replace(key, new_text)
                for run in runs[1:]:
                    run.text = ""


# System name in the first table column -> Excel column holding its alert
# count. The column name doubles as the label used in the generated remark.
_ALERT_COLUMNS = {
    'Test1': 'Test1告警',
    'Test2': 'Test2告警',
}


def _alert_count(excel_row, column):
    """Return the alert count in *column* of *excel_row*; blank cells count as 0."""
    value = excel_row[column]
    if pd.isna(value):
        return 0
    return int(value)


def _format_timestamp(value):
    """Render an ``@timestamp`` cell as the text used in the Word file names."""
    # pandas Timestamp objects are datetime instances.
    if isinstance(value, datetime):
        return value.strftime('%Y%m%d')
    # iterrows() upcasts a row that mixes ints and floats to float, which
    # would otherwise yield names such as "20250725.0".
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value).strip()


def _open_document(path):
    """Open *path* with python-docx; print a notice and return None on failure."""
    # python-docx reports a missing or non-docx file with its own exception
    # type rather than FileNotFoundError.
    from docx.opc.exceptions import PackageNotFoundError

    try:
        return Document(path)
    except (FileNotFoundError, PackageNotFoundError):
        print(f"未找到文件: {path}，跳過處理")
        return None


def _previous_conclusion(word_filename):
    """Return the previous day's conclusion reworded as already handled, or ''."""
    former_word_filename = get_previous_day_filename(word_filename)
    if not former_word_filename or not os.path.exists(former_word_filename):
        return ''
    print(f"獲取前一天巡檢報告文件{former_word_filename}")

    doc_former = _open_document(former_word_filename)
    if doc_former is None or not doc_former.tables:
        return ''

    former_result = ''
    # Skip the header row; the conclusion text is in the second column of the
    # row whose first column reads '巡檢結論'.
    for table_row in doc_former.tables[0].rows[1:]:
        if table_row.cells[0].text.strip() == '巡檢結論':
            former_result = table_row.cells[1].text
    return former_result.replace("有", "上次出現的").replace("均已反饋", "均已處理")


def edit_xjbaogao_table(doc, excel_row, word_filename):
    """Fill the inspection table and conclusion of one daily report.

    Table columns as used here: 0 = system, 1 = inspection content,
    2 = normal / abnormal, 3 = remark. For every system row with a positive
    alert count the remark and the normal/abnormal cell are rewritten. The
    '巡檢結論' row and the matching sentence in the document body then receive
    the summary, followed by the previous day's conclusion when that report
    exists.

    :param doc: opened python-docx document of the day being processed
    :param excel_row: row of the alert statistics sheet for that day
    :param word_filename: path of *doc*; used to locate the previous day's file
    """
    inspection_table = doc.tables[0]
    total_result = []
    conclusion_rows = []

    # Skip the header row.
    for table_row in inspection_table.rows[1:]:
        system = table_row.cells[0].text.strip()
        if system == '巡檢結論':
            # Filled in after the loop, once every system row has been seen,
            # so the summary does not depend on the row order.
            conclusion_rows.append(table_row)
            continue

        column = _ALERT_COLUMNS.get(system)
        if column is None:
            continue

        alert_cnt = _alert_count(excel_row, column)
        if alert_cnt > 0:
            result = f"{alert_cnt}次{column}"
            # Remark column.
            table_row.cells[3].text = "有" + result + "，均已反饋"
            # Normal / abnormal column.
            # NOTE(review): the trailing "?" looks like a checked-box glyph
            # lost to an encoding problem - confirm the intended character.
            table_row.cells[2].text = "\r正常□\r不正常?"
            total_result.append(result)

    # Built after the loop so the name is always bound, even for a table that
    # has only a header row.
    xunjian_result = "有" + "，".join(total_result) + "，均已反饋。"
    if "告警" in xunjian_result:
        for table_row in conclusion_rows:
            table_row.cells[1].text = xunjian_result
    print("xunjian_result===>", xunjian_result)

    # Append the previous day's conclusion, reworded as already handled.
    # NOTE(review): when today has no alerts but the previous day did, the
    # text written to the body starts with "有，均已反饋。" - confirm the
    # wording wanted for that case.
    xunjian_result += _previous_conclusion(word_filename)

    if "告警" in xunjian_result:
        replact_word_item(doc, {"各業務模塊運行正常。": xunjian_result})


def update_word_remarks(excel_path, word_dir, word_file_prefix):
    """Update the Word report of every day listed in the Excel sheet.

    Each row of sheet 'sheet1' names a day in its '@timestamp' column; the
    report ``<word_dir>/<prefix>-<timestamp>.docx`` is edited and saved in
    place. Rows whose report does not exist or cannot be opened are skipped.

    :param excel_path: path of the Excel file
    :param word_dir: directory containing the Word files
    :param word_file_prefix: sequence whose first item is the file name prefix
    """
    df = pd.read_excel(excel_path, sheet_name='sheet1')
    f1 = word_file_prefix[0]

    for _, excel_row in df.iterrows():
        timestamp = _format_timestamp(excel_row['@timestamp'])
        word_filename = f"{word_dir}/{f1}-{timestamp}.docx"
        if not os.path.exists(word_filename):
            continue

        print(f"處理文件{word_filename}")
        doc = _open_document(word_filename)
        if doc is None:
            continue

        edit_xjbaogao_table(doc, excel_row, word_filename)
        doc.save(word_filename)


if __name__ == "__main__":
    # Paths; adjust to the actual environment.
    EXCEL_PATH = "告警統計.xlsx"  # Excel file with the daily alert counts
    WORD_DIRECTORY = "."       # directory holding the Word files
    word_file_prefix = ["巡檢報告"]
    update_word_remarks(EXCEL_PATH, WORD_DIRECTORY, word_file_prefix)

3，結果

在這里插入圖片描述

4，合并多個word文件

pip install docxcompose

import os,re
from docx import Document
from docxcompose.composer import Composer


def extract_date_from_filename(filename):
    """Extract a date from a file name and return it as ``YYYY-MM-DD``.

    Only the base name is inspected, so date-like directory names in a path
    are ignored. Accepted forms include ``2024-03-15``, ``20240315`` and
    ``report_2024_03_15.docx``.

    :param filename: file name or path
    :return: the date as ``YYYY-MM-DD``, or ``"未知日期"`` when none is found
    """
    basename = os.path.splitext(os.path.basename(filename))[0]
    # Four-digit year, two-digit month and day, optionally separated by
    # "-" or "_".
    patterns = [
        r'(\d{4})[-_]?(\d{2})[-_]?(\d{2})',
    ]
    for pattern in patterns:
        match = re.search(pattern, basename)
        if match:
            year, month, day = match.groups()
            return f"{year}-{month}-{day}"
    return "未知日期"


def merge_word_files_with_titles(source_dir, output_file="merged_document.docx"):
    """Merge every .docx file in *source_dir* into one document.

    Files are ordered by the date in their name (files without a date come
    last). Each file's content is preceded by a level-1 heading holding its
    name without extension, and followed by a page break unless it is the
    last file.

    :param source_dir: directory containing the Word files to merge
    :param output_file: path of the merged document
    """
    output_path = os.path.abspath(output_file)
    files = []
    for name in os.listdir(source_dir):
        # "~$..." files are the lock files Word keeps next to an open
        # document; they carry the .docx extension but are not documents.
        if not name.lower().endswith(".docx") or name.startswith("~$"):
            continue
        path = os.path.join(source_dir, name)
        # A previous merge result stored in the same directory must not be
        # merged into itself.
        if os.path.abspath(path) == output_path:
            continue
        files.append(path)

    # Sort by date, then by path so files sharing a date keep a stable order.
    files.sort(key=lambda path: (extract_date_from_filename(path), path))

    if not files:
        print("目錄中未找到.docx文件")
        return

    master = Document()
    composer = Composer(master)
    last_index = len(files) - 1

    for index, file_path in enumerate(files):
        # Heading: original file name without its extension.
        file_name = os.path.splitext(os.path.basename(file_path))[0]
        title_doc = Document()
        title_doc.add_heading(file_name, level=1)
        composer.append(title_doc)

        composer.append(Document(file_path))

        # Page break between documents, none after the last one.
        if index != last_index:
            page_break = Document()
            page_break.add_page_break()
            composer.append(page_break)

    composer.save(output_file)
    print(f"合并完成！文件已保存至: {output_file}")


# Example usage
if __name__ == "__main__":
    # Merge every report found in ./old2 into a single document.
    merge_word_files_with_titles(
        source_dir=r"./old2",  # replace with the actual path
        output_file="./old-合并報告.docx",
    )