## ================= 基本設置 =================
## 1) 設定你的工作目錄(保持你的原路徑不變)
setwd("D:/*****/*****/******")## 2) 文件名(與xlsx實際名字保持一致)
xlsx_file <- "驅動因素(中低收入).xlsx"## ================= 依賴包 =================
need_pkgs <- c("readxl", "GD")
to_install <- need_pkgs[!need_pkgs %in% installed.packages()[, "Package"]]
if (length(to_install) > 0) install.packages(to_install)
library(readxl)
library(GD)## ================= 配置參數 =================
discmethod <- c("equal","natural","quantile","geometric","sd") # 離散化方法
discitv <- 3:6 # 分成5~10類
x_vars <- paste0("X", 1:18) # X1~X18
y_var <- "Y"## 你給的7個子表名(與excel中的工作表名保持一致)
target_sheets <- c("1","2","3","4","5","6","7")## ================= 輔助函數 =================
sanitize_filename <- function(x) {# 用于把工作表名安全地轉為文件名x <- gsub("[\\\\/:*?\"<>|]", "_", x)x <- gsub("\\s+", "_", x)enc2utf8(x)
}coerce_numeric <- function(df, cols) {# 強制把指定列轉為數值(如果有字符/因子)for (cc in cols) {if (cc %in% names(df)) {# 保留純數字或可轉的,其他變 NAdf[[cc]] <- suppressWarnings(as.numeric(df[[cc]]))}}df
}## ================= 主流程:逐表批量運算 =================
# 讀取excel文件的所有表名,校驗是否包含目標表
all_sheets <- readxl::excel_sheets(xlsx_file)
missing_sheets <- setdiff(target_sheets, all_sheets)
if (length(missing_sheets) > 0) {warning(sprintf("以下工作表在 %s 中未找到:%s",xlsx_file, paste(missing_sheets, collapse = ", ")))
}
sheets_to_run <- intersect(target_sheets, all_sheets)if (length(sheets_to_run) == 0) {stop("未找到可運行的工作表,請檢查 xlsx 文件及表名。")
}for (sh in sheets_to_run) {cat(sprintf(">>> 正在處理工作表:%s ...\n", sh))# 讀取當前工作表df <- readxl::read_excel(path = xlsx_file, sheet = sh)df <- as.data.frame(df, stringsAsFactors = FALSE)# 檢查必要列need_cols <- c(y_var, x_vars)lost_cols <- setdiff(need_cols, names(df))if (length(lost_cols) > 0) {warning(sprintf("工作表 %s 缺少列:%s,跳過該表。", sh, paste(lost_cols, collapse = ", ")))next}# 類型轉換:確保 Y 與 X1~X18 為數值df <- coerce_numeric(df, need_cols)# 去除在 Y 或任一 X 上有缺失的行keep <- complete.cases(df[, need_cols])na_drop_n <- sum(!keep)if (na_drop_n > 0) {message(sprintf("工作表 %s:剔除含缺失的行 %d 行。", sh, na_drop_n))}df_clean <- df[keep, , drop = FALSE]# 構建公式fm <- as.formula(paste0(y_var, " ~ ", paste(x_vars, collapse = " + ")))# 運行地理探測器 gdmdatagdm <- GD::gdm(formula = fm,continuous_variable = x_vars, # 都是連續變量data = df_clean,discmethod = discmethod,discitv = discitv)# 將結果保存到 txtout_name <- paste0("GDM結果_", sanitize_filename(sh), ".txt")out_path <- file.path(getwd(), out_name)res_txt <- capture.output({cat("文件:", xlsx_file, "\n", sep = "")cat("工作表:", sh, "\n", sep = "")cat("公式:", deparse(fm), "\n", sep = "")cat("離散化方法:", paste(discmethod, collapse = ", "), "\n", sep = "")cat("分組數:", paste(discitv, collapse = ", "), "\n\n", sep = "")print(datagdm)})writeLines(res_txt, out_path, useBytes = TRUE)cat(sprintf("保存結果:%s\n", out_path))## 可選:如需同時保存每個表的圖,取消以下注釋# png_name <- paste0("GDM圖_", sanitize_filename(sh), ".png")# png(file.path(getwd(), png_name), width = 1000, height = 800, res = 120)# plot(datagdm)# dev.off()# cat(sprintf("保存圖形:%s\n", file.path(getwd(), png_name)))
}cat("=== 全部完成 ===\n")
假設excel文件中有7個子表,每個子表都要進行一遍最優地理探測器運算,之前的做法是分別把子表另存為新文件,然后一個一個跑,此代碼實現了一次性批量計算子表的相關指標,并保存為txt文件。此代碼假設所有變量都是連續變量,如果有類別變量,可結合AI進一步調整修改