R語言開發入門完整指南

R語言簡介

R是一種專為統計計算和圖形設計的編程語言，廣泛應用於數據分析、統計建模、機器學習和數據可視化。R語言具有以下特點：

開源免費：完全免費且源代碼開放
豐富的包生態：CRAN上有超過18000個包
強大的統計功能：內置豐富的統計函數
優秀的可視化：ggplot2等包提供強大的圖形功能
活躍的社區：全球有龐大的用戶和開發者社區

環境配置

1. 安裝R

Windows系統

訪問 R官方網站（https://www.r-project.org/）
點擊 “Download R”
選擇CRAN鏡像（推薦選擇中國鏡像）
點擊 “Download R for Windows”
下載 “base” 版本
運行安裝程序，按默認設置安裝

macOS系統

訪問CRAN網站（https://cran.r-project.org/）
點擊 “Download R for macOS”
下載對應處理器版本的PKG文件
雙擊安裝

Linux系統 (Ubuntu/Debian)

# Refresh the package index
sudo apt update
# Install R
sudo apt install r-base r-base-dev
# Verify the installation
R --version

2. 安裝RStudio

RStudio是R語言最流行的集成開發環境（IDE）。

訪問 RStudio官網（https://posit.co/download/rstudio-desktop/）
下載RStudio Desktop免費版
選擇對應操作系統版本
按提示安裝

3. RStudio界面介紹

RStudio界面分為四個主要區域：

Source：代碼編輯器
Console：R控制臺
Environment/History：環境變量和歷史記錄
Files/Plots/Packages/Help：文件瀏覽器、圖形輸出、包管理和幫助

4. 配置R環境

# Show the R version
version
# Show the current working directory
getwd()
# Set the working directory
setwd("/path/to/your/directory")
# List the installed packages
installed.packages()
# Show R's search path
search()

包管理

1. 包的安裝

從CRAN安裝

# Install a single package
install.packages("ggplot2")
# Install several packages at once
install.packages(c("dplyr", "tidyr", "readr"))
# Install from a specific mirror
install.packages("ggplot2", repos="https://cran.rstudio.com/")

從GitHub安裝

# First install the devtools package
install.packages("devtools")
# Install a package from GitHub
devtools::install_github("username/packagename")

從Bioconductor安裝

# Install BiocManager
install.packages("BiocManager")
# Install a bioinformatics package through BiocManager
BiocManager::install("GenomicRanges")

2. 包的加載和管理

# Load a package
library(ggplot2)
# Alternative: require() returns FALSE instead of raising an error when the
# package is missing, so library() is the safer choice for loading
require(ggplot2)
# Show the search path, which lists the attached packages
search()
# Show a package's version
packageVersion("ggplot2")
# Update packages
update.packages()
# Uninstall a package
remove.packages("packagename")
# Open a package's help index
help(package = "ggplot2")

3. 包依賴管理

# Show a package's dependencies
tools::package_dependencies("ggplot2")
# Install a package together with all of its dependencies
install.packages("ggplot2", dependencies = TRUE)

基本語法

1. 基本操作

# Assignment
x <- 5          # recommended form
x = 5           # also works
5 -> x          # rarely used
# Output
print(x)
x               # typing the variable name prints it
# Comments
# This is a single-line comment
# Basic arithmetic
2 + 3           # addition: 5
5 - 2           # subtraction: 3
3 * 4           # multiplication: 12
8 / 2           # division: 4
2^3             # exponentiation: 8
10 %% 3         # remainder: 1
10 %/% 3        # integer division: 3

2. 變量命名規則

# Valid variable names
my_variable <- 1
myVariable <- 2
variable1 <- 3
.hidden_var <- 4
# Invalid variable names (these raise errors)
# 1variable <- 5    # cannot start with a digit
# my-variable <- 6  # cannot contain a hyphen
# for <- 7          # cannot use a reserved word

3. 數據檢查函數

# Inspect an object's type
class(x)
typeof(x)
mode(x)
# Inspect an object's structure
str(x)
# Inspect an object's size
length(x)
dim(x)          # for matrices and data frames
# Test the data type
is.numeric(x)
is.character(x)
is.logical(x)
is.factor(x)

數據類型和結構

1. 基本數據類型

# Numeric
num1 <- 42
num2 <- 3.14159
# Integer
int1 <- 42L
int2 <- as.integer(42)
# Character
char1 <- "Hello"
char2 <- 'World'
# Logical
log1 <- TRUE
log2 <- FALSE
log3 <- T        # shorthand for TRUE; T is reassignable, so prefer TRUE
log4 <- F        # shorthand for FALSE; F is reassignable, so prefer FALSE
# Complex
comp1 <- 3 + 4i
# Check the data type
class(num1)     # "numeric"
typeof(char1)   # "character"

2. 向量 (Vector)

# Create vectors
numeric_vec <- c(1, 2, 3, 4, 5)
char_vec <- c("a", "b", "c")
logical_vec <- c(TRUE, FALSE, TRUE)

# Generate sequences
seq1 <- 1:10                        # 1 to 10
seq2 <- seq(1, 10, by = 2)          # 1, 3, 5, 7, 9
# Spell out length.out rather than relying on partial matching of `length`
seq3 <- seq(0, 1, length.out = 5)   # 5 evenly spaced values

# Repeat values
rep1 <- rep(1, 5)                   # repeat 1 five times
rep2 <- rep(c(1, 2), 3)             # repeat the vector three times

# Vector operations
length(numeric_vec)                 # vector length
numeric_vec[1]                      # first element
numeric_vec[c(1, 3, 5)]             # elements 1, 3 and 5
numeric_vec[-1]                     # everything except the first element

3. 因子 (Factor)

# Create factors
gender <- factor(c("M", "F", "M", "F", "M"))
# An ordered factor: the levels are listed from lowest to highest
education <- factor(
  c("High", "Medium", "Low"),
  levels = c("Low", "Medium", "High"),
  ordered = TRUE
)

# Inspect factors
levels(gender)
nlevels(education)
summary(gender)

4. 矩陣 (Matrix)

# Create matrices
mat1 <- matrix(1:12, nrow = 3, ncol = 4)
mat2 <- matrix(1:12, nrow = 3, byrow = TRUE)
# Matrix inspection and indexing
dim(mat1)                       # dimensions
nrow(mat1)                      # number of rows
ncol(mat1)                      # number of columns
mat1[1, 2]                      # element at row 1, column 2
mat1[1, ]                       # row 1
mat1[, 2]                       # column 2
# Matrix arithmetic
mat1 + mat2                     # matrix addition
mat1 * mat2                     # element-wise multiplication
mat1 %*% t(mat2)                # matrix multiplication
t(mat1)                         # transpose

5. 數組 (Array)

# Create a three-dimensional array
arr <- array(1:24, dim = c(3, 4, 2))
# Array operations
dim(arr)
arr[1, 2, 1]                    # access a specific element

6. 列表 (List)

# Create a list whose named elements have different types
my_list <- list(
  numbers = 1:5,
  text = "hello",
  logical = c(TRUE, FALSE),
  matrix = matrix(1:6, nrow = 2)
)

# Access list elements
my_list$numbers                 # with the $ operator
my_list[["text"]]               # with double brackets
my_list[[1]]                    # by position

# List operations
names(my_list)                  # element names
length(my_list)                 # number of elements

7. 數據框 (Data Frame)

# Create a data frame, keeping the text columns as character vectors
df <- data.frame(
  name = c("Alice", "Bob", "Charlie"),
  age = c(25, 30, 35),
  gender = c("F", "M", "M"),
  stringsAsFactors = FALSE
)

# Inspect the data frame
head(df)                        # first rows
tail(df)                        # last rows
str(df)                         # structure
summary(df)                     # summary statistics

# Access the data frame
df$name                         # column by name with $
df[, "age"]                     # column by name with brackets
df[1, ]                         # first row
df[, 1:2]                       # first two columns
df[df$age > 25, ]               # rows matching a condition

數據操作

1. 數據導入導出

# Read a CSV file
df <- read.csv("data.csv", header = TRUE)
df <- read.csv("data.csv", stringsAsFactors = FALSE)
# Read other formats
df <- read.table("data.txt", sep = "\t", header = TRUE)
df <- read.delim("data.txt")
# Use the readr package (recommended)
library(readr)
df <- read_csv("data.csv")
# Read an Excel file
library(readxl)
df <- read_excel("data.xlsx", sheet = 1)
# Export data
write.csv(df, "output.csv", row.names = FALSE)
write.table(df, "output.txt", sep = "\t")

2. 數據清洗

# Handle missing values
# (the results below are not assigned, so df itself is left unchanged)
is.na(df)                       # test for missing values
complete.cases(df)              # flag complete rows
na.omit(df)                     # drop rows containing missing values
df[!is.na(df$age), ]            # drop rows where a specific column is missing
# Replace missing values with the column mean
df$age[is.na(df$age)] <- mean(df$age, na.rm = TRUE)
# Handle duplicates
duplicated(df)                  # flag duplicated rows
unique(df)                      # de-duplicate
df[!duplicated(df), ]           # drop duplicated rows

3. 使用dplyr進行數據操作

library(dplyr)

# Filter rows
filter(df, age > 25)
filter(df, gender == "M", age > 30)

# Select columns
select(df, name, age)
select(df, -gender)             # exclude a column

# Sort
arrange(df, age)                # ascending
arrange(df, desc(age))          # descending

# Create a new column
mutate(df, age_group = ifelse(age > 30, "Old", "Young"))

# Grouped summary
group_by(df, gender) %>%
  summarise(
    avg_age = mean(age),
    count = n()
  )

# Pipeline: one step per line
df %>%
  filter(age > 25) %>%
  select(name, age) %>%
  arrange(desc(age))

4. 數據重構

library(tidyr)

# Wide format to long format
wide_data <- data.frame(
  id = 1:3,
  A = c(1, 2, 3),
  B = c(4, 5, 6)
)
long_data <- pivot_longer(
  wide_data,
  cols = c(A, B),
  names_to = "variable",
  values_to = "value"
)

# Long format back to wide format
wide_again <- pivot_wider(
  long_data,
  names_from = variable,
  values_from = value
)

統計分析

1. 描述性統計

# Basic statistics
mean(x)                         # mean
median(x)                       # median
sd(x)                           # standard deviation
var(x)                          # variance
min(x)                          # minimum
max(x)                          # maximum
range(x)                        # range
quantile(x)                     # quantiles
summary(x)                      # summary statistics
# Handle missing values
mean(x, na.rm = TRUE)
# Correlation analysis
cor(x, y)                       # correlation coefficient
cor.test(x, y)                  # correlation test

2. 假設檢驗

# t-tests
t.test(x)                       # one-sample t-test
t.test(x, y)                    # two-sample t-test
t.test(x ~ group, data = df)    # t-test by group
# Chi-squared test
chisq.test(table(x, y))
# Analysis of variance
aov_result <- aov(y ~ group, data = df)
summary(aov_result)
# Non-parametric tests
wilcox.test(x, y)               # Wilcoxon test
kruskal.test(y ~ group, data = df)  # Kruskal-Wallis test

3. 回歸分析

# Linear regression
model <- lm(y ~ x, data = df)
summary(model)
plot(model)
# Multiple regression
model2 <- lm(y ~ x1 + x2 + x3, data = df)
# Logistic regression
logit_model <- glm(y ~ x, family = binomial, data = df)
# Predict from a fitted model
predict(model, newdata = new_df)

數據可視化

1. 基礎繪圖

# Scatter plot
plot(x, y)
plot(x, y, main = "Title", xlab = "X Label", ylab = "Y Label")
# Line plot
plot(x, y, type = "l")
# Bar chart
barplot(table(x))
# Histogram
hist(x)
hist(x, breaks = 20, main = "Histogram")
# Box plot
boxplot(y ~ group, data = df)
# Pie chart
pie(table(x))

2. ggplot2高級繪圖

library(ggplot2)

# Basic syntax: data + aesthetic mapping + geometry layer
ggplot(data = df, aes(x = x_var, y = y_var)) +
  geom_point()

# Scatter plot with a linear fit and labels
ggplot(df, aes(x = age, y = income)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Age vs Income",
    x = "Age",
    y = "Income"
  )

# Bar chart
ggplot(df, aes(x = category)) +
  geom_bar() +
  theme_minimal()

# Box plot, one panel per category
ggplot(df, aes(x = group, y = value)) +
  geom_boxplot() +
  facet_wrap(~ category)

# Histogram
ggplot(df, aes(x = value)) +
  geom_histogram(bins = 30, fill = "blue", alpha = 0.7)

# Line plot with a date axis
ggplot(df, aes(x = date, y = value, color = group)) +
  geom_line() +
  scale_x_date(date_labels = "%Y-%m")

3. 自定義主題

# Built-in themes (p is assumed to be an existing ggplot object)
p + theme_minimal()
p + theme_classic()
p + theme_bw()

# Custom theme
p + theme(
  plot.title = element_text(size = 16, hjust = 0.5),
  axis.text = element_text(size = 12),
  legend.position = "bottom"
)

編程結構

1. 條件語句

# if / else if / else
x <- 5
if (x > 0) {
  print("x is positive")
} else if (x < 0) {
  print("x is negative")
} else {
  print("x is zero")
}

# ifelse() is the vectorized form
result <- ifelse(x > 0, "positive", "non-positive")

# switch(): the final unnamed argument is the fallback value
grade <- "A"
switch(
  grade,
  "A" = "Excellent",
  "B" = "Good",
  "C" = "Average",
  "D" = "Poor",
  "Unknown"
)

2. 循環結構

# for loop over a numeric sequence
for (i in 1:10) {
  print(i)
}

# for loop over a character vector
for (name in c("Alice", "Bob", "Charlie")) {
  print(paste("Hello", name))
}

# while loop
i <- 1
while (i <= 5) {
  print(i)
  i <- i + 1
}

# repeat loop: runs until break is reached
i <- 1
repeat {
  print(i)
  i <- i + 1
  if (i > 5) break
}

3. 函數定義

# Basic function: multiplies x by y, where y defaults to 2
my_function <- function(x, y = 2) {
  result <- x * y
  return(result)
}

# Call the function
my_function(5)          # uses the default argument
my_function(5, 3)       # supplies every argument

# Anonymous function
sapply(1:5, function(x) x^2)

# Functional programming
library(purrr)
map(1:5, ~ .x^2)        # same values as the sapply call, but map() returns a list (map_dbl() returns a numeric vector)

4. 異常處理

# try(): evaluates the expression and returns a "try-error" object on failure.
# stop() is used to raise the error because numeric division by zero
# yields Inf in R and does not signal a condition.
result <- try({
  stop("invalid input")
}, silent = TRUE)
if (inherits(result, "try-error")) {
  print("發生錯誤")
}

# tryCatch(): dispatches to a handler according to the condition class
result <- tryCatch({
  stop("invalid input")
}, error = function(e) {
  print(paste("錯誤:", conditionMessage(e)))
  return(NA)  # value of the tryCatch() call when an error occurs
}, warning = function(w) {
  print(paste("警告:", conditionMessage(w)))
})

實用技巧

1. 工作空間管理

# List workspace objects
ls()                            # list all objects
objects()                       # same as ls()
# Remove objects
rm(x)                           # remove a specific object
rm(list = ls())                 # remove every object that ls() lists
# Save and load the workspace
save.image("workspace.RData")   # save the whole workspace
save(x, y, file = "data.RData") # save specific objects
load("workspace.RData")         # load a workspace

2. 性能優化

# Vectorized operation (fast)
x^2

# Equivalent explicit loop (slow) - avoid when a vectorized form exists.
# seq_along(x) is used instead of 1:length(x) so an empty x runs zero times.
result <- numeric(length(x))
for (i in seq_along(x)) {
  result[i] <- x[i]^2
}

# The apply family
# (my_matrix, my_list, my_vector and my_fun are placeholders to replace)
apply(my_matrix, 1, mean)       # apply a function over rows
lapply(my_list, my_fun)         # apply a function over a list
sapply(my_vector, my_fun)       # lapply with simplified output

# Preallocate memory
result <- numeric(1000)
# rather than growing step by step: result <- c(result, new_value)

3. 調試技巧

# Debug a function
debug(my_function)              # enter debugging mode for the function
undebug(my_function)            # leave debugging mode

# Trace an error
traceback()                     # show the call stack of the last error

# Print debugging information
print(x)
cat("x =", x, "\n")

# Pause execution with browser()
my_function <- function(x) {
  browser()                     # execution pauses here
  result <- x * 2
  return(result)
}

4. 內存管理

# Object size
object.size(x)
print(object.size(x), units = "MB")
# Memory usage
memory.size()                   # Windows only; NOTE(review): reportedly a stub in recent R releases - confirm
gc()                            # garbage collection
# Free memory
rm(large_object)
gc()

5. 字符串處理

# Basic string operations
nchar("hello")                  # string length
substr("hello", 1, 3)           # substring
paste("hello", "world")         # concatenate
paste0("hello", "world")        # concatenate without a separator
# Regular expressions
grep("pattern", text)           # find a pattern
gsub("old", "new", text)        # replace
strsplit("a,b,c", ",")          # split
# The stringr package (recommended)
library(stringr)
str_length("hello")
str_sub("hello", 1, 3)
str_c("hello", "world")
str_detect("hello world", "world")
str_replace("hello", "l", "x")