This document gives a detailed walkthrough of a piece of code that builds and trains a Mixture Density Network (MDN) with TensorFlow, covering data generation, model construction, the custom loss function, and visualization of the predictions.
1. Importing Libraries and Setting Hyperparameters
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
Explanation:
- Imports the basic toolkits for numerical computation (NumPy), building deep learning models (TensorFlow/Keras), and plotting (Matplotlib).
Hyperparameter definitions:
N_HIDDEN = 15     # number of neurons in each hidden layer
N_MIXES = 10      # number of mixture components in the GMM
OUTPUT_DIMS = 1   # output dimensionality (dimension of the target variable)
2. Custom MDN Layer
class MDN(layers.Layer):
    def __init__(self, output_dims, num_mixtures, **kwargs):
        super(MDN, self).__init__(**kwargs)
        self.output_dims = output_dims
        self.num_mixtures = num_mixtures
        self.params = self.num_mixtures * (2 * self.output_dims + 1)  # pi, mu, sigma
        self.dense = layers.Dense(self.params)

    def call(self, inputs):
        output = self.dense(inputs)
        return output
Explanation:
- params is the total number of distribution parameters: each of the num_mixtures components contributes mu (mean, D values), sigma (standard deviation, D values) and pi (mixture weight, 1 value), i.e. 2*D + 1 parameters per component.
- The output shape is (batch_size, num_mixtures * (2*output_dims + 1)).
- The layer emits raw, unconstrained values; sigma is exponentiated and pi passed through a softmax later, inside the loss and sampling functions.
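As a quick sanity check (a minimal sketch using the definitions above; the dummy batch is made up for illustration), calling the layer on a fake batch confirms the expected output width:

# With N_MIXES = 10 and OUTPUT_DIMS = 1 the layer should emit
# 10 * (2*1 + 1) = 30 values per sample.
mdn_layer = MDN(OUTPUT_DIMS, N_MIXES)
dummy = tf.zeros((4, N_HIDDEN))   # fake batch of 4 hidden vectors
print(mdn_layer(dummy).shape)     # expected: (4, 30)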
3. Custom MDN Loss Function
def get_mixture_loss_func(output_dims, num_mixtures):
    def mdn_loss(y_true, y_pred):
        # targets as a (batch, D) matrix
        y_true = tf.reshape(y_true, [-1, output_dims])
        # slice the flat prediction into means, log-sigmas and weight logits
        out_mu = y_pred[:, :num_mixtures * output_dims]
        out_sigma = y_pred[:, num_mixtures * output_dims:2 * num_mixtures * output_dims]
        out_pi = y_pred[:, -num_mixtures:]
        mu = tf.reshape(out_mu, [-1, num_mixtures, output_dims])
        sigma = tf.exp(tf.reshape(out_sigma, [-1, num_mixtures, output_dims]))  # keep sigma > 0
        pi = tf.nn.softmax(out_pi)  # mixture weights sum to 1
        # broadcast each target against every mixture component
        y_true = tf.tile(y_true[:, tf.newaxis, :], [1, num_mixtures, 1])
        # Gaussian density of the target under each component
        normal_dist = tf.exp(-0.5 * tf.square((y_true - mu) / sigma)) / (sigma * tf.sqrt(2.0 * np.pi))
        prob = tf.reduce_prod(normal_dist, axis=2)  # product over output dims
        weighted_prob = prob * pi
        # negative log-likelihood; 1e-8 guards against log(0)
        loss = -tf.math.log(tf.reduce_sum(weighted_prob, axis=1) + 1e-8)
        return tf.reduce_mean(loss)
    return mdn_loss
Explanation:
- For each target value, the Gaussian density under every component is computed and combined into a weighted sum using the mixture weights: p(y|x) = sum_k pi_k * N(y; mu_k, sigma_k).
- The loss is the negative log-likelihood of this mixture, averaged over the batch; the small constant 1e-8 prevents log(0).
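To see the function in action, it can be evaluated eagerly on a tiny random batch (a smoke-test sketch with made-up tensors, not part of the original tutorial):

# Evaluate the mixture loss on random inputs; it should return a finite scalar.
loss_fn = get_mixture_loss_func(OUTPUT_DIMS, N_MIXES)
y_true_demo = tf.random.normal((4, 1))                                 # 4 fake targets
y_pred_demo = tf.random.normal((4, N_MIXES * (2 * OUTPUT_DIMS + 1)))   # raw MDN outputs
print(float(loss_fn(y_true_demo, y_pred_demo)))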
4. Sampling from the Output Distribution
def sample_from_output(y_pred, output_dims, num_mixtures, temp=1.0):
    # y_pred is a single flat prediction vector; slice it the same way as the loss
    out_mu = y_pred[:num_mixtures * output_dims]
    out_sigma = y_pred[num_mixtures * output_dims:2 * num_mixtures * output_dims]
    out_pi = y_pred[-num_mixtures:]
    out_sigma = np.exp(out_sigma)  # recover positive sigmas
    # temperature-scaled softmax over the mixture logits
    out_pi = np.exp(out_pi / temp)
    out_pi /= np.sum(out_pi)
    # pick one component according to its weight, then sample from its Gaussian
    mixture_idx = np.random.choice(np.arange(num_mixtures), p=out_pi)
    mu = out_mu[mixture_idx * output_dims:(mixture_idx + 1) * output_dims]
    sigma = out_sigma[mixture_idx * output_dims:(mixture_idx + 1) * output_dims]
    sample = np.random.normal(mu, sigma)
    return sample
Explanation:
- The mixture logits pi are turned into weights with a temperature-scaled softmax; one component is chosen according to those weights, and a value is then sampled from its Gaussian using the corresponding mu and sigma.
- temp controls the sampling temperature: the higher the temperature, the flatter the weight distribution.
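The effect of temp can be demonstrated directly (a small sketch with made-up logits):

# Higher temperature flattens the mixture weights; lower temperature sharpens them.
logits = np.array([2.0, 0.5, -1.0])
for temp in (0.5, 1.0, 2.0):
    w = np.exp(logits / temp)
    w /= w.sum()
    print(temp, np.round(w, 3))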
5. Generating Training Data
NSAMPLE = 3000
y_data = np.float32(np.random.uniform(-10.5, 10.5, NSAMPLE))
r_data = np.random.normal(size=NSAMPLE)
x_data = np.sin(0.75 * y_data) * 7.0 + y_data * 0.5 + r_data * 1.0
x_data = x_data.reshape((NSAMPLE, 1))
y_data = y_data.reshape((NSAMPLE, 1))
Explanation:
- Synthetic data with a nonlinear relationship: x = sin(0.75*y)*7 + 0.5*y + noise, where x is the input and y is the target.
- Note that the data is generated "in reverse": y is drawn uniformly and x computed from it. Because sin(0.75*y) is non-monotonic, several different y values can map to the same x, so the conditional distribution p(y|x) is multimodal, which is precisely the situation an MDN is designed to handle. The plot below makes this visible.
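Plotting the raw data shows the multimodality (the same plot also appears in the complete listing at the end):

# Vertical slices of the scatter contain several clusters of y for a single x.
plt.figure()
plt.scatter(x_data, y_data, alpha=0.3, s=10)
plt.title("Training Data")
plt.xlabel("x")
plt.ylabel("y")
plt.show()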
6. Building the Model
model = keras.Sequential([
    layers.Dense(N_HIDDEN, input_shape=(1,), activation='relu'),
    layers.Dense(N_HIDDEN, activation='relu'),
    MDN(OUTPUT_DIMS, N_MIXES)
])
model.compile(loss=get_mixture_loss_func(OUTPUT_DIMS, N_MIXES), optimizer=keras.optimizers.Adam())
model.summary()
Explanation:
- A feed-forward network with two ReLU hidden layers, topped with the MDN output layer.
- The model is compiled with the custom MDN loss function and the Adam optimizer.
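With the hyperparameters above, the MDN head outputs N_MIXES * (2*OUTPUT_DIMS + 1) = 10 * 3 = 30 values per input, which model.summary() should confirm. A programmatic check (a minimal sketch):

# The final layer width must match the parameter count the loss expects.
assert model.output_shape == (None, N_MIXES * (2 * OUTPUT_DIMS + 1))  # (None, 30)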
7. Training the Model
model.fit(x_data, y_data, batch_size=128, epochs=200, validation_split=0.15, verbose=1)
- Batch size 128, trained for 200 epochs, with 15% of the data held out for validation.
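To monitor convergence, capture the History object that fit() returns and plot the loss curves (an optional sketch replacing the call above, not part of the original script):

# Capture and plot training/validation loss curves.
history = model.fit(x_data, y_data, batch_size=128, epochs=200,
                    validation_split=0.15, verbose=0)
plt.figure()
plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='val loss')
plt.xlabel('epoch')
plt.legend()
plt.show()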
8. Testing and Visualizing Predictions
x_test = np.linspace(-15, 15, 1000).astype(np.float32).reshape(-1, 1)
y_pred = model.predict(x_test)
y_samples = np.array([sample_from_output(p, OUTPUT_DIMS, N_MIXES) for p in y_pred])
- The model predicts over a dense grid of inputs, and one y value is sampled from the predicted GMM at each test point.
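Besides sampling, the raw predictions can be decomposed into the mixture parameters, using the same slicing layout as the loss function, to inspect the component means directly (an optional sketch):

# Split the raw network output into mu / sigma / pi and plot the
# per-component means, colored by their mixture weights.
mu = y_pred[:, :N_MIXES * OUTPUT_DIMS]
sigma = np.exp(y_pred[:, N_MIXES * OUTPUT_DIMS:2 * N_MIXES * OUTPUT_DIMS])
pi = tf.nn.softmax(y_pred[:, -N_MIXES:]).numpy()
plt.figure()
for k in range(N_MIXES):
    plt.scatter(x_test, mu[:, k], s=2, c=pi[:, k], cmap='viridis', vmin=0, vmax=1)
plt.colorbar(label='pi_k')
plt.title("Per-component means")
plt.show()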
Visualizing the predictions
plt.figure()
plt.scatter(x_test, y_samples, alpha=0.3, s=10)
plt.title("MDN Predictions")
plt.xlabel("x")
plt.ylabel("y")
plt.show()
Comparing the original data with the predictions
plt.figure(figsize=(8, 5))
plt.scatter(x_data, y_data, label="Original Data", alpha=0.2, s=10)
plt.scatter(x_test, y_samples, label="MDN Samples", alpha=0.5, s=10, color='r')
plt.title("MDN Prediction vs Training Data")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.grid(True)
plt.show()
The complete code is as follows:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
# Hyperparameters
N_HIDDEN = 15
N_MIXES = 10
OUTPUT_DIMS = 1

# === 1. Custom MDN layer ===
class MDN(layers.Layer):
    def __init__(self, output_dims, num_mixtures, **kwargs):
        super(MDN, self).__init__(**kwargs)
        self.output_dims = output_dims
        self.num_mixtures = num_mixtures
        self.params = self.num_mixtures * (2 * self.output_dims + 1)  # pi, mu, sigma
        self.dense = layers.Dense(self.params)

    def call(self, inputs):
        output = self.dense(inputs)
        return output

# === 2. Custom loss function ===
def get_mixture_loss_func(output_dims, num_mixtures):
    def mdn_loss(y_true, y_pred):
        y_true = tf.reshape(y_true, [-1, output_dims])
        out_mu = y_pred[:, :num_mixtures * output_dims]
        out_sigma = y_pred[:, num_mixtures * output_dims:2 * num_mixtures * output_dims]
        out_pi = y_pred[:, -num_mixtures:]
        mu = tf.reshape(out_mu, [-1, num_mixtures, output_dims])
        sigma = tf.exp(tf.reshape(out_sigma, [-1, num_mixtures, output_dims]))
        pi = tf.nn.softmax(out_pi)
        y_true = tf.tile(y_true[:, tf.newaxis, :], [1, num_mixtures, 1])
        normal_dist = tf.exp(-0.5 * tf.square((y_true - mu) / sigma)) / (sigma * tf.sqrt(2.0 * np.pi))
        prob = tf.reduce_prod(normal_dist, axis=2)
        weighted_prob = prob * pi
        loss = -tf.math.log(tf.reduce_sum(weighted_prob, axis=1) + 1e-8)
        return tf.reduce_mean(loss)
    return mdn_loss

# === 3. Sampling from the output ===
def sample_from_output(y_pred, output_dims, num_mixtures, temp=1.0):
    out_mu = y_pred[:num_mixtures * output_dims]
    out_sigma = y_pred[num_mixtures * output_dims:2 * num_mixtures * output_dims]
    out_pi = y_pred[-num_mixtures:]
    out_sigma = np.exp(out_sigma)
    out_pi = np.exp(out_pi / temp)
    out_pi /= np.sum(out_pi)
    mixture_idx = np.random.choice(np.arange(num_mixtures), p=out_pi)
    mu = out_mu[mixture_idx * output_dims:(mixture_idx + 1) * output_dims]
    sigma = out_sigma[mixture_idx * output_dims:(mixture_idx + 1) * output_dims]
    sample = np.random.normal(mu, sigma)
    return sample

# === 4. Generate training data ===
NSAMPLE = 3000
y_data = np.float32(np.random.uniform(-10.5, 10.5, NSAMPLE))
r_data = np.random.normal(size=NSAMPLE)
x_data = np.sin(0.75 * y_data) * 7.0 + y_data * 0.5 + r_data * 1.0
x_data = x_data.reshape((NSAMPLE, 1))
y_data = y_data.reshape((NSAMPLE, 1))

plt.figure()
plt.scatter(x_data, y_data, alpha=0.3)
plt.title("Training Data")
plt.show()

# === 5. Build the model ===
model = keras.Sequential([
    layers.Dense(N_HIDDEN, input_shape=(1,), activation='relu'),
    layers.Dense(N_HIDDEN, activation='relu'),
    MDN(OUTPUT_DIMS, N_MIXES)
])
model.compile(loss=get_mixture_loss_func(OUTPUT_DIMS, N_MIXES), optimizer=keras.optimizers.Adam())
model.summary()

# === 6. Train the model ===
model.fit(x_data, y_data, batch_size=128, epochs=200, validation_split=0.15, verbose=1)

# === 7. Test and visualize ===
x_test = np.linspace(-15, 15, 1000).astype(np.float32).reshape(-1, 1)
y_pred = model.predict(x_test)
y_samples = np.array([sample_from_output(p, OUTPUT_DIMS, N_MIXES) for p in y_pred])

plt.figure()
plt.scatter(x_test, y_samples, alpha=0.3, s=10)
plt.title("MDN Predictions")
plt.xlabel("x")
plt.ylabel("y")
plt.show()
# === 8. Compare predictions with training data ===
plt.figure(figsize=(8, 5))
plt.scatter(x_data, y_data, label="Original Data", alpha=0.2, s=10)
plt.scatter(x_test, y_samples, label="MDN Samples", alpha=0.5, s=10, color='r')
plt.title("MDN Prediction vs Training Data")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.grid(True)
plt.show()
Summary
This project shows how to build a Mixture Density Network with TensorFlow to model complex conditional distributions. Unlike a conventional regression model, which outputs a single point estimate, an MDN can produce multimodal predictions, making it well suited to problems with high uncertainty where multiple outputs are valid for the same input.