[Deep Learning Notes I] 3: Building a Deep Neural Network Step by Step (Jupyter)

1. Importing packages

import numpy as np
import h5py
import matplotlib.pyplot as plt
from testCases_v2 import *
from dnn_utils_v2 import sigmoid, sigmoid_backward, relu, relu_backward

%matplotlib inline
plt.rcParams['figure.figsize'] = (5.0, 4.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%reload_ext autoreload
%autoreload 2

np.random.seed(1)
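The helper functions imported from dnn_utils_v2 are not reproduced in these notes. A minimal sketch of how they are typically defined in that file (an assumption; the actual file may differ in details such as how the cache is handled):

def sigmoid(Z):
    # logistic function applied element-wise; Z is cached for the backward pass
    A = 1 / (1 + np.exp(-Z))
    cache = Z
    return A, cache

def relu(Z):
    # element-wise max(0, Z); Z is cached for the backward pass
    A = np.maximum(0, Z)
    cache = Z
    return A, cache

def sigmoid_backward(dA, cache):
    # dZ = dA * sigma'(Z), with sigma'(Z) = sigma(Z) * (1 - sigma(Z))
    Z = cache
    s = 1 / (1 + np.exp(-Z))
    return dA * s * (1 - s)

def relu_backward(dA, cache):
    # dZ equals dA where Z > 0, and 0 elsewhere
    Z = cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    return dZ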

2. Overall flow diagram

(Figure: overall flow diagram of the forward/backward propagation pipeline)

3. Initialization

3.1 Creating a 2-layer neural network

def initialize_parameters(n_x, n_h, n_y):
    """
    Argument:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer

    Returns:
    parameters -- python dictionary containing your parameters:
                    W1 -- weight matrix of shape (n_h, n_x)
                    b1 -- bias vector of shape (n_h, 1)
                    W2 -- weight matrix of shape (n_y, n_h)
                    b2 -- bias vector of shape (n_y, 1)
    """
    np.random.seed(1)

    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}

    return parameters
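A quick shape sanity check (the layer sizes here are chosen only for illustration):

parameters = initialize_parameters(3, 2, 1)
print(parameters["W1"].shape)  # (2, 3)
print(parameters["b1"].shape)  # (2, 1)
print(parameters["W2"].shape)  # (1, 2)
print(parameters["b2"].shape)  # (1, 1)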

3.2 Creating an L-layer (deep) neural network

def initialize_parameters_deep(layer_dims):
    """
    Arguments:
    layer_dims -- python array (list) containing the dimensions of each layer in our network

    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
                    Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
                    bl -- bias vector of shape (layer_dims[l], 1)
    """
    np.random.seed(3)
    parameters = {}
    L = len(layer_dims)  # number of layers in the network, including the input layer

    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) * 0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))

    return parameters
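For example, layer_dims = [5, 4, 3] describes 5 input features, a hidden layer of 4 units, and an output layer of 3 units (sizes chosen only for illustration):

parameters = initialize_parameters_deep([5, 4, 3])
print(parameters["W1"].shape, parameters["b1"].shape)  # (4, 5) (4, 1)
print(parameters["W2"].shape, parameters["b2"].shape)  # (3, 4) (3, 1)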

4. Forward propagation

4.1 Linear forward

def linear_forward(A, W, b):
    """
    Implement the linear part of a layer's forward propagation.

    Arguments:
    A -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)

    Returns:
    Z -- the input of the activation function, also called the pre-activation parameter
    cache -- a python tuple containing "A", "W" and "b"; stored for computing the backward pass efficiently
    """
    Z = np.dot(W, A) + b
    cache = (A, W, b)

    return Z, cache
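For reference, the linear step for layer l computes

Z^{[l]} = W^{[l]} A^{[l-1]} + b^{[l]}, \qquad A^{[0]} = X,

with the bias b^{[l]} broadcast across the m examples by NumPy.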

4.2 Linear-activation forward


def linear_activation_forward(A_prev, W, b, activation):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value
    cache -- a python tuple containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """
    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)

    cache = (linear_cache, activation_cache)

    return A, cache
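The two activations used in this notebook are

\sigma(Z) = \frac{1}{1 + e^{-Z}}, \qquad \mathrm{ReLU}(Z) = \max(0, Z),

both applied element-wise to Z.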

4.3 L-layer model forward

# GRADED FUNCTION: L_model_forward

def L_model_forward(X, parameters):
    """
    Implement forward propagation for the [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID computation

    Arguments:
    X -- data, numpy array of shape (input size, number of examples)
    parameters -- output of initialize_parameters_deep()

    Returns:
    AL -- last post-activation value
    caches -- list of caches containing:
                every cache of linear_activation_forward() with "relu" (there are L-1 of them, indexed from 0 to L-2)
                the cache of linear_activation_forward() with "sigmoid" (there is one, indexed L-1)
    """
    caches = []
    A = X
    L = len(parameters) // 2  # number of layers in the neural network

    # [LINEAR -> RELU] for layers 1 .. L-1
    for l in range(1, L):
        A_prev = A
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)],
                                             activation="relu")
        caches.append(cache)

    # LINEAR -> SIGMOID for the output layer L
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)],
                                          activation="sigmoid")
    caches.append(cache)

    return AL, caches
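A minimal forward-pass check on random data (shapes chosen only for illustration):

np.random.seed(1)
X = np.random.randn(5, 4)                              # 5 features, 4 examples
parameters = initialize_parameters_deep([5, 4, 3, 1])  # a 3-layer model
AL, caches = L_model_forward(X, parameters)
print(AL.shape)     # (1, 4) -- one probability per example
print(len(caches))  # 3      -- one cache per layer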

5. Cost function

def compute_cost(AL, Y):
    """
    Implement the cost function defined by equation (7).

    Arguments:
    AL -- probability vector corresponding to your label predictions, shape (1, number of examples)
    Y -- true "label" vector (for example: containing 0 if non-cat, 1 if cat), shape (1, number of examples)

    Returns:
    cost -- cross-entropy cost
    """
    m = Y.shape[1]

    cost = -np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL)) / m
    cost = np.squeeze(cost)  # To make sure your cost's shape is what we expect (e.g. this turns [[17]] into 17).

    return cost
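The cross-entropy cost computed here is

J = -\frac{1}{m} \sum_{i=1}^{m} \Big[ y^{(i)} \log a^{[L](i)} + \big(1 - y^{(i)}\big) \log\big(1 - a^{[L](i)}\big) \Big].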

6. Backward propagation

6.1 Linear backward

def linear_backward(dZ, cache):
    """
    Implement the linear portion of backward propagation for a single layer (layer l)

    Arguments:
    dZ -- Gradient of the cost with respect to the linear output (of current layer l)
    cache -- tuple of values (A_prev, W, b) coming from the forward propagation in the current layer

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    A_prev, W, b = cache
    m = A_prev.shape[1]

    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db
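The three gradient lines implement, for layer l with m examples,

dW^{[l]} = \frac{1}{m}\, dZ^{[l]} A^{[l-1]T}, \qquad
db^{[l]} = \frac{1}{m} \sum_{i=1}^{m} dZ^{[l](i)}, \qquad
dA^{[l-1]} = W^{[l]T} dZ^{[l]}.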

6.2 Linear-activation backward

# GRADED FUNCTION: linear_activation_backward

def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
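Both branches first recover

dZ^{[l]} = dA^{[l]} \odot g'\big(Z^{[l]}\big),

where g is the layer's activation (ReLU or sigmoid) and \odot denotes element-wise multiplication, and then reuse linear_backward from 6.1.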

6.3 L-layer model backward

# GRADED FUNCTION: L_model_backward

def L_model_backward(AL, Y, caches):
    """
    Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group

    Arguments:
    AL -- probability vector, output of the forward propagation (L_model_forward())
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
    caches -- list of caches containing:
                every cache of linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1) i.e. l = 0...L-2)
                the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1])

    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] = ...
             grads["dW" + str(l)] = ...
             grads["db" + str(l)] = ...
    """
    grads = {}
    L = len(caches)  # the number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)  # after this line, Y is the same shape as AL

    # Initializing the backpropagation: gradient of the cost with respect to AL
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Lth layer (SIGMOID -> LINEAR) gradients
    current_cache = caches[L - 1]
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(
        dAL, current_cache, 'sigmoid')

    # Loop from l = L-2 down to l = 0: lth layer (RELU -> LINEAR) gradients
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, 'relu')
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads
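The starting gradient dAL is the derivative of the cross-entropy cost with respect to the output activation, taken element-wise:

\frac{\partial J}{\partial A^{[L]}} = -\left( \frac{Y}{A^{[L]}} - \frac{1 - Y}{1 - A^{[L]}} \right).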

6.4 Update parameters


def update_parameters(parameters, grads, learning_rate):
    """
    Update parameters using gradient descent

    Arguments:
    parameters -- python dictionary containing your parameters
    grads -- python dictionary containing your gradients, output of L_model_backward

    Returns:
    parameters -- python dictionary containing your updated parameters
                  parameters["W" + str(l)] = ...
                  parameters["b" + str(l)] = ...
    """
    L = len(parameters) // 2  # number of layers in the neural network

    for l in range(L):
        parameters["W" + str(l + 1)] = parameters["W" + str(l + 1)] - learning_rate * grads["dW" + str(l + 1)]
        parameters["b" + str(l + 1)] = parameters["b" + str(l + 1)] - learning_rate * grads["db" + str(l + 1)]

    return parameters
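This is plain gradient descent, applied layer by layer with learning rate \alpha:

W^{[l]} \leftarrow W^{[l]} - \alpha\, dW^{[l]}, \qquad
b^{[l]} \leftarrow b^{[l]} - \alpha\, db^{[l]}.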

7. Conclusion

Congrats on implementing all the functions required for building a deep neural network!

We know it was a long assignment, but going forward it will only get better. The next part of the assignment is easier.

In the next assignment you will put all these together to build two models:

  • A two-layer neural network
  • An L-layer neural network

You will in fact use these models to classify cat vs non-cat images!
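For completeness, the training loop from that follow-up assignment, which ties all of the functions above together, is reproduced below.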


# GRADED FUNCTION: L_layer_model

def L_layer_model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):  # lr was 0.009
    """
    Implements a L-layer neural network: [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID.

    Arguments:
    X -- data, numpy array of shape (num_px * num_px * 3, number of examples)
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat), of shape (1, number of examples)
    layers_dims -- list containing the input size and each layer size, of length (number of layers + 1).
    learning_rate -- learning rate of the gradient descent update rule
    num_iterations -- number of iterations of the optimization loop
    print_cost -- if True, it prints the cost every 100 steps

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    np.random.seed(1)
    costs = []  # keep track of cost

    # Parameters initialization.
    parameters = initialize_parameters_deep(layers_dims)

    # Loop (gradient descent)
    for i in range(0, num_iterations):

        # Forward propagation: [LINEAR -> RELU]*(L-1) -> LINEAR -> SIGMOID.
        AL, caches = L_model_forward(X, parameters)

        # Compute cost.
        cost = compute_cost(AL, Y)

        # Backward propagation.
        grads = L_model_backward(AL, Y, caches)

        # Update parameters.
        parameters = update_parameters(parameters, grads, learning_rate)

        # Print and record the cost every 100 iterations
        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
            costs.append(cost)

    # plot the cost
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')
    plt.title("Learning rate = " + str(learning_rate))
    plt.show()

    return parameters
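A typical call, assuming preprocessed inputs train_x of shape (12288, m) and labels train_y of shape (1, m) as in the follow-up assignment (train_x, train_y, and the layer sizes below are illustrative placeholders, not defined in these notes):

layers_dims = [12288, 20, 7, 5, 1]  # 4-layer model: 12288 inputs -> 20 -> 7 -> 5 -> 1
parameters = L_layer_model(train_x, train_y, layers_dims, num_iterations=2500, print_cost=True)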
