Two-layer network (affine layer + ReLU layer)


Theory: 2022 CS231n notes - Neural Networks and Backpropagation (iwill323's blog on CSDN)

Contents

Imports and data preprocessing

Affine layer

Layer code tests

ReLU layer

Affine + ReLU

Loss layers: Softmax and SVM

Two-layer network

SGD

Solver

Training the model with the Solver

Visualizing the weights

Tuning the model

Functions worth noting

Adding a matrix and a vector

Multiplying a matrix and a vector

np.random.randint

setdefault

pop


Imports and data preprocessing

# As usual, a bit of setup
from __future__ import print_function
import time
import numpy as np
import matplotlib.pyplot as plt
from cs231n.data_utils import get_CIFAR10_data
from cs231n.gradient_check import eval_numerical_gradient, eval_numerical_gradient_array

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see 
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

# Load the (preprocessed) CIFAR10 data.
# You need to point cifar10_dir inside get_CIFAR10_data() at your local dataset, e.g. cifar10_dir = 'cs231n\datasets\CIFAR10'
data = get_CIFAR10_data()
for k, v in list(data.items()):
    print(('%s: ' % k, v.shape))

Here is get_CIFAR10_data for reference. It loads the dataset, splits it into train/val/test, subtracts the mean image, and moves the channel dimension to the front.

Note that it has no guard against loading the data more than once.

def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, subtract_mean=True):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for classifiers. These are the same steps as we used for the SVM, but
    condensed to a single function.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = "cs231n\datasets\CIFAR10"
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    if subtract_mean:
        mean_image = np.mean(X_train, axis=0)
        X_train -= mean_image
        X_val -= mean_image
        X_test -= mean_image

    # Transpose so that channels come first
    X_train = X_train.transpose(0, 3, 1, 2).copy()
    X_val = X_val.transpose(0, 3, 1, 2).copy()
    X_test = X_test.transpose(0, 3, 1, 2).copy()

    # Package data into a dictionary
    return {
        "X_train": X_train,
        "y_train": y_train,
        "X_val": X_val,
        "y_val": y_val,
        "X_test": X_test,
        "y_test": y_test,
    }

Affine layer

def affine_forward(x, w, b):
    """
    The input x has shape (N, d_1, ..., d_k) and contains a minibatch of N
    examples. We will reshape each input into a vector of dimension
    D = d_1 * ... * d_k, and then transform it to an output vector of dimension M.

    Inputs:
    - x: A numpy array containing input data, of shape (N, d_1, ..., d_k)
    - w: A numpy array of weights, of shape (D, M)
    - b: A numpy array of biases, of shape (M,)

    Returns a tuple of:
    - out: output, of shape (N, M)
    - cache: (x, w, b)
    """
    x_vector = x.reshape(x.shape[0], -1)
    out = x_vector.dot(w) + b  # x_vector.dot(w) has shape (N, M); b of shape (M,) is broadcast onto every row
    cache = (x, w, b)
    return out, cache


def affine_backward(dout, cache):
    """
    Computes the backward pass for an affine layer.

    Inputs:
    - dout: Upstream derivative, of shape (N, M)
    - cache: Tuple of:
      - x: Input data, of shape (N, d_1, ... d_k)
      - w: Weights, of shape (D, M)
      - b: Biases, of shape (M,)

    Returns a tuple of:
    - dx: Gradient with respect to x, of shape (N, d_1, ..., d_k)
    - dw: Gradient with respect to w, of shape (D, M)
    - db: Gradient with respect to b, of shape (M,)
    """
    x, w, b = cache
    dx = dout.dot(w.T).reshape(x.shape)  # (N, M) * (M, D), then reshape back to the input shape
    x_vector = x.reshape(x.shape[0], -1)
    dw = x_vector.T.dot(dout)  # (D, N) * (N, M)
    # db = np.dot(dout.T, np.ones(x.shape[0]))  # dout.T: (M, N); equivalent to summing each row
    db = dout.sum(axis=0)  # a sum, not a mean: see the note below the code
    return dx, dw, db
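
For reference, the backward pass implements the standard matrix-calculus identities for out = x_vec·W + b, with x_vec of shape (N, D):

\frac{\partial L}{\partial x_\mathrm{vec}} = \frac{\partial L}{\partial \mathrm{out}}\,W^{\top},\qquad
\frac{\partial L}{\partial W} = x_\mathrm{vec}^{\top}\,\frac{\partial L}{\partial \mathrm{out}},\qquad
\frac{\partial L}{\partial b} = \sum_{i=1}^{N}\left(\frac{\partial L}{\partial \mathrm{out}}\right)_{i,:}

db is a sum rather than a mean because b is broadcast onto every row of the output, so each example contributes its own gradient; the 1/N averaging already happens inside the loss function.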

Layer code tests

Forward pass

# Test the affine_forward function
num_inputs = 2
input_shape = (4, 5, 6)
output_dim = 3

input_size = num_inputs * np.prod(input_shape)
weight_size = output_dim * np.prod(input_shape)

x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape)
w = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape), output_dim)
b = np.linspace(-0.3, 0.1, num=output_dim)

out, _ = affine_forward(x, w, b)
correct_out = np.array([[ 1.49834967,  1.70660132,  1.91485297],
                        [ 3.25553199,  3.5141327,   3.77273342]])

# Compare your output with ours. The error should be around e-9 or less.
print('Testing affine_forward function:')
print('difference: ', rel_error(out, correct_out))

Backward pass: gradient check

# Test the affine_backward function
np.random.seed(231)
x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

_, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dout, cache)

# The error should be around e-10 or less
print('Testing affine_backward function:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))

Here is the gradient-check helper for reference:

def eval_numerical_gradient_array(f, x, df, h=1e-5):
    """
    Evaluate a numeric gradient for a function that accepts a numpy
    array and returns a numpy array.
    """
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=["multi_index"], op_flags=["readwrite"])
    while not it.finished:
        ix = it.multi_index

        oldval = x[ix]
        x[ix] = oldval + h
        pos = f(x).copy()
        x[ix] = oldval - h
        neg = f(x).copy()
        x[ix] = oldval

        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad
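
Each grad[ix] is a centered finite difference contracted with the upstream gradient df, i.e. the chain rule evaluated numerically (e_i is the unit perturbation of input entry ix, h = 1e-5):

\frac{\partial L}{\partial x_{i}}
= \sum_{j}\frac{\partial L}{\partial f_{j}}\,\frac{\partial f_{j}}{\partial x_{i}}
\approx \sum_{j}\mathrm{df}_{j}\,\frac{f_{j}(x+h e_{i})-f_{j}(x-h e_{i})}{2h}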

ReLU layer

def relu_forward(x):
    """
    Computes the forward pass for a layer of rectified linear units (ReLUs).

    Input:
    - x: Inputs, of any shape

    Returns a tuple of:
    - out: Output, of the same shape as x
    - cache: x
    """
    out = None
    out = np.maximum(0, x)
    cache = x
    return out, cache


def relu_backward(dout, cache):
    """
    Computes the backward pass for a layer of rectified linear units (ReLUs).

    Input:
    - dout: Upstream derivatives, of any shape
    - cache: Input x, of same shape as dout

    Returns:
    - dx: Gradient with respect to x
    """
    dx, x = None, cache
    dx = x
    dx[dx < 0] = 0  # note: dx = x only copies the reference, so these in-place writes also modify the cached x
    dx[dx > 0] = 1
    dx *= dout
    return dx
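
Because dx = x only copies a reference, the version above also overwrites the cached x with the 0/1 mask. That is harmless here since the cache is not reused afterwards, but a non-mutating variant is just as short. The name relu_backward_safe is only illustrative; this is a sketch, not the assignment's required implementation:

def relu_backward_safe(dout, cache):
    """ReLU backward pass that leaves the cached input untouched."""
    x = cache
    dx = dout * (x > 0)  # the boolean mask acts as the 0/1 local gradient
    return dx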

Affine + ReLU

def affine_relu_forward(x, w, b):
    """
    Convenience layer that performs an affine transform followed by a ReLU

    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: Object to give to the backward pass
    """
    a, fc_cache = affine_forward(x, w, b)  # out, cache
    out, relu_cache = relu_forward(a)
    cache = (fc_cache, relu_cache)
    return out, cache


def affine_relu_backward(dout, cache):
    """Backward pass for the affine-relu convenience layer"""
    fc_cache, relu_cache = cache
    da = relu_backward(dout, relu_cache)
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db
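
A quick numeric check of the combined layer can reuse the same helpers as the affine tests above. This is a small sketch along the lines of the assignment notebook, not a verbatim copy of it:

np.random.seed(231)
x = np.random.randn(2, 3, 4)
w = np.random.randn(12, 10)
b = np.random.randn(10)
dout = np.random.randn(2, 10)

out, cache = affine_relu_forward(x, w, b)
dx, dw, db = affine_relu_backward(dout, cache)

dx_num = eval_numerical_gradient_array(lambda x: affine_relu_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_relu_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_relu_forward(x, w, b)[0], b, dout)

# Errors should again be around e-10 or less
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))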

Loss layers: Softmax and SVM

def svm_loss(x, y):
    """
    Computes the loss and gradient for multiclass SVM classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth
      class for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
      0 <= y[i] < C

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    loss, dx = None, None
    num_train = x.shape[0]
    num_classes = x.shape[1]
    correct_class_score = x[np.arange(num_train), y].reshape(-1, 1)
    margin = np.maximum(0, x - correct_class_score + 1)
    margin[np.arange(num_train), y] = 0
    loss = np.sum(margin) / num_train
    margin[margin > 0] = 1
    correct_number = np.sum(margin, axis=1)
    margin[np.arange(num_train), y] -= correct_number
    dx = margin / num_train
    return loss, dx


def softmax_loss(x, y):
    """
    Computes the loss and gradient for softmax classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth
      class for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
      0 <= y[i] < C

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    loss, dx = None, None
    num_train = x.shape[0]
    scores = x - np.max(x, axis=1).reshape(-1, 1)
    normalized_scores = np.exp(scores) / np.sum(np.exp(scores), axis=1).reshape(-1, 1)
    loss = -np.sum(np.log(normalized_scores[np.arange(num_train), y]))
    loss /= num_train
    normalized_scores[np.arange(num_train), y] -= 1
    dx = normalized_scores / num_train
    return (loss, dx)
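
For reference, softmax_loss computes the averaged cross-entropy loss and its gradient with respect to the scores (standard formulas; p is the row-wise softmax of x):

L = -\frac{1}{N}\sum_{i=1}^{N}\log\frac{e^{x_{i,y_i}}}{\sum_{j}e^{x_{i,j}}},
\qquad
\frac{\partial L}{\partial x_{i,j}} = \frac{1}{N}\left(p_{i,j}-\mathbb{1}[j=y_i]\right)

Subtracting the row maximum before exponentiating, as the code does, changes neither value; it only keeps the exponentials numerically stable.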

Two-layer network

import numpy as np


class TwoLayerNet(object):
    """
    A two-layer fully-connected neural network with ReLU nonlinearity and
    softmax loss that uses a modular layer design. We assume an input dimension
    of D, a hidden dimension of H, and perform classification over C classes.

    The architecture should be affine - relu - affine - softmax.

    Note that this class does not implement gradient descent; instead, it
    will interact with a separate Solver object that is responsible for running
    optimization.
    """

    def __init__(self, input_dim=3 * 32 * 32, hidden_dim=100, num_classes=10,
                 weight_scale=1e-3, reg=0.0):
        """
        Initialize a new network.

        Inputs:
        - input_dim: An integer giving the size of the input
        - hidden_dim: An integer giving the size of the hidden layer
        - num_classes: An integer giving the number of classes to classify
        - weight_scale: Scalar giving the standard deviation for random
          initialization of the weights.
        - reg: Scalar giving L2 regularization strength.
        """
        self.params = {}
        self.reg = reg

        # Initialize the weights and biases of the two-layer net.
        W1 = np.random.normal(0.0, weight_scale, (input_dim, hidden_dim))
        W2 = np.random.normal(0.0, weight_scale, (hidden_dim, num_classes))
        b1 = np.zeros(hidden_dim)
        b2 = np.zeros(num_classes)
        self.params = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        W1 = self.params['W1']
        b1 = self.params['b1']
        W2 = self.params['W2']
        b2 = self.params['b2']

        relu_output, relu_cache = affine_relu_forward(X, W1, b1)
        scores, cache = affine_forward(relu_output, W2, b2)

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        loss, d_scores = softmax_loss(scores, y)
        loss += 0.5 * self.reg * (W1 * W1).sum() + 0.5 * self.reg * (W2 * W2).sum()
        dx, grads['W2'], grads['b2'] = affine_backward(d_scores, cache)
        _, grads['W1'], grads['b1'] = affine_relu_backward(dx, relu_cache)
        grads['W1'] += self.reg * W1
        grads['W2'] += self.reg * W2

        return loss, grads
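
A quick sanity check, sketched here assuming the layer functions above are already defined in the session: with the small default weight_scale and reg=0, the initial scores are close to zero, so the softmax loss should come out near log(10) ≈ 2.3026, which matches the first-iteration loss in the training log further below.

np.random.seed(231)
model = TwoLayerNet(input_dim=3 * 32 * 32, hidden_dim=50, num_classes=10, reg=0.0)
X_fake = np.random.randn(50, 3, 32, 32)   # a tiny fake batch, for illustration only
y_fake = np.random.randint(10, size=50)
loss, _ = model.loss(X_fake, y_fake)
print('initial loss: %f (expect roughly %f)' % (loss, np.log(10)))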

SGD

def sgd(w, dw, config=None):
    """
    Performs vanilla stochastic gradient descent.

    config format:
    - learning_rate: Scalar learning rate.
    """
    if config is None:
        config = {}
    config.setdefault("learning_rate", 1e-2)

    w -= config["learning_rate"] * dw
    return w, config
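
A tiny usage sketch. Note that w -= ... updates the array in place, so the next_w handed back to the Solver is the very same object already stored in model.params:

w = np.ones(3)
dw = np.full(3, 0.5)
next_w, config = sgd(w, dw, {"learning_rate": 0.1})
print(next_w)       # [0.95 0.95 0.95]
print(next_w is w)  # True: the update happened in place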

Solver

from __future__ import print_function, division
from future import standard_library
from cs231n import optim

standard_library.install_aliases()

import os
import pickle as pickle
import numpy as np


class Solver(object):
    """
    A Solver encapsulates all the logic necessary for training classification
    models. The Solver performs stochastic gradient descent using different
    update rules defined in optim.py.

    The solver accepts both training and validation data and labels so it can
    periodically check classification accuracy on both training and validation
    data to watch out for overfitting.

    Example usage might look something like this:

    data = {
      'X_train': # training data
      'y_train': # training labels
      'X_val': # validation data
      'y_val': # validation labels
    }
    model = MyAwesomeModel(hidden_size=100, reg=10)
    solver = Solver(model, data,
                    update_rule='sgd',
                    optim_config={
                      'learning_rate': 1e-4,
                    },
                    lr_decay=0.95,
                    num_epochs=5, batch_size=200,
                    print_every=100)
    solver.train()

    A Solver works on a model object that must conform to the following API:

    - model.params must be a dictionary mapping string parameter names to numpy
      arrays containing parameter values.

    - model.loss(X, y) must be a function that computes training-time loss and
      gradients, and test-time classification scores, with the following inputs
      and outputs:

      Inputs:
      - X: Array giving a minibatch of input data of shape (N, d_1, ..., d_k)
      - y: Array of labels, of shape (N,) giving labels for X where y[i] is the
        label for X[i].

      Returns:
      If y is None, run a test-time forward pass and return:
      - scores: Array of shape (N, C) giving classification scores for X where
        scores[i, c] gives the score of class c for X[i].

      If y is not None, run a training time forward and backward pass and
      return a tuple of:
      - loss: Scalar giving the loss
      - grads: Dictionary with the same keys as self.params mapping parameter
        names to gradients of the loss with respect to those parameters.
    """

    def __init__(self, model, data, **kwargs):
        """
        Construct a new Solver instance.

        Required arguments:
        - model: A model object conforming to the API described above
        - data: A dictionary of training and validation data containing:
          'X_train': Array, shape (N_train, d_1, ..., d_k) of training images
          'X_val': Array, shape (N_val, d_1, ..., d_k) of validation images
          'y_train': Array, shape (N_train,) of labels for training images
          'y_val': Array, shape (N_val,) of labels for validation images

        Optional arguments:
        - update_rule: A string giving the name of an update rule in optim.py.
          Default is 'sgd'.
        - optim_config: A dictionary containing hyperparameters that will be
          passed to the chosen update rule. Each update rule requires different
          hyperparameters (see optim.py) but all update rules require a
          'learning_rate' parameter so that should always be present.
        - lr_decay: A scalar for learning rate decay; after each epoch the
          learning rate is multiplied by this value.
        - batch_size: Size of minibatches used to compute loss and gradient
          during training.
        - num_epochs: The number of epochs to run for during training.
        - print_every: Integer; training losses will be printed every
          print_every iterations.
        - verbose: Boolean; if set to false then no output will be printed
          during training.
        - num_train_samples: Number of training samples used to check training
          accuracy; default is 1000; set to None to use entire training set.
        - num_val_samples: Number of validation samples to use to check val
          accuracy; default is None, which uses the entire validation set.
        - checkpoint_name: If not None, then save model checkpoints here every
          epoch.
        """
        self.model = model
        self.X_train = data['X_train']
        self.y_train = data["y_train"]
        self.X_val = data["X_val"]
        self.y_val = data["y_val"]

        # Unpack keyword arguments
        self.update_rule = kwargs.pop("update_rule", "sgd")
        self.optim_config = kwargs.pop("optim_config", {})
        self.lr_decay = kwargs.pop("lr_decay", 1.0)
        self.batch_size = kwargs.pop("batch_size", 100)
        self.num_epochs = kwargs.pop("num_epochs", 10)
        self.num_train_samples = kwargs.pop("num_train_samples", 1000)
        self.num_val_samples = kwargs.pop("num_val_samples", None)
        self.checkpoint_name = kwargs.pop("checkpoint_name", None)
        self.print_every = kwargs.pop("print_every", 10)
        self.verbose = kwargs.pop("verbose", True)

        # Throw an error if there are extra keyword arguments
        if len(kwargs) > 0:
            extra = ", ".join('"%s"' % k for k in list(kwargs.keys()))
            raise ValueError("Unrecognized arguments %s" % extra)

        # Make sure the update rule exists, then replace the string name with the actual function.
        # hasattr() checks whether an object has the given attribute; here we look up
        # the update rule by name inside the optim module.
        if not hasattr(optim, self.update_rule):
            raise ValueError('Invalid update_rule "%s"' % self.update_rule)
        self.update_rule = getattr(optim, self.update_rule)

        self._reset()

    def _reset(self):
        """
        Set up some book-keeping variables for optimization. Don't call this manually.
        """
        # Set up some variables for book-keeping
        self.epoch = 0
        self.best_val_acc = 0
        self.best_params = {}
        self.loss_history = []
        self.train_acc_history = []
        self.val_acc_history = []

        # Make a deep copy of the optim_config for each parameter
        self.optim_configs = {}
        for p in self.model.params:  # model.params is a dict
            d = {k: v for k, v in self.optim_config.items()}  # optim_config is a dict
            self.optim_configs[p] = d  # the entries start out identical, but update rules may later store per-parameter state in them

    def _step(self):
        """
        Make a single gradient update. This is called by train()
        """
        # Make a minibatch of training data
        num_train = self.X_train.shape[0]
        batch_mask = np.random.choice(num_train, self.batch_size)
        X_batch = self.X_train[batch_mask]
        y_batch = self.y_train[batch_mask]

        # Compute loss and gradient
        loss, grads = self.model.loss(X_batch, y_batch)
        self.loss_history.append(loss)

        # Perform a parameter update
        for p, w in self.model.params.items():  # model.params is a dict
            dw = grads[p]  # the gradient of whichever parameter p names
            config = self.optim_configs[p]  # e.g. the learning rate
            next_w, next_config = self.update_rule(w, dw, config)
            self.model.params[p] = next_w
            self.optim_configs[p] = next_config

    def _save_checkpoint(self):
        if self.checkpoint_name is None:
            return
        checkpoint = {
            "model": self.model,
            "update_rule": self.update_rule,
            "lr_decay": self.lr_decay,
            "optim_config": self.optim_config,
            "batch_size": self.batch_size,
            "num_train_samples": self.num_train_samples,
            "num_val_samples": self.num_val_samples,
            "epoch": self.epoch,
            "loss_history": self.loss_history,
            "train_acc_history": self.train_acc_history,
            "val_acc_history": self.val_acc_history,
        }
        filename = "%s_epoch_%d.pkl" % (self.checkpoint_name, self.epoch)
        if self.verbose:
            print('Saving checkpoint to "%s"' % filename)
        with open(filename, "wb") as f:
            pickle.dump(checkpoint, f)

    def check_accuracy(self, X, y, num_samples=None, batch_size=100):
        """
        Check accuracy of the model on the provided data.

        Inputs:
        - X: Array of data, of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,)
        - num_samples: If not None, subsample the data and only test the model
          on num_samples datapoints.
        - batch_size: Split X and y into batches of this size to avoid using
          too much memory.

        Returns:
        - acc: Scalar giving the fraction of instances that were correctly
          classified by the model.
        """
        # Maybe subsample the data
        N = X.shape[0]
        if num_samples is not None and N > num_samples:
            mask = np.random.choice(N, num_samples)
            N = num_samples
            X = X[mask]
            y = y[mask]

        # Compute predictions in batches
        num_batches = N // batch_size
        if N % batch_size != 0:
            num_batches += 1
        y_pred = []
        for i in range(num_batches):
            start = i * batch_size
            end = (i + 1) * batch_size
            scores = self.model.loss(X[start:end])  # with y=None, model.loss returns the scores
            y_pred.append(np.argmax(scores, axis=1))
        y_pred = np.hstack(y_pred)
        acc = np.mean(y_pred == y)

        return acc

    def train(self):
        """
        Run optimization to train the model.
        """
        num_train = self.X_train.shape[0]
        iterations_per_epoch = max(num_train // self.batch_size, 1)
        num_iterations = self.num_epochs * iterations_per_epoch

        for t in range(num_iterations):
            self._step()

            # Maybe print training loss
            if self.verbose and t % self.print_every == 0:
                print("(Iteration %d / %d) loss: %f"
                      % (t + 1, num_iterations, self.loss_history[-1]))

            # At the end of every epoch, increment the epoch counter and decay
            # the learning rate.
            epoch_end = (t + 1) % iterations_per_epoch == 0
            if epoch_end:
                self.epoch += 1
                for k in self.optim_configs:
                    self.optim_configs[k]["learning_rate"] *= self.lr_decay

            # Check train and val accuracy on the first iteration, the last
            # iteration, and at the end of each epoch.
            first_it = t == 0
            last_it = t == num_iterations - 1
            if first_it or last_it or epoch_end:
                train_acc = self.check_accuracy(self.X_train, self.y_train, num_samples=self.num_train_samples)
                val_acc = self.check_accuracy(self.X_val, self.y_val, num_samples=self.num_val_samples)
                self.train_acc_history.append(train_acc)
                self.val_acc_history.append(val_acc)
                self._save_checkpoint()

                if self.verbose:
                    print("(Epoch %d / %d) train acc: %f; val_acc: %f"
                          % (self.epoch, self.num_epochs, train_acc, val_acc))

                # Keep track of the best model
                if val_acc > self.best_val_acc:
                    self.best_val_acc = val_acc
                    self.best_params = {}
                    for k, v in self.model.params.items():
                        self.best_params[k] = v.copy()

        # At the end of training swap the best params into the model
        self.model.params = self.best_params
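
Because the Solver resolves update_rule with hasattr/getattr on the optim module, any function with the (w, dw, config) -> (next_w, config) signature added to optim.py becomes selectable by name. A hypothetical example (sgd_with_weight_decay is only an illustration, not part of optim.py):

def sgd_with_weight_decay(w, dw, config=None):
    """Vanilla SGD plus L2 weight decay folded into the gradient (illustrative only)."""
    if config is None:
        config = {}
    config.setdefault("learning_rate", 1e-2)
    config.setdefault("weight_decay", 1e-4)
    w -= config["learning_rate"] * (dw + config["weight_decay"] * w)
    return w, config

# After adding it to optim.py it could be selected with:
# solver = Solver(model, data, update_rule='sgd_with_weight_decay', ...)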

Training the model with the Solver

input_size = 32 * 32 * 3
hidden_size = 50
num_classes = 10
model = TwoLayerNet(input_size, hidden_size, num_classes)
solver = None

solver = Solver(model, data,
                update_rule='sgd',
                optim_config={
                    'learning_rate': 1e-3,
                },
                lr_decay=0.95,
                num_epochs=5, batch_size=100,
                print_every=500, verbose=True)
# if the keyword arguments after data are omitted, the defaults are used
solver.train()   
(Iteration 1 / 2450) loss: 2.300230
(Epoch 0 / 5) train acc: 0.126000; val_acc: 0.127000
(Epoch 1 / 5) train acc: 0.438000; val_acc: 0.434000
(Iteration 501 / 2450) loss: 1.513676
(Epoch 2 / 5) train acc: 0.475000; val_acc: 0.464000
(Iteration 1001 / 2450) loss: 1.297377
(Epoch 3 / 5) train acc: 0.491000; val_acc: 0.471000
(Iteration 1501 / 2450) loss: 1.630036
(Epoch 4 / 5) train acc: 0.511000; val_acc: 0.477000
(Iteration 2001 / 2450) loss: 1.391281
(Epoch 5 / 5) train acc: 0.516000; val_acc: 0.479000

Plotting the loss and accuracy

# Run this cell to visualize training loss and train / val accuracy
plt.subplot(2, 1, 1)
plt.title('Training loss')
plt.plot(solver.loss_history, 'o')
plt.xlabel('Iteration')

plt.subplot(2, 1, 2)
plt.title('Accuracy')
plt.plot(solver.train_acc_history, '-o', label='train')
plt.plot(solver.val_acc_history, '-o', label='val')
plt.plot([0.5] * len(solver.val_acc_history), 'k--')
plt.xlabel('Epoch')
plt.legend(loc='lower right')
plt.gcf().set_size_inches(15, 12)
plt.show()

Visualizing the weights

from math import sqrt, ceil
import numpy as np


def visualize_grid(Xs, ubound=255.0, padding=1):
    """
    Reshape a 4D tensor of image data to a grid for easy visualization.

    Inputs:
    - Xs: Data of shape (N, H, W, C)
    - ubound: Output grid will have values scaled to the range [0, ubound]
    - padding: The number of blank pixels between elements of the grid
    """
    (N, H, W, C) = Xs.shape
    grid_size = int(ceil(sqrt(N)))
    grid_height = H * grid_size + padding * (grid_size - 1)
    grid_width = W * grid_size + padding * (grid_size - 1)
    grid = np.zeros((grid_height, grid_width, C))
    next_idx = 0
    y0, y1 = 0, H
    for y in range(grid_size):
        x0, x1 = 0, W
        for x in range(grid_size):
            if next_idx < N:
                img = Xs[next_idx]
                low, high = np.min(img), np.max(img)
                grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low)
                # grid[y0:y1, x0:x1] = Xs[next_idx]
                next_idx += 1
            x0 += W + padding
            x1 += W + padding
        y0 += H + padding
        y1 += H + padding
    # grid_max = np.max(grid)
    # grid_min = np.min(grid)
    # grid = ubound * (grid - grid_min) / (grid_max - grid_min)
    return grid


# Visualize the weights of the network
def show_net_weights(net):
    W1 = net.params['W1']
    W1 = W1.reshape(3, 32, 32, -1).transpose(3, 1, 2, 0)
    plt.imshow(visualize_grid(W1, padding=3).astype('uint8'))
    plt.gca().axis('off')
    plt.show()

show_net_weights(model)

Tuning the model

Below, you should experiment with different values of the various hyperparameters, including hidden layer size, learning rate, number of training epochs, and regularization strength. You might also consider tuning the learning rate decay, but you should be able to get good performance using the default value.

The code below searches over the hyperparameters in a way that sits between random search and grid search: the candidate values form fixed grids, but each trial samples one value at random from each grid.

best_model = None
best_val_accuracy = 0.0

# Tune hyperparameters using the validation set. Store your best trained model in best_model.
input_dim = 32 * 32 * 3
num_classes = 10
best_val_accuracy = 0.0

hidden_dims = [50, 60, 80]
lrs = [5e-5, 1e-4, 5e-4, 1e-3]
regs = [0.5, 0.6]
epos = [1, 2]

def random_chose_para(hidden_dims, lrs, regs, epos):
    hidden_dim = hidden_dims[np.random.randint(0, len(hidden_dims))]
    lr = lrs[np.random.randint(0, len(lrs))]
    reg = regs[np.random.randint(0, len(regs))]
    epo = epos[np.random.randint(0, len(epos))]
    return hidden_dim, lr, reg, epo

for ite in range(1):
    hidden_dim, lr, reg, epo = random_chose_para(hidden_dims, lrs, regs, epos)
    model = TwoLayerNet(input_dim, hidden_dim, num_classes, reg=reg)
    solver = Solver(model, data,
                    update_rule='sgd',
                    optim_config={
                        'learning_rate': lr,
                    },
                    lr_decay=0.95,
                    num_epochs=epo, batch_size=100,
                    print_every=500, verbose=True)
    solver.train()
    print('Validation accuracy: ', best_val_accuracy)
    if solver.best_val_acc > best_val_accuracy:
        best_val_accuracy = solver.best_val_acc
        best_model = model

print('Validation accuracy: ', best_val_accuracy)

Functions worth noting

Adding a matrix and a vector

>>a = np.array([[1,2,3],[4,5,6],[7,8,9]])
>>print(a)

[[1 2 3]
 [4 5 6]
 [7 8 9]]

>>b = np.array([1,2,3])
>>print(b.shape)

(3,)

>>print(a+b)

[[ 2  4  6]
 [ 5  7  9]
 [ 8 10 12]]

Multiplying a matrix and a vector

>>a = np.ones(2)
>>b = np.array([[1,2],[3,4]])
>>print(b.dot(a))

[3. 7.]

np.random.randint

>>print(np.random.randint(5, size = 3))

[1 3 2]

>>print(np.random.randint(5, size = (2,2)))

[[1 2]
 [1 4]]

setdefault

>>a = {}
>>a.setdefault("learning_rate", 1e-2)
>>print(a)

{'learning_rate': 0.01}

pop

>>b = {}
>>b.pop("update_rule", "sgd")
>>b1 = b.pop("update_rule", "sgd")
>>print(b1)

sgd

>>c = {'update_rule':'dad'}
>>c1 = c.pop("update_rule", "sgd")
>>print(c)
>>print(c1)

{}
dad
