Two-layer network (affine layer + ReLU layer)


Theory: 2022 CS231n notes - Neural Networks and Backpropagation (iwill323's blog on CSDN)

Contents

Imports and data preprocessing

Affine layer

Layer code tests

ReLU layer

Affine + ReLU

Loss layers: Softmax and SVM

Two-layer network

SGD

Solver

Training the model with the Solver

Visualizing the weights

Tuning the model

Functions worth noting

Adding a matrix and a vector

Multiplying a matrix and a vector

np.random.randint

setdefault

pop


Imports and data preprocessing

# As usual, a bit of setup
from __future__ import print_function
import time
import numpy as np
import matplotlib.pyplot as plt
from cs231n.data_utils import get_CIFAR10_data
from cs231n.gradient_check import eval_numerical_gradient, eval_numerical_gradient_array

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see 
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

# Load the (preprocessed) CIFAR10 data.
# You need to point cifar10_dir inside get_CIFAR10_data() at your local dataset, e.g. cifar10_dir = 'cs231n\datasets\CIFAR10'
data = get_CIFAR10_data()
for k, v in list(data.items()):
    print(('%s: ' % k, v.shape))

Here is get_CIFAR10_data for reference. It loads the dataset, splits it into train/val/test, subtracts the mean image, and moves the channel dimension to the front.

Note that it has no guard against loading the data more than once.

def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, subtract_mean=True):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for classifiers. These are the same steps as we used for the SVM, but
    condensed to a single function.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = "cs231n\datasets\CIFAR10"
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    if subtract_mean:
        mean_image = np.mean(X_train, axis=0)
        X_train -= mean_image
        X_val -= mean_image
        X_test -= mean_image

    # Transpose so that channels come first
    X_train = X_train.transpose(0, 3, 1, 2).copy()
    X_val = X_val.transpose(0, 3, 1, 2).copy()
    X_test = X_test.transpose(0, 3, 1, 2).copy()

    # Package data into a dictionary
    return {
        "X_train": X_train,
        "y_train": y_train,
        "X_val": X_val,
        "y_val": y_val,
        "X_test": X_test,
        "y_test": y_test,
    }

Affine layer

def affine_forward(x, w, b):
    """
    The input x has shape (N, d_1, ..., d_k) and contains a minibatch of N
    examples. We will reshape each input into a vector of dimension
    D = d_1 * ... * d_k, and then transform it to an output vector of dimension M.

    Inputs:
    - x: A numpy array containing input data, of shape (N, d_1, ..., d_k)
    - w: A numpy array of weights, of shape (D, M)
    - b: A numpy array of biases, of shape (M,)

    Returns a tuple of:
    - out: output, of shape (N, M)
    - cache: (x, w, b)
    """
    x_vector = x.reshape(x.shape[0], -1)
    out = x_vector.dot(w) + b  # x_vector.dot(w) has shape (N, M); b of shape (M,) is broadcast onto every row
    cache = (x, w, b)
    return out, cache


def affine_backward(dout, cache):
    """
    Computes the backward pass for an affine layer.

    Inputs:
    - dout: Upstream derivative, of shape (N, M)
    - cache: Tuple of:
      - x: Input data, of shape (N, d_1, ... d_k)
      - w: Weights, of shape (D, M)
      - b: Biases, of shape (M,)

    Returns a tuple of:
    - dx: Gradient with respect to x, of shape (N, d_1, ..., d_k)
    - dw: Gradient with respect to w, of shape (D, M)
    - db: Gradient with respect to b, of shape (M,)
    """
    x, w, b = cache
    dx = dout.dot(w.T).reshape(x.shape)  # (N, M) * (M, D), then reshape back to the input shape
    x_vector = x.reshape(x.shape[0], -1)
    dw = x_vector.T.dot(dout)  # (D, N) * (N, M)
    # db = np.dot(dout.T, np.ones(x.shape[0]))  # dout.T: (M, N); equivalent to summing each row
    db = dout.sum(axis=0)  # a sum, not a mean: see the note below the code
    return dx, dw, db
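
For reference, the backward pass implements the standard matrix-calculus identities for out = x_vec·W + b, with x_vec of shape (N, D):

\frac{\partial L}{\partial x_\mathrm{vec}} = \frac{\partial L}{\partial \mathrm{out}}\,W^{\top},\qquad
\frac{\partial L}{\partial W} = x_\mathrm{vec}^{\top}\,\frac{\partial L}{\partial \mathrm{out}},\qquad
\frac{\partial L}{\partial b} = \sum_{i=1}^{N}\left(\frac{\partial L}{\partial \mathrm{out}}\right)_{i,:}

db is a sum rather than a mean because b is broadcast onto every row of the output, so each example contributes its own gradient; the 1/N averaging already happens inside the loss function.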

Layer code tests

Forward pass

# Test the affine_forward function
num_inputs = 2
input_shape = (4, 5, 6)
output_dim = 3

input_size = num_inputs * np.prod(input_shape)
weight_size = output_dim * np.prod(input_shape)

x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape)
w = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape), output_dim)
b = np.linspace(-0.3, 0.1, num=output_dim)

out, _ = affine_forward(x, w, b)
correct_out = np.array([[ 1.49834967,  1.70660132,  1.91485297],
                        [ 3.25553199,  3.5141327,   3.77273342]])

# Compare your output with ours. The error should be around e-9 or less.
print('Testing affine_forward function:')
print('difference: ', rel_error(out, correct_out))

Backward pass: gradient check

# Test the affine_backward function
np.random.seed(231)
x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout)

_, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dout, cache)

# The error should be around e-10 or less
print('Testing affine_backward function:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))

Here is the gradient-check helper for reference:

def eval_numerical_gradient_array(f, x, df, h=1e-5):
    """
    Evaluate a numeric gradient for a function that accepts a numpy
    array and returns a numpy array.
    """
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=["multi_index"], op_flags=["readwrite"])
    while not it.finished:
        ix = it.multi_index

        oldval = x[ix]
        x[ix] = oldval + h
        pos = f(x).copy()
        x[ix] = oldval - h
        neg = f(x).copy()
        x[ix] = oldval

        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad
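
Each grad[ix] is a centered finite difference contracted with the upstream gradient df, i.e. the chain rule evaluated numerically (e_i is the unit perturbation of input entry ix, h = 1e-5):

\frac{\partial L}{\partial x_{i}}
= \sum_{j}\frac{\partial L}{\partial f_{j}}\,\frac{\partial f_{j}}{\partial x_{i}}
\approx \sum_{j}\mathrm{df}_{j}\,\frac{f_{j}(x+h e_{i})-f_{j}(x-h e_{i})}{2h}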

ReLU layer

def relu_forward(x):
    """
    Computes the forward pass for a layer of rectified linear units (ReLUs).

    Input:
    - x: Inputs, of any shape

    Returns a tuple of:
    - out: Output, of the same shape as x
    - cache: x
    """
    out = None
    out = np.maximum(0, x)
    cache = x
    return out, cache


def relu_backward(dout, cache):
    """
    Computes the backward pass for a layer of rectified linear units (ReLUs).

    Input:
    - dout: Upstream derivatives, of any shape
    - cache: Input x, of same shape as dout

    Returns:
    - dx: Gradient with respect to x
    """
    dx, x = None, cache
    dx = x
    dx[dx < 0] = 0  # note: dx = x only copies the reference, so these in-place writes also modify the cached x
    dx[dx > 0] = 1
    dx *= dout
    return dx
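
Because dx = x only copies a reference, the version above also overwrites the cached x with the 0/1 mask. That is harmless here since the cache is not reused afterwards, but a non-mutating variant is just as short. The name relu_backward_safe is only illustrative; this is a sketch, not the assignment's required implementation:

def relu_backward_safe(dout, cache):
    """ReLU backward pass that leaves the cached input untouched."""
    x = cache
    dx = dout * (x > 0)  # the boolean mask acts as the 0/1 local gradient
    return dx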

Affine + ReLU

def affine_relu_forward(x, w, b):
    """
    Convenience layer that performs an affine transform followed by a ReLU

    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: Object to give to the backward pass
    """
    a, fc_cache = affine_forward(x, w, b)  # out, cache
    out, relu_cache = relu_forward(a)
    cache = (fc_cache, relu_cache)
    return out, cache


def affine_relu_backward(dout, cache):
    """Backward pass for the affine-relu convenience layer"""
    fc_cache, relu_cache = cache
    da = relu_backward(dout, relu_cache)
    dx, dw, db = affine_backward(da, fc_cache)
    return dx, dw, db
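
A quick numeric check of the combined layer can reuse the same helpers as the affine tests above. This is a small sketch along the lines of the assignment notebook, not a verbatim copy of it:

np.random.seed(231)
x = np.random.randn(2, 3, 4)
w = np.random.randn(12, 10)
b = np.random.randn(10)
dout = np.random.randn(2, 10)

out, cache = affine_relu_forward(x, w, b)
dx, dw, db = affine_relu_backward(dout, cache)

dx_num = eval_numerical_gradient_array(lambda x: affine_relu_forward(x, w, b)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: affine_relu_forward(x, w, b)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: affine_relu_forward(x, w, b)[0], b, dout)

# Errors should again be around e-10 or less
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))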

Loss layers: Softmax and SVM

def svm_loss(x, y):
    """
    Computes the loss and gradient for multiclass SVM classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth
      class for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
      0 <= y[i] < C

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    loss, dx = None, None
    num_train = x.shape[0]
    num_classes = x.shape[1]
    correct_class_score = x[np.arange(num_train), y].reshape(-1, 1)
    margin = np.maximum(0, x - correct_class_score + 1)
    margin[np.arange(num_train), y] = 0
    loss = np.sum(margin) / num_train
    margin[margin > 0] = 1
    correct_number = np.sum(margin, axis=1)
    margin[np.arange(num_train), y] -= correct_number
    dx = margin / num_train
    return loss, dx


def softmax_loss(x, y):
    """
    Computes the loss and gradient for softmax classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth
      class for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
      0 <= y[i] < C

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    loss, dx = None, None
    num_train = x.shape[0]
    scores = x - np.max(x, axis=1).reshape(-1, 1)
    normalized_scores = np.exp(scores) / np.sum(np.exp(scores), axis=1).reshape(-1, 1)
    loss = -np.sum(np.log(normalized_scores[np.arange(num_train), y]))
    loss /= num_train
    normalized_scores[np.arange(num_train), y] -= 1
    dx = normalized_scores / num_train
    return (loss, dx)
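
For reference, softmax_loss computes the averaged cross-entropy loss and its gradient with respect to the scores (standard formulas; p is the row-wise softmax of x):

L = -\frac{1}{N}\sum_{i=1}^{N}\log\frac{e^{x_{i,y_i}}}{\sum_{j}e^{x_{i,j}}},
\qquad
\frac{\partial L}{\partial x_{i,j}} = \frac{1}{N}\left(p_{i,j}-\mathbb{1}[j=y_i]\right)

Subtracting the row maximum before exponentiating, as the code does, changes neither value; it only keeps the exponentials numerically stable.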

Two-layer network

import numpy as np


class TwoLayerNet(object):
    """
    A two-layer fully-connected neural network with ReLU nonlinearity and
    softmax loss that uses a modular layer design. We assume an input dimension
    of D, a hidden dimension of H, and perform classification over C classes.

    The architecture should be affine - relu - affine - softmax.

    Note that this class does not implement gradient descent; instead, it
    will interact with a separate Solver object that is responsible for running
    optimization.
    """

    def __init__(self, input_dim=3 * 32 * 32, hidden_dim=100, num_classes=10,
                 weight_scale=1e-3, reg=0.0):
        """
        Initialize a new network.

        Inputs:
        - input_dim: An integer giving the size of the input
        - hidden_dim: An integer giving the size of the hidden layer
        - num_classes: An integer giving the number of classes to classify
        - weight_scale: Scalar giving the standard deviation for random
          initialization of the weights.
        - reg: Scalar giving L2 regularization strength.
        """
        self.params = {}
        self.reg = reg

        # Initialize the weights and biases of the two-layer net.
        W1 = np.random.normal(0.0, weight_scale, (input_dim, hidden_dim))
        W2 = np.random.normal(0.0, weight_scale, (hidden_dim, num_classes))
        b1 = np.zeros(hidden_dim)
        b2 = np.zeros(num_classes)
        self.params = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

    def loss(self, X, y=None):
        """
        Compute loss and gradient for a minibatch of data.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        If y is None, then run a test-time forward pass of the model and return:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        If y is not None, then run a training-time forward and backward pass and
        return a tuple of:
        - loss: Scalar value giving the loss
        - grads: Dictionary with the same keys as self.params, mapping parameter
          names to gradients of the loss with respect to those parameters.
        """
        scores = None
        W1 = self.params['W1']
        b1 = self.params['b1']
        W2 = self.params['W2']
        b2 = self.params['b2']

        relu_output, relu_cache = affine_relu_forward(X, W1, b1)
        scores, cache = affine_forward(relu_output, W2, b2)

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        loss, d_scores = softmax_loss(scores, y)
        loss += 0.5 * self.reg * (W1 * W1).sum() + 0.5 * self.reg * (W2 * W2).sum()
        dx, grads['W2'], grads['b2'] = affine_backward(d_scores, cache)
        _, grads['W1'], grads['b1'] = affine_relu_backward(dx, relu_cache)
        grads['W1'] += self.reg * W1
        grads['W2'] += self.reg * W2

        return loss, grads
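
A quick sanity check, sketched here assuming the layer functions above are already defined in the session: with the small default weight_scale and reg=0, the initial scores are close to zero, so the softmax loss should come out near log(10) ≈ 2.3026, which matches the first-iteration loss in the training log further below.

np.random.seed(231)
model = TwoLayerNet(input_dim=3 * 32 * 32, hidden_dim=50, num_classes=10, reg=0.0)
X_fake = np.random.randn(50, 3, 32, 32)   # a tiny fake batch, for illustration only
y_fake = np.random.randint(10, size=50)
loss, _ = model.loss(X_fake, y_fake)
print('initial loss: %f (expect roughly %f)' % (loss, np.log(10)))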

SGD

def sgd(w, dw, config=None):
    """
    Performs vanilla stochastic gradient descent.

    config format:
    - learning_rate: Scalar learning rate.
    """
    if config is None:
        config = {}
    config.setdefault("learning_rate", 1e-2)

    w -= config["learning_rate"] * dw
    return w, config
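
A tiny usage sketch. Note that w -= ... updates the array in place, so the next_w handed back to the Solver is the very same object already stored in model.params:

w = np.ones(3)
dw = np.full(3, 0.5)
next_w, config = sgd(w, dw, {"learning_rate": 0.1})
print(next_w)       # [0.95 0.95 0.95]
print(next_w is w)  # True: the update happened in place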

Solver

from __future__ import print_function, division
from future import standard_library
from cs231n import optim

standard_library.install_aliases()

import os
import pickle as pickle
import numpy as np


class Solver(object):
    """
    A Solver encapsulates all the logic necessary for training classification
    models. The Solver performs stochastic gradient descent using different
    update rules defined in optim.py.

    The solver accepts both training and validation data and labels so it can
    periodically check classification accuracy on both training and validation
    data to watch out for overfitting.

    Example usage might look something like this:

    data = {
      'X_train': # training data
      'y_train': # training labels
      'X_val': # validation data
      'y_val': # validation labels
    }
    model = MyAwesomeModel(hidden_size=100, reg=10)
    solver = Solver(model, data,
                    update_rule='sgd',
                    optim_config={
                      'learning_rate': 1e-4,
                    },
                    lr_decay=0.95,
                    num_epochs=5, batch_size=200,
                    print_every=100)
    solver.train()

    A Solver works on a model object that must conform to the following API:

    - model.params must be a dictionary mapping string parameter names to numpy
      arrays containing parameter values.

    - model.loss(X, y) must be a function that computes training-time loss and
      gradients, and test-time classification scores, with the following inputs
      and outputs:

      Inputs:
      - X: Array giving a minibatch of input data of shape (N, d_1, ..., d_k)
      - y: Array of labels, of shape (N,) giving labels for X where y[i] is the
        label for X[i].

      Returns:
      If y is None, run a test-time forward pass and return:
      - scores: Array of shape (N, C) giving classification scores for X where
        scores[i, c] gives the score of class c for X[i].

      If y is not None, run a training time forward and backward pass and
      return a tuple of:
      - loss: Scalar giving the loss
      - grads: Dictionary with the same keys as self.params mapping parameter
        names to gradients of the loss with respect to those parameters.
    """

    def __init__(self, model, data, **kwargs):
        """
        Construct a new Solver instance.

        Required arguments:
        - model: A model object conforming to the API described above
        - data: A dictionary of training and validation data containing:
          'X_train': Array, shape (N_train, d_1, ..., d_k) of training images
          'X_val': Array, shape (N_val, d_1, ..., d_k) of validation images
          'y_train': Array, shape (N_train,) of labels for training images
          'y_val': Array, shape (N_val,) of labels for validation images

        Optional arguments:
        - update_rule: A string giving the name of an update rule in optim.py.
          Default is 'sgd'.
        - optim_config: A dictionary containing hyperparameters that will be
          passed to the chosen update rule. Each update rule requires different
          hyperparameters (see optim.py) but all update rules require a
          'learning_rate' parameter so that should always be present.
        - lr_decay: A scalar for learning rate decay; after each epoch the
          learning rate is multiplied by this value.
        - batch_size: Size of minibatches used to compute loss and gradient
          during training.
        - num_epochs: The number of epochs to run for during training.
        - print_every: Integer; training losses will be printed every
          print_every iterations.
        - verbose: Boolean; if set to false then no output will be printed
          during training.
        - num_train_samples: Number of training samples used to check training
          accuracy; default is 1000; set to None to use entire training set.
        - num_val_samples: Number of validation samples to use to check val
          accuracy; default is None, which uses the entire validation set.
        - checkpoint_name: If not None, then save model checkpoints here every
          epoch.
        """
        self.model = model
        self.X_train = data['X_train']
        self.y_train = data["y_train"]
        self.X_val = data["X_val"]
        self.y_val = data["y_val"]

        # Unpack keyword arguments
        self.update_rule = kwargs.pop("update_rule", "sgd")
        self.optim_config = kwargs.pop("optim_config", {})
        self.lr_decay = kwargs.pop("lr_decay", 1.0)
        self.batch_size = kwargs.pop("batch_size", 100)
        self.num_epochs = kwargs.pop("num_epochs", 10)
        self.num_train_samples = kwargs.pop("num_train_samples", 1000)
        self.num_val_samples = kwargs.pop("num_val_samples", None)
        self.checkpoint_name = kwargs.pop("checkpoint_name", None)
        self.print_every = kwargs.pop("print_every", 10)
        self.verbose = kwargs.pop("verbose", True)

        # Throw an error if there are extra keyword arguments
        if len(kwargs) > 0:
            extra = ", ".join('"%s"' % k for k in list(kwargs.keys()))
            raise ValueError("Unrecognized arguments %s" % extra)

        # Make sure the update rule exists, then replace the string name with the actual function.
        # hasattr() checks whether an object has the given attribute; here we look up
        # the update rule by name inside the optim module.
        if not hasattr(optim, self.update_rule):
            raise ValueError('Invalid update_rule "%s"' % self.update_rule)
        self.update_rule = getattr(optim, self.update_rule)

        self._reset()

    def _reset(self):
        """
        Set up some book-keeping variables for optimization. Don't call this manually.
        """
        # Set up some variables for book-keeping
        self.epoch = 0
        self.best_val_acc = 0
        self.best_params = {}
        self.loss_history = []
        self.train_acc_history = []
        self.val_acc_history = []

        # Make a deep copy of the optim_config for each parameter
        self.optim_configs = {}
        for p in self.model.params:  # model.params is a dict
            d = {k: v for k, v in self.optim_config.items()}  # optim_config is a dict
            self.optim_configs[p] = d  # the entries start out identical, but update rules may later store per-parameter state in them

    def _step(self):
        """
        Make a single gradient update. This is called by train()
        """
        # Make a minibatch of training data
        num_train = self.X_train.shape[0]
        batch_mask = np.random.choice(num_train, self.batch_size)
        X_batch = self.X_train[batch_mask]
        y_batch = self.y_train[batch_mask]

        # Compute loss and gradient
        loss, grads = self.model.loss(X_batch, y_batch)
        self.loss_history.append(loss)

        # Perform a parameter update
        for p, w in self.model.params.items():  # model.params is a dict
            dw = grads[p]  # the gradient of whichever parameter p names
            config = self.optim_configs[p]  # e.g. the learning rate
            next_w, next_config = self.update_rule(w, dw, config)
            self.model.params[p] = next_w
            self.optim_configs[p] = next_config

    def _save_checkpoint(self):
        if self.checkpoint_name is None:
            return
        checkpoint = {
            "model": self.model,
            "update_rule": self.update_rule,
            "lr_decay": self.lr_decay,
            "optim_config": self.optim_config,
            "batch_size": self.batch_size,
            "num_train_samples": self.num_train_samples,
            "num_val_samples": self.num_val_samples,
            "epoch": self.epoch,
            "loss_history": self.loss_history,
            "train_acc_history": self.train_acc_history,
            "val_acc_history": self.val_acc_history,
        }
        filename = "%s_epoch_%d.pkl" % (self.checkpoint_name, self.epoch)
        if self.verbose:
            print('Saving checkpoint to "%s"' % filename)
        with open(filename, "wb") as f:
            pickle.dump(checkpoint, f)

    def check_accuracy(self, X, y, num_samples=None, batch_size=100):
        """
        Check accuracy of the model on the provided data.

        Inputs:
        - X: Array of data, of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,)
        - num_samples: If not None, subsample the data and only test the model
          on num_samples datapoints.
        - batch_size: Split X and y into batches of this size to avoid using
          too much memory.

        Returns:
        - acc: Scalar giving the fraction of instances that were correctly
          classified by the model.
        """
        # Maybe subsample the data
        N = X.shape[0]
        if num_samples is not None and N > num_samples:
            mask = np.random.choice(N, num_samples)
            N = num_samples
            X = X[mask]
            y = y[mask]

        # Compute predictions in batches
        num_batches = N // batch_size
        if N % batch_size != 0:
            num_batches += 1
        y_pred = []
        for i in range(num_batches):
            start = i * batch_size
            end = (i + 1) * batch_size
            scores = self.model.loss(X[start:end])  # with y=None, model.loss returns the scores
            y_pred.append(np.argmax(scores, axis=1))
        y_pred = np.hstack(y_pred)
        acc = np.mean(y_pred == y)

        return acc

    def train(self):
        """
        Run optimization to train the model.
        """
        num_train = self.X_train.shape[0]
        iterations_per_epoch = max(num_train // self.batch_size, 1)
        num_iterations = self.num_epochs * iterations_per_epoch

        for t in range(num_iterations):
            self._step()

            # Maybe print training loss
            if self.verbose and t % self.print_every == 0:
                print("(Iteration %d / %d) loss: %f"
                      % (t + 1, num_iterations, self.loss_history[-1]))

            # At the end of every epoch, increment the epoch counter and decay
            # the learning rate.
            epoch_end = (t + 1) % iterations_per_epoch == 0
            if epoch_end:
                self.epoch += 1
                for k in self.optim_configs:
                    self.optim_configs[k]["learning_rate"] *= self.lr_decay

            # Check train and val accuracy on the first iteration, the last
            # iteration, and at the end of each epoch.
            first_it = t == 0
            last_it = t == num_iterations - 1
            if first_it or last_it or epoch_end:
                train_acc = self.check_accuracy(self.X_train, self.y_train, num_samples=self.num_train_samples)
                val_acc = self.check_accuracy(self.X_val, self.y_val, num_samples=self.num_val_samples)
                self.train_acc_history.append(train_acc)
                self.val_acc_history.append(val_acc)
                self._save_checkpoint()

                if self.verbose:
                    print("(Epoch %d / %d) train acc: %f; val_acc: %f"
                          % (self.epoch, self.num_epochs, train_acc, val_acc))

                # Keep track of the best model
                if val_acc > self.best_val_acc:
                    self.best_val_acc = val_acc
                    self.best_params = {}
                    for k, v in self.model.params.items():
                        self.best_params[k] = v.copy()

        # At the end of training swap the best params into the model
        self.model.params = self.best_params
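
Because the Solver resolves update_rule with hasattr/getattr on the optim module, any function with the (w, dw, config) -> (next_w, config) signature added to optim.py becomes selectable by name. A hypothetical example (sgd_with_weight_decay is only an illustration, not part of optim.py):

def sgd_with_weight_decay(w, dw, config=None):
    """Vanilla SGD plus L2 weight decay folded into the gradient (illustrative only)."""
    if config is None:
        config = {}
    config.setdefault("learning_rate", 1e-2)
    config.setdefault("weight_decay", 1e-4)
    w -= config["learning_rate"] * (dw + config["weight_decay"] * w)
    return w, config

# After adding it to optim.py it could be selected with:
# solver = Solver(model, data, update_rule='sgd_with_weight_decay', ...)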

Training the model with the Solver

input_size = 32 * 32 * 3
hidden_size = 50
num_classes = 10
model = TwoLayerNet(input_size, hidden_size, num_classes)
solver = None

solver = Solver(model, data,
                update_rule='sgd',
                optim_config={
                    'learning_rate': 1e-3,
                },
                lr_decay=0.95,
                num_epochs=5, batch_size=100,
                print_every=500, verbose=True)
# if the keyword arguments after data are omitted, the defaults are used
solver.train()   
(Iteration 1 / 2450) loss: 2.300230
(Epoch 0 / 5) train acc: 0.126000; val_acc: 0.127000
(Epoch 1 / 5) train acc: 0.438000; val_acc: 0.434000
(Iteration 501 / 2450) loss: 1.513676
(Epoch 2 / 5) train acc: 0.475000; val_acc: 0.464000
(Iteration 1001 / 2450) loss: 1.297377
(Epoch 3 / 5) train acc: 0.491000; val_acc: 0.471000
(Iteration 1501 / 2450) loss: 1.630036
(Epoch 4 / 5) train acc: 0.511000; val_acc: 0.477000
(Iteration 2001 / 2450) loss: 1.391281
(Epoch 5 / 5) train acc: 0.516000; val_acc: 0.479000

Plotting the loss and accuracy

# Run this cell to visualize training loss and train / val accuracy
plt.subplot(2, 1, 1)
plt.title('Training loss')
plt.plot(solver.loss_history, 'o')
plt.xlabel('Iteration')

plt.subplot(2, 1, 2)
plt.title('Accuracy')
plt.plot(solver.train_acc_history, '-o', label='train')
plt.plot(solver.val_acc_history, '-o', label='val')
plt.plot([0.5] * len(solver.val_acc_history), 'k--')
plt.xlabel('Epoch')
plt.legend(loc='lower right')
plt.gcf().set_size_inches(15, 12)
plt.show()

Visualizing the weights

from math import sqrt, ceil
import numpy as np


def visualize_grid(Xs, ubound=255.0, padding=1):
    """
    Reshape a 4D tensor of image data to a grid for easy visualization.

    Inputs:
    - Xs: Data of shape (N, H, W, C)
    - ubound: Output grid will have values scaled to the range [0, ubound]
    - padding: The number of blank pixels between elements of the grid
    """
    (N, H, W, C) = Xs.shape
    grid_size = int(ceil(sqrt(N)))
    grid_height = H * grid_size + padding * (grid_size - 1)
    grid_width = W * grid_size + padding * (grid_size - 1)
    grid = np.zeros((grid_height, grid_width, C))
    next_idx = 0
    y0, y1 = 0, H
    for y in range(grid_size):
        x0, x1 = 0, W
        for x in range(grid_size):
            if next_idx < N:
                img = Xs[next_idx]
                low, high = np.min(img), np.max(img)
                grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low)
                # grid[y0:y1, x0:x1] = Xs[next_idx]
                next_idx += 1
            x0 += W + padding
            x1 += W + padding
        y0 += H + padding
        y1 += H + padding
    # grid_max = np.max(grid)
    # grid_min = np.min(grid)
    # grid = ubound * (grid - grid_min) / (grid_max - grid_min)
    return grid


# Visualize the weights of the network
def show_net_weights(net):
    W1 = net.params['W1']
    W1 = W1.reshape(3, 32, 32, -1).transpose(3, 1, 2, 0)
    plt.imshow(visualize_grid(W1, padding=3).astype('uint8'))
    plt.gca().axis('off')
    plt.show()

show_net_weights(model)

Tuning the model

Below, you should experiment with different values of the various hyperparameters, including hidden layer size, learning rate, number of training epochs, and regularization strength. You might also consider tuning the learning rate decay, but you should be able to get good performance using the default value.

The code below searches over the hyperparameters in a way that sits between random search and grid search: the candidate values form fixed grids, but each trial samples one value at random from each grid.

best_model = None
best_val_accuracy = 0.0

# Tune hyperparameters using the validation set. Store your best trained model in best_model.
input_dim = 32 * 32 * 3
num_classes = 10
best_val_accuracy = 0.0

hidden_dims = [50, 60, 80]
lrs = [5e-5, 1e-4, 5e-4, 1e-3]
regs = [0.5, 0.6]
epos = [1, 2]

def random_chose_para(hidden_dims, lrs, regs, epos):
    hidden_dim = hidden_dims[np.random.randint(0, len(hidden_dims))]
    lr = lrs[np.random.randint(0, len(lrs))]
    reg = regs[np.random.randint(0, len(regs))]
    epo = epos[np.random.randint(0, len(epos))]
    return hidden_dim, lr, reg, epo

for ite in range(1):
    hidden_dim, lr, reg, epo = random_chose_para(hidden_dims, lrs, regs, epos)
    model = TwoLayerNet(input_dim, hidden_dim, num_classes, reg=reg)
    solver = Solver(model, data,
                    update_rule='sgd',
                    optim_config={
                        'learning_rate': lr,
                    },
                    lr_decay=0.95,
                    num_epochs=epo, batch_size=100,
                    print_every=500, verbose=True)
    solver.train()
    print('Validation accuracy: ', best_val_accuracy)
    if solver.best_val_acc > best_val_accuracy:
        best_val_accuracy = solver.best_val_acc
        best_model = model

print('Validation accuracy: ', best_val_accuracy)

Functions worth noting

Adding a matrix and a vector

>>a = np.array([[1,2,3],[4,5,6],[7,8,9]])
>>print(a)

[[1 2 3]
 [4 5 6]
 [7 8 9]]

>>b = np.array([1,2,3])
>>print(b.shape)

(3,)

>>print(a+b)

[[ 2  4  6]
 [ 5  7  9]
 [ 8 10 12]]

Multiplying a matrix and a vector

>>a = np.ones(2)
>>b = np.array([[1,2],[3,4]])
>>print(b.dot(a))

[3. 7.]

np.random.randint

>>print(np.random.randint(5, size = 3))

[1 3 2]

>>print(np.random.randint(5, size = (2,2)))

[[1 2]
 [1 4]]

setdefault

>>a = {}
>>a.setdefault("learning_rate", 1e-2)
>>print(a)

{'learning_rate': 0.01}

pop

>>b = {}
>>b.pop("update_rule", "sgd")
>>b1 = b.pop("update_rule", "sgd")
>>print(b1)

sgd

>>c = {'update_rule':'dad'}
>>c1 = c.pop("update_rule", "sgd")
>>print(c)
>>print(c1)

{}
dad
