我正在尝试对MNIST数据集执行PCA;在此过程中,我需要生成特征向量并将最主要的特征可视化。 以下是我的算法:
加载图片、减去均值、生成协方差矩阵、求出特征向量和特征值。这个算法运行起来相当简单; 我的第一个任务是将前10个特征向量可视化为图像。 以下是我到目前为止的代码:
__author__ = "Ajay Krishna Teja Kavuri" import numpy as np import random from mnist import MNIST import matplotlib.pylab as plt class PCAMNIST: #Initialization def __init__(self): #Load MNIST datset mnistData = MNIST('./mnistData') self.imgTrain,self.lblTrain=mnistData.load_training() self.imgTrainSmpl=self.imgTrain[:60000] np.seterr(all='warn') #1. Subtract the mean because the PCA will work better def subMean(self): try: self.sumImg = np.empty([784,]) #calculate the sum for img in self.imgTrainSmpl: imgArr = np.asarray(img) self.sumImg = np.add(imgArr,self.sumImg) #Calculate the mean array self.meanImg = self.sumImg/(len(self.imgTrainSmpl)) self.meanImg = np.nan_to_num(self.meanImg) #subtract it out index=0 for img in self.imgTrainSmpl: imgArr = np.asarray(img) self.imgTrainSmpl[index] = np.subtract(imgArr,self.meanImg).tolist() index += 1 #for img in self.imgTrainSmpl: #print img except: print Exception #2. get the covaraince matrix for each digit def getCov(self): self.imgCov=[] dgtArr = np.asarray(self.imgTrainSmpl).T dgtCov = np.cov(dgtArr) self.imgCov.append(dgtCov) #for img in self.imgCov: #print img #3. 
get the eigen vectors from the covariance matrix def getEigen(self): self.eigVec=[] self.eigVal=[] dgtArr = np.asarray(self.imgCov) tmpEigVal,tmpEigVec=np.linalg.eig(dgtArr) self.eigVal.append(tmpEigVal.tolist()) self.eigVec.append(tmpEigVec.tolist()) #print "\nEigen values:\n" #for img in self.eigVal: #print img #print "\nEigen vectors:\n" #for img in self.eigVec: #print img def sortEV(self): self.eigValArr = np.asarray(self.eigVal[0][0]) self.eigVecArr = np.asarray(self.eigVec[0][0]) self.srtdInd = np.argsort(np.abs(self.eigValArr)) self.srtdEigValArr = self.eigValArr[self.srtdInd] self.srtdEigVecArr = self.eigVecArr[self.srtdInd] self.srtdEigVec = self.srtdEigVecArr.real.tolist() #print self.srtdEigValArr[0] print len(self.srtdInd.tolist()) #print self.eigVec[self.srtdInd[0]] #print np.asarray(self.srtdEigVec).shape #for img in self.srtdEigVecArr: #print img #self.drawEig() def plotVal(self): """ plt.figure() plt.scatter(np.asarray(self.eigVal).real) plt.show() """ def drawEig(self): for vec in self.srtdEigVec[:10]: self.drawEigV(vec) def drawEigV(self,digit): plt.figure() fig=plt.imshow(np.asarray(digit).reshape(28,28),origin='upper') fig.set_cmap('gray_r') fig.axes.get_xaxis().set_visible(False) fig.axes.get_yaxis().set_visible(False) plt.savefig(str(random.randint(0,10000))+".png") #plt.show() plt.close() def drawChar(self,digit): plt.figure() fig=plt.imshow(np.asarray(digit).reshape(28,28),clim=(-1,1.0),origin='upper') fig.set_cmap('gray_r') fig.axes.get_xaxis().set_visible(False) fig.axes.get_yaxis().set_visible(False) plt.show() plt.close() def drawSmpl(self): for img in self.imgTrainSmpl: self.drawChar(img) def singleStep(self): self.val, self.vec = np.linalg.eig(np.cov(np.array(self.imgTrainSmpl).transpose())) self.srtd = np.argsort(self.val)[::-1] print self.val #asnmnt4=PCAMNIST() #asnmnt4.singleStep() asnmnt4=PCAMNIST() asnmnt4.subMean() asnmnt4.getCov() asnmnt4.getEigen() asnmnt4.sortEV() asnmnt4.drawEig() #asnmnt4.plotVal() """ asnmnt4.getSorted() 
asnmnt4.printTopEigenVal() """
虽然上面的代码可以顺利运行,且所有数组大小都与给定的数据集匹配,但它生成的特征向量图像如下:
显然,这些特征向量没有意义,因为它们应该表示数据集的特征,在这种情况下应该看起来像数字。 非常感谢任何帮助。 如果您想运行此代码,则可能需要安装MNIST软件包并从链接下载数据。
I am trying to perform PCA on the MNIST dataset. As part of the process, I need to generate the eigenvectors and visualize the top features. The following is my algorithm:
Load the images; subtract the mean; generate the covariance matrix; derive the eigenvectors and eigenvalues. It's a fairly simple algorithm to run; my first task is to visualize the top 10 eigenvectors as images. The following is the code that I have so far:
__author__ = "Ajay Krishna Teja Kavuri" import numpy as np import random from mnist import MNIST import matplotlib.pylab as plt class PCAMNIST: #Initialization def __init__(self): #Load MNIST datset mnistData = MNIST('./mnistData') self.imgTrain,self.lblTrain=mnistData.load_training() self.imgTrainSmpl=self.imgTrain[:60000] np.seterr(all='warn') #1. Subtract the mean because the PCA will work better def subMean(self): try: self.sumImg = np.empty([784,]) #calculate the sum for img in self.imgTrainSmpl: imgArr = np.asarray(img) self.sumImg = np.add(imgArr,self.sumImg) #Calculate the mean array self.meanImg = self.sumImg/(len(self.imgTrainSmpl)) self.meanImg = np.nan_to_num(self.meanImg) #subtract it out index=0 for img in self.imgTrainSmpl: imgArr = np.asarray(img) self.imgTrainSmpl[index] = np.subtract(imgArr,self.meanImg).tolist() index += 1 #for img in self.imgTrainSmpl: #print img except: print Exception #2. get the covaraince matrix for each digit def getCov(self): self.imgCov=[] dgtArr = np.asarray(self.imgTrainSmpl).T dgtCov = np.cov(dgtArr) self.imgCov.append(dgtCov) #for img in self.imgCov: #print img #3. 
get the eigen vectors from the covariance matrix def getEigen(self): self.eigVec=[] self.eigVal=[] dgtArr = np.asarray(self.imgCov) tmpEigVal,tmpEigVec=np.linalg.eig(dgtArr) self.eigVal.append(tmpEigVal.tolist()) self.eigVec.append(tmpEigVec.tolist()) #print "\nEigen values:\n" #for img in self.eigVal: #print img #print "\nEigen vectors:\n" #for img in self.eigVec: #print img def sortEV(self): self.eigValArr = np.asarray(self.eigVal[0][0]) self.eigVecArr = np.asarray(self.eigVec[0][0]) self.srtdInd = np.argsort(np.abs(self.eigValArr)) self.srtdEigValArr = self.eigValArr[self.srtdInd] self.srtdEigVecArr = self.eigVecArr[self.srtdInd] self.srtdEigVec = self.srtdEigVecArr.real.tolist() #print self.srtdEigValArr[0] print len(self.srtdInd.tolist()) #print self.eigVec[self.srtdInd[0]] #print np.asarray(self.srtdEigVec).shape #for img in self.srtdEigVecArr: #print img #self.drawEig() def plotVal(self): """ plt.figure() plt.scatter(np.asarray(self.eigVal).real) plt.show() """ def drawEig(self): for vec in self.srtdEigVec[:10]: self.drawEigV(vec) def drawEigV(self,digit): plt.figure() fig=plt.imshow(np.asarray(digit).reshape(28,28),origin='upper') fig.set_cmap('gray_r') fig.axes.get_xaxis().set_visible(False) fig.axes.get_yaxis().set_visible(False) plt.savefig(str(random.randint(0,10000))+".png") #plt.show() plt.close() def drawChar(self,digit): plt.figure() fig=plt.imshow(np.asarray(digit).reshape(28,28),clim=(-1,1.0),origin='upper') fig.set_cmap('gray_r') fig.axes.get_xaxis().set_visible(False) fig.axes.get_yaxis().set_visible(False) plt.show() plt.close() def drawSmpl(self): for img in self.imgTrainSmpl: self.drawChar(img) def singleStep(self): self.val, self.vec = np.linalg.eig(np.cov(np.array(self.imgTrainSmpl).transpose())) self.srtd = np.argsort(self.val)[::-1] print self.val #asnmnt4=PCAMNIST() #asnmnt4.singleStep() asnmnt4=PCAMNIST() asnmnt4.subMean() asnmnt4.getCov() asnmnt4.getEigen() asnmnt4.sortEV() asnmnt4.drawEig() #asnmnt4.plotVal() """ asnmnt4.getSorted() 
asnmnt4.printTopEigenVal() """
Although the above code runs without errors and all the array sizes match the given dataset, it generates the following images as eigenvectors:
Clearly, the eigenvectors make no sense, as they should represent the features of the dataset, which in this case should look like digits. Any help is appreciated. If you are trying to run this code, you might have to install the MNIST package and download the data from the link.
最满意答案
您绘制的是特征向量矩阵的行。 特征向量位于矩阵的列中(即 `v[:, i]` 是对应于特征值 `w[i]` 的特征向量),这一点可以在 np.linalg.eig 的文档中看到。
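你应该将
`self.eigVec.append(tmpEigVec.tolist())`
改为
`self.eigVec.append(np.transpose(tmpEigVec).tolist())`
我相信它会按预期工作。(注意:在问题的代码中,`tmpEigVec` 是形状为 (1, 784, 784) 的堆叠数组,不带 `axes` 参数的 `np.transpose` 会反转全部三个轴;若只想转置矩阵的两个维度,请使用 `np.swapaxes(tmpEigVec, -1, -2)`。)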
You're plotting the rows of the eigenvector matrix. The eigenvectors are in the columns of the matrix (that is, `v[:, i]` is the eigenvector corresponding to the eigenvalue `w[i]`), as you can see in the np.linalg.eig documentation.
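You should change
`self.eigVec.append(tmpEigVec.tolist())`
to
`self.eigVec.append(np.transpose(tmpEigVec).tolist())`
and I believe it will work as expected. (Note: in the question's code `tmpEigVec` is a stacked array of shape (1, 784, 784), and `np.transpose` without an `axes` argument reverses all three axes; to transpose only the two matrix dimensions, use `np.swapaxes(tmpEigVec, -1, -2)`.)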
更多推荐
发布评论