系统之基于二部图的个性化推荐系统原理及C++实现"/>
推荐系统之基于二部图的个性化推荐系统原理及C++实现
推荐系统之基于二部图的个性化推荐系统原理及C++实现
时间 2014-12-27 16:43:00 博客园-原创精华区 原文 .html 主题 推荐系统 1 #include <iostream>
2 #include <fstream>
3 #include <vector>
4 #include <string>
5 #include <vector>
6 #include <algorithm>
7 #include <iomanip>
8 using namespace std;
9
10 //从TXT中读入数据到矩阵(二维数组)
11 template <typename T>
12 vector<vector<T> > txtRead(string FilePath,int row,int col)
13 {
14 ifstream input(FilePath);
15 if (!input.is_open())
16 {
17 cerr << "File is not existing, check the path: \n" << FilePath << endl;
18 exit(1);
19 }
20 vector<vector<T> > data(row, vector<T>(col,0));
21 for (int i = 0; i < row; ++i)
22 {
23 for (int j = 0; j < col; ++j)
24 {
25 //因为这里针对的情况是用户只给出对items的喜欢与不喜欢的情况,而movielens
26 //是一个1-5的评分数据,所以把分数达到3的看作是喜欢,标记为1,小于3的视为
27 // 不喜欢,置为0
28 input >> data[i][j];
29 if (data[i][j] >= 3)
30 data[i][j] = 1;
31 else
32 data[i][j] = 0;
33 }
34 }
35 return data;
36 }
37
38 //把矩阵中的数据写入TXT文件
39 template<typename T>
40 void txtWrite(vector<vector<T> > Matrix, string dest)
41 {
42 ofstream output(dest);
43 vector<vector<T> >::size_type row = Matrix.size();
44 vector<T>::size_type col = Matrix[0].size();
45 for (vector<vector<T> >::size_type i = 0; i < row; ++i)
46 {
47 for (vector<T>::size_type j = 0; j < col; ++j)
48 {
49
50 output << setprecision(3)<< Matrix[i][j] << "\t";
51 }
52 output << endl;
53 }
54 }
55
56 // 求两个向量的内积
57 double InnerProduct(std::vector<double> A, std::vector<double> B)
58 {
59 double res = 0;
60 for(std::vector<double>::size_type i = 0; i < A.size(); ++i)
61 {
62 res += A[i] * B[i];
63 }
64 return res;
65 }
66
67 //矩阵转置操作
68 template<typename T>//
69 vector<vector<T> > Transpose(vector<vector<T> > Matrix)
70 {
71 unsigned row = Matrix.size();
72 unsigned col = Matrix[0].size();
73 vector<vector<T> > Trans(col,vector<T>(row,0));
74 for (unsigned i = 0; i < col; ++i)
75 {
76 for (unsigned j = 0; j < row; ++j)
77 {
78 Trans[i][j] = Matrix[j][i];
79 }
80 }
81 return Trans;
82 }
83
84 //求一个向量中所有元素的和
85 template<typename T>
86 T SumVector(vector<T> vec)
87 {
88 T res = 0;
89
90 for (vector<T>::size_type i = 0; i < vec.size(); ++i)
91 res += vec[i];
92 return res;
93 }
94
95 //对一个向量中的元素进行降序排列,返回重排后的元素在原来
96 //向量中的索引
97 bool IsBigger(double a, double b)
98 {
99 return a >= b;
100 }
101 vector<unsigned> DescendVector(vector<double> vec)
102 {
103 vector<double> tmpVec = vec;
104 sort(tmpVec.begin(), tmpVec.end(), IsBigger);
105 vector<unsigned> idx;
106 for (vector<double>::size_type i = 0; i < tmpVec.size(); ++i)
107 {
108 for (vector<double>::size_type j = 0; j < vec.size(); ++j)
109 {
110 if (tmpVec[i] == vec[j])
111 idx.push_back(j);
112 }
113 }
114 return idx;
115 }
116
117
118 //基于概率传播(ProbS)的二部图的推荐函数,data是训练数据
119 vector<vector<double> > ProbS(vector<vector<double> > data)
120 {
121 auto row = data.size();
122 auto col = data[0].size();
123 vector<vector<double> > transData = Transpose(data);
124
125 //第一步利用概率传播机制计算权重矩阵
126 vector<vector<double> > weights(col, vector<double>(col, 0));
127 for (vector<double>::size_type i = 0; i < col; ++i)
128 {
129 for (vector<double>::size_type j = 0; j < col; ++j)
130 {
131 double degree = SumVector<double>(transData[j]);
132 double sum = 0;
133 for (vector<double>::size_type k = 0; k < row; ++k)
134 {
135 sum += transData[i][k] * transData[j][k] / SumVector<double>(data[k]);
136 }
137 if (degree)
138 weights[i][j] = sum / degree;
139 }
140 }
141
142 //第二步利用权重矩阵和训练数据集针对每个用户对每一个item评分
143 vector<vector<double> > scores(row, vector<double>(col, 0));
144 for (vector<double>::size_type i = 0; i < row; ++i)
145 {
146 for (vector<double>::size_type j = 0; j < col; ++j)
147 {
148 //等于0的地方代表user i 还木有评价过item j,需要预测
149 if (0 == data[i][j])
150 scores[i][j] = InnerProduct(weights[j],data[i]);
151 }
152 }
153 return scores;
154 }
155
156 //计算推荐结果的命中率:推荐的items中用户确实喜欢的items数量/推荐的items数量
157 //用户确实喜欢的items是由测试集给出,length表示推荐列表最长为多少,这里将测出
158 //推荐列表长度由1已知增加到length过程中,推荐命中率的变化
159 vector<vector<double> > ComputeHitRate(vector<vector<double> > scores, vector<vector<double> > test,
160 unsigned length)
161 {
162 auto usersNum = test.size();
163 auto itemsNum = test[0].size();
164
165 vector<vector<unsigned> > sortedIndex;
166 //因为只是对测试集中的用户和items进行测试,于是选取与测试集大小一样的预测数据
167 vector<vector<double> > selectedScores(usersNum, vector<double>(itemsNum,0));
168 vector<double> line;
169 for (unsigned i = 0; i < usersNum; ++i)
170 {
171 for (unsigned j = 0; j < itemsNum; ++j)
172 {
173 line.push_back(scores[i][j]);
174 }
175 sortedIndex.push_back(DescendVector(line));
176 line.clear();
177 }
178 //hitRate的第一列存储推荐列表的长度,第二列存储对应的命中率
179 vector<vector<double> > hitRate(length);
180 for (unsigned k = 1; k <= length; ++k)
181 {
182 hitRate[k-1].push_back(k);
183 double Counter = 0;
184 for (unsigned i = 0; i < usersNum; ++i)
185 {
186 for (unsigned j = 0; j < k; ++j)
187 {
188 unsigned itemIndex = sortedIndex[i][j];
189 if (test[i][itemIndex])
190 ++Counter;
191 }
192 }
193 hitRate[k-1].push_back(Counter / (k * usersNum));
194 }
195 return hitRate;
196 }
197 int main()
198 {
199 string FilePath1("data.txt");
200 //string FilePath2("E:\\Matlab code\\recommendation system\\data\\movielens\\test.txt");
201
202 int row = 10;
203 int col = 10;
204 cout << "数据读取中..." << endl;
205 vector<vector<double> > train = txtRead<double>(FilePath1, row, col);
206 //vector<vector<double> > test = txtRead<double>(FilePath2, 462, 1591);
207
208 cout << "利用二部图网络进行评分预测..." << endl;
209 vector<vector<double> > predictScores = ProbS(train);
210 txtWrite(predictScores, "predictScores.txt");
211 /*
212 cout << "计算命中率..." << endl;
213 vector<vector<double> > hitRate = ComputeHitRate(predictScores, test, 1591);
214
215 txtWrite(hitRate, "hitRate.txt");
216 cout << "命中率结果保存完毕!" << endl;
217 */
218 return 0;
219 }
更多推荐
推荐系统之基于二部图的个性化推荐系统原理及C++实现
发布评论