Mahout 推荐引擎使用 Hadoop(二):计算协同矩阵
?
第二步,计算协同矩阵,主要在 RowSimilarityJob 这个类中完成。
?
??
public static class SimilarityReducer extends Reducer<IntWritable,VectorWritable,IntWritable,VectorWritable> { @Override protected void reduce(IntWritable row, Iterable<VectorWritable> partialDots, Context ctx) throws IOException, InterruptedException { Iterator<VectorWritable> partialDotsIterator = partialDots.iterator(); //取一个vecotr作为该item的行向量 Vector dots = partialDotsIterator.next().get(); while (partialDotsIterator.hasNext()) { Vector toAdd = partialDotsIterator.next().get(); Iterator<Vector.Element> nonZeroElements = toAdd.iterateNonZero(); while (nonZeroElements.hasNext()) { Vector.Element nonZeroElement = nonZeroElements.next(); //nonZeroElement.index()为itemid,将另一个vecotr中itemid的value加进去 dots.setQuick(nonZeroElement.index(), dots.getQuick(nonZeroElement.index()) + nonZeroElement.get()); } } //最后得到的dots是协同矩阵中行号为row的一行,行中元素是item对其他的item的相似度 Vector similarities = dots.like(); double normA = norms.getQuick(row.get()); Iterator<Vector.Element> dotsWith = dots.iterateNonZero(); while (dotsWith.hasNext()) { Vector.Element b = dotsWith.next(); double similarityValue = similarity.similarity(b.get(), normA, norms.getQuick(b.index()), numberOfColumns); if (similarityValue >= treshold) { similarities.set(b.index(), similarityValue); } } if (excludeSelfSimilarity) { similarities.setQuick(row.get(), 0); } ctx.write(row, new VectorWritable(similarities)); } }
?
?
?
?
?
?