mahout源码分析之DistributedLanczosSolver(二)Job1
Mahout版本:0.7,hadoop版本:1.0.4,jdk:1.7.0_25 64bit。
从上篇blog最后的终端输出信息可以看到,svd算法一共有5个Job任务。下面通过Mahout中DistributedLanczosSolver的源代码来逐个分析:
为了方便后面的数据随时对照,使用wine.dat修改后的数据,如下(5行,13列):
4. TimesSquaredMapper:这个mapper使用的是旧版的job模式(即org.apache.hadoop.mapred包下的旧API),何谓旧版的job模式?看下面的:
编写一个仿制的mapper,可以得到下面的输出结果:
package mahout.fansy.svd;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.function.Functions;

import com.google.common.collect.Lists;

import mahout.fansy.utils.read.ReadArbiKV;

/**
 * Stand-alone imitation of the TimesSquareMapper (presumably Mahout's
 * {@code TimesSquaredJob.TimesSquaredMapper} - confirm against the Mahout source).
 *
 * <p>For every row {@code v} read from the matrix file it computes
 * {@code d = v . inputVector} and accumulates {@code d * v} into
 * {@code outputVector}; {@link #close()} then prints the accumulated vector.
 */
public class TimesSquareMapperFollow {

  /** Location of the matrix rows (one {@link VectorWritable} value per row). */
  private static final String MATRIX_PATH = "hdfs://ubuntu:9000/svd/input/wine";

  /** Location of the vector the matrix rows are multiplied with. */
  private static final String INPUT_VECTOR_PATH =
      "hdfs://ubuntu:9000/svd/temp/22772135186028/DistributedMatrix.times.inputVector/23066524612809";

  /** Running sum of {@code (row . inputVector) * row} over all rows. */
  private Vector outputVector;

  /** Vector loaded from {@link #INPUT_VECTOR_PATH}. */
  private Vector inputVector;

  /**
   * Runs the imitated map phase followed by the imitated close phase.
   *
   * @param args unused
   * @throws IOException if reading the input files fails
   */
  public static void main(String[] args) throws IOException {
    TimesSquareMapperFollow ts = new TimesSquareMapperFollow();
    ts.map();
    ts.close();
  }

  /**
   * Loads the matrix rows and the input vector, and (re)initialises the output vector.
   *
   * <p>Despite its name this method returns the matrix rows; the input vector is
   * stored in the {@code inputVector} field as a side effect.
   *
   * @return the values read from {@link #MATRIX_PATH}, in the iteration order of the
   *     map returned by {@code ReadArbiKV.readFromFile}
   * @throws IOException if reading either file fails
   * @throws IllegalStateException if no input vector is found in {@link #INPUT_VECTOR_PATH}
   */
  public List<VectorWritable> getInputVector() throws IOException {
    List<VectorWritable> list = Lists.newArrayList();
    Map<Writable, Writable> rows = ReadArbiKV.readFromFile(MATRIX_PATH);
    for (Writable value : rows.values()) {
      list.add((VectorWritable) value);
    }

    Map<Writable, Writable> input = ReadArbiKV.readFromFile(INPUT_VECTOR_PATH);
    // NOTE(review): the lookup key is kept as null exactly as in the original code;
    // presumably ReadArbiKV stores the NullWritable-keyed record under a null key - confirm.
    Writable stored = input.get(null);
    if (stored == null) {
      throw new IllegalStateException("No input vector found in " + INPUT_VECTOR_PATH);
    }
    inputVector = ((VectorWritable) stored).get();

    // Size the accumulator from the input vector instead of hard-coding the
    // 13 columns of the wine sample, so other matrices work as well.
    outputVector = new DenseVector(inputVector.size());
    return list;
  }

  /**
   * Imitation of the mapper's map function: accumulates
   * {@code scale(row) * row} into {@code outputVector} for every matrix row.
   *
   * @throws IOException if reading the input files fails
   */
  public void map() throws IOException {
    List<VectorWritable> list = getInputVector();
    for (VectorWritable v : list) {
      double d = scale(v);
      if (d == 1.0) {
        // Scaling by one: plain addition is enough.
        outputVector.assign(v.get(), Functions.PLUS);
      } else if (d != 0.0) {
        // A zero scale would contribute nothing, so it is skipped entirely.
        outputVector.assign(v.get(), Functions.plusMult(d));
      }
    }
  }

  /**
   * Computes the factor by which a row is scaled before being accumulated.
   *
   * @param v a matrix row
   * @return the dot product of the row with {@code inputVector}
   */
  protected double scale(VectorWritable v) {
    return v.get().dot(inputVector);
  }

  /** Imitation of the mapper's close function: prints the accumulated output vector. */
  public void close() {
    System.out.println("outputVector:");
    System.out.println(outputVector);
  }
}
分享,成长,快乐
转载请注明blog地址:http://blog.csdn.net/fansy1990