Twenty Newsgroups Classification实例任务之TrainNaiveBayesJob(一)
接着上篇blog,继续看log里面的信息如下:
下面看Mapper,IndexInstancesMapper的主要代码如下:
这里额可以通过下面的代码来测试相关的文件:package mahout.fansy.test.bayes.read;import java.io.IOException;import java.net.URI;import java.util.HashMap;import java.util.Map;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IOUtils;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.SequenceFile;import org.apache.hadoop.io.Text;import org.apache.hadoop.io.Writable;import org.apache.hadoop.util.ReflectionUtils;import org.apache.mahout.math.VectorWritable;public class ReadLabelIndex { /** * @param args */public static Configuration conf=new Configuration();static String fPath="";static String trainPath="";static{conf.set("mapred.job.tracker", "ubuntu:9001");fPath="hdfs://ubuntu:9000/home/mahout/mahout-work-mahout/labelindex"; // lableindex 数据文件trainPath="hdfs://ubuntu:9000/home/mahout/mahout-work-mahout/"+"20news-train-vectors/part-r-00000"; // 训练样本数据}public static void main(String[] args) throws IOException {//readFromFile(fPath);readFromFile(trainPath);}/** * 读取LabelIndex文件 * @param fPath * @return * @throws IOException */public static Map<Writable,Writable> readFromFile(String fPath) throws IOException{FileSystem fs = FileSystem.get(URI.create(fPath), conf); Path path = new Path(fPath); Map<Writable,Writable> map=new HashMap<Writable,Writable>(); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, path, conf); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf); while (reader.next(key, value)) { // Writable k=; // 如何实现Writable的深度复制? //map.put(key, value); System.out.println(key.toString()+", "+value.toString()); System.exit(-1);// 只打印第一条记录 } } finally { IOUtils.closeStream(reader); } return map;}}
这里在写的时候想做一个通用的方法,所以需要对Writable进行深度复制,但是一时间还没有想到办法,所以这里先留个问题,有时间再解决。(补充:Hadoop自带的org.apache.hadoop.io.WritableUtils.clone(orig, conf)可以实现Writable的深度复制。)
分享,成长,快乐
转载请注明blog地址:http://blog.csdn.net/fansy1990