
A simple Hadoop test example

2012-11-21 

I found a test example online that counts how many times a given word appears in a text file. While debugging it I ran into a few bugs, so I am sharing the corrected version here.
Built and run from Eclipse.
VM arguments: -Xms64m -Xmx512m
Program arguments: the input path, the output path, and the word to count (three arguments, as checked in main()).
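As a purely hypothetical illustration (these paths and this word are not from the original post), the Eclipse program-arguments line could look like:

/user/test/input /user/test/output hadoop

meaning the job reads the files under /user/test/input, writes its result to /user/test/output, and counts occurrences of the word "hadoop".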

package com.run.ayena.distributed.test;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

// Count the occurrences of a specified word in a text file
public class SingleWordCount {

    public static class SingleWordCountMapper extends
            Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text val = new Text();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            String keyword = context.getConfiguration().get("word");
            while (itr.hasMoreTokens()) {
                String nextkey = itr.nextToken();
                if (nextkey.trim().equals(keyword)) {
                    val.set(nextkey);
                    context.write(val, one);
                } else {
                    // do nothing
                }
            }
        }
    }

    public static class SingleWordCountReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 3) {
            System.err.println("Usage: wordcount <in> <out> <word>");
            System.exit(2);
        }
        // The word to count, passed in as the third argument
        conf.set("word", otherArgs[2]);
        // Specify the system directory
        conf.set("mapred.system.dir", "/cygdrive/e/workspace_hadoop/SingleWordCount/");
        // Set the job name
        Job job = new Job(conf, "word count");
        // Set the class containing the job
        job.setJarByClass(SingleWordCount.class);
        // Set the Mapper
        job.setMapperClass(SingleWordCountMapper.class);
        // Set the local combiner class; in this example it is the same as the Reducer class
        job.setCombinerClass(SingleWordCountReducer.class);
        // Set the Reducer
        job.setReducerClass(SingleWordCountReducer.class);
        // Set the map output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Set the reducer output key type
        job.setOutputKeyClass(Text.class);
        // Set the reducer output value type
        job.setOutputValueClass(IntWritable.class);
        // Set the input and output directories
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        // Run the job and exit when it completes
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
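To sanity-check the mapper's filtering logic without starting a Hadoop cluster, here is a minimal plain-Java sketch. It is not part of the original post: the class name SingleWordMatchCheck, the sample lines, and the keyword "hadoop" are made up for illustration, and it only replays the StringTokenizer-plus-equals comparison that SingleWordCountMapper performs.

package com.run.ayena.distributed.test;

import java.util.StringTokenizer;

// Hypothetical local sketch (not from the original post): replays the mapper's
// token-matching logic on in-memory strings so it can be checked without Hadoop.
public class SingleWordMatchCheck {
    public static void main(String[] args) {
        String keyword = "hadoop";                        // plays the role of conf.get("word")
        String[] lines = {                                // stands in for the lines of the input file
            "hadoop is a distributed computing framework",
            "word count with hadoop counts hadoop tokens"
        };
        int count = 0;
        for (String line : lines) {
            StringTokenizer itr = new StringTokenizer(line);      // same tokenization as the mapper
            while (itr.hasMoreTokens()) {
                if (itr.nextToken().trim().equals(keyword)) {     // same comparison as the mapper
                    count++;                                      // the reducer sums these ones per key
                }
            }
        }
        System.out.println(keyword + "\t" + count);       // mirrors the final (word, count) pair
    }
}

Running it prints "hadoop" and 3 separated by a tab, the same (word, count) pair the real job writes to its output directory after the reducer sums the ones emitted by the mapper.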
