[Experiment] An Avro and non-Avro mapred example: rewriting WordCount
Avro fits naturally into Hadoop. A common situation during development is that the input files are not Avro but the output should be, which calls for a non-Avro mapper combined with an Avro reducer. The rest of this post demonstrates that pattern by rewriting the classic WordCount example.
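The driver shown below wires together a WordCountMapper and a WordCountReducer that are not listed in the post itself. The following is a minimal sketch of what those two classes can look like with the org.apache.avro.mapred API; the concrete type choices (Utf8 keys, Integer counts) are an assumption made to match the Pair(STRING, INT) output schema set in the driver. The mapper is an ordinary mapred Mapper that wraps its output in AvroKey/AvroValue, and the reducer extends AvroReducer and emits Pair records.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.avro.mapred.AvroCollector;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroReducer;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapred.Pair;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Non-Avro mapper: reads plain text lines and emits Avro-wrapped (word, 1) pairs.
public class WordCountMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, AvroKey<Utf8>, AvroValue<Integer>> {

  @Override
  public void map(LongWritable key, Text line,
      OutputCollector<AvroKey<Utf8>, AvroValue<Integer>> output, Reporter reporter)
      throws IOException {
    StringTokenizer tokens = new StringTokenizer(line.toString());
    while (tokens.hasMoreTokens()) {
      output.collect(new AvroKey<Utf8>(new Utf8(tokens.nextToken())),
                     new AvroValue<Integer>(1));
    }
  }
}

// Avro reducer: sums the counts and emits one Pair<word, total> record per word,
// matching the Pair(STRING, INT) schema configured in the driver.
public class WordCountReducer extends AvroReducer<Utf8, Integer, Pair<Utf8, Integer>> {

  @Override
  public void reduce(Utf8 word, Iterable<Integer> counts,
      AvroCollector<Pair<Utf8, Integer>> collector, Reporter reporter)
      throws IOException {
    int sum = 0;
    for (int count : counts) {
      sum += count;
    }
    collector.collect(new Pair<Utf8, Integer>(word, sum));
  }
}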
Driver
import org.apache.avro.Schema;
import org.apache.avro.Schema.Type;
import org.apache.avro.mapred.AvroJob;
import org.apache.avro.mapred.Pair;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class AvroWordCount extends Configured implements Tool {

  public static void main(String[] args) throws Exception {
    int exitCode = ToolRunner.run(new AvroWordCount(), args);
    System.exit(exitCode);
  }

  @Override
  public int run(String[] args) throws Exception {
    if (args.length != 2) {
      System.out.printf("Usage %s [generic options] <in> <out>\n", getClass().getName());
      ToolRunner.printGenericCommandUsage(System.out);
      return -1;
    }

    JobConf conf = new JobConf(AvroWordCount.class);
    conf.setJobName("wordcount");
    conf.set("fs.default.name", "hdfs://node04vm01:9000");

    // Output is Avro: one Pair(STRING, INT) record per word.
    AvroJob.setOutputSchema(conf,
        Pair.getPairSchema(Schema.create(Type.STRING), Schema.create(Type.INT)));

    // Non-Avro mapper via the plain mapred API, Avro reducer via AvroJob.
    conf.setMapperClass(WordCountMapper.class);
    AvroJob.setReducerClass(conf, WordCountReducer.class);

    // Input is plain text, so the ordinary TextInputFormat is used.
    conf.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
  }
}
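Once the job completes, the output directory holds Avro data files whose records follow the Pair(STRING, INT) schema configured above, with fields named key and value. As a quick sanity check, a small reader like the sketch below can dump the results using the generic API; the class name DumpWordCounts and the command-line argument are placeholders, not part of the original post.

import java.io.File;
import java.io.IOException;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class DumpWordCounts {
  public static void main(String[] args) throws IOException {
    // args[0]: a part-*.avro file copied out of the job's output directory.
    File avroFile = new File(args[0]);
    DataFileReader<GenericRecord> reader =
        new DataFileReader<GenericRecord>(avroFile, new GenericDatumReader<GenericRecord>());
    try {
      for (GenericRecord pair : reader) {
        // Each record is a Pair: "key" holds the word, "value" the count.
        System.out.println(pair.get("key") + "\t" + pair.get("value"));
      }
    } finally {
      reader.close();
    }
  }
}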