MapReduce 实例:TopK
public class TopK extends Configured implements Tool { public static class TopKMapper extends Mapper<Object, Text, NullWritable, LongWritable> { public static final int K = 100; private TreeMap<Long, Long> tm = new TreeMap<Long, Long>(); @Override protected void map(Object key, Text value, Context context) throws IOException, InterruptedException { try { long k = Integer.parseInt(value.toString().substring(0, 9)); tm.put(k, k); if (tm.size() > K) { tm.remove(tm.firstKey()); } } catch (Exception e) { context.getCounter("TopK", "errorlog").increment(1); } } @Override protected void cleanup(Context context) throws IOException, InterruptedException { for (Long text : tm.values()) { context.write(NullWritable.get(), new LongWritable(text)); } } } public static class TopKReducer extends Reducer<NullWritable, LongWritable, NullWritable, LongWritable> { public static final int K = 100; private TreeMap<Long, Long> mt = new TreeMap<Long, Long>(); @Override protected void reduce(NullWritable key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException { for (LongWritable value : values) { mt.put(value.get(), value.get()); if (mt.size() > K) { mt.remove(mt.firstKey()); } } for (Long val : mt.descendingKeySet()) { context.write(NullWritable.get(), new LongWritable(val)); } } } @Override public int run(String[] args) throws Exception { Configuration conf = getConf(); Job job = new Job(conf, "TopKNum"); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(LongWritable.class); job.setMapperClass(TopKMapper.class); job.setReducerClass(TopKReducer.class); job.setJarByClass(TopK.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); return job.waitForCompletion(true) ? 
0 : 1; } public static void main(String[] args) throws IOException, InterruptedException { try { if (args.length < 2) { System.err.println("ERROR: Parameter format length "); System.exit(0); } int ret = ToolRunner.run(new TopK(), args); System.exit(ret); } catch (Exception e) { e.printStackTrace(); } }}
上面的代码求的是最大的 100 个数。如果要求最小的 100 个数,把 Mapper 中的 tm.remove(tm.firstKey()) 改为 tm.remove(tm.lastKey()),并把 Reducer 中的 mt.remove(mt.firstKey()) 改为 mt.remove(mt.lastKey()) 即可。