首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > 服务器 > 云计算 >

MapReduce实战——分析Apache日志访问页面大小

2013-10-29 
MapReduce实战——分析Apache日志访问页面大小。日志文件示例:220.181.108.151 - - [31/Jan/2012:00:02:32 +0800] ……

MapReduce实战——分析Apache日志访问页面大小

日志文件:

220.181.108.151 - - [31/Jan/2012:00:02:32 +0800] "GET /home.php?mod=space&uid=158&do=album&view=me&from=space HTTP/1.1" 200 8784 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"

208.115.113.82 - - [31/Jan/2012:00:07:54 +0800] "GET /robots.txt HTTP/1.1" 200 582 "-" "Mozilla/5.0 (compatible; Ezooms/1.0; ezooms.bot@gmail.com)"

220.181.94.221 - - [31/Jan/2012:00:09:24 +0800] "GET /home.php?mod=spacecp&ac=pm&op=showmsg&handlekey=showmsg_3&touid=3&pmid=0&daterange=2&pid=398&tid=66 HTTP/1.1" 200 10070 "-" "Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)"

112.97.24.243 - - [31/Jan/2012:00:14:48 +0800] "GET /data/cache/style_2_common.css?AZH HTTP/1.1" 200 57752 "http://forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"

112.97.24.243 - - [31/Jan/2012:00:14:48 +0800] "GET /data/cache/style_2_widthauto.css?AZH HTTP/1.1" 200 1024 "http://forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"


目标:用MapReduce统计上述访问日志中响应页面大小(状态码之后的数字字段)的平均值、最大值和最小值。

代码:

import java.io.IOException;

import java.text.DateFormat;

import java.text.SimpleDateFormat;

import java.util.Date;

import java.util.Iterator;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapred.JobConf;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import org.apache.hadoop.util.GenericOptionsParser;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;

public class WebLogMessageSizeAggregator {


    public static final Pattern httplogPattern = Pattern

   .compile("([^\\s]+) - - \\[(.+)\\] \"([^\\s]+) (/[^\\s]*) HTTP/[^\\s]+\" [^\\s]+ ([0-9]+)");

    

    public static class AMapper extends Mapper<Object, Text, Text, IntWritable> {


        public void map(Object key, Text value, Context context)throws IOException, InterruptedException {

            Matcher matcher = httplogPattern.matcher(value.toString());

               while (matcher.find())//查找符合pattern的字符串  

            {  

            int size = Integer.parseInt(matcher.group(5));

                context.write(new Text("msgSize"),new IntWritable(size));

            }  

        }     

    }

publicstaticclass AReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        public void reduce(Text key, Iterable<IntWritable> values, Context context)throws IOException,

                InterruptedException {

            double tot = 0;

            int count = 0;

            int min = Integer.MAX_VALUE;

            int max = 0;

            Iterator<IntWritable> iterator = values.iterator();

            while (iterator.hasNext()) {

                int value = iterator.next().get();

                tot = tot + value;

                count++;

                if (value < min) {

                    min = value;

                }

                if (value > max) {

                    max = value;

                }

            }

            context.write(new Text("Mean"),new IntWritable((int) tot / count));

            context.write(new Text("Max"),new IntWritable(max));

            context.write(new Text("Min"),new IntWritable(min));

        }

    }


    public static void main(String[] args) throws Exception {

        JobConf conf = new JobConf();

        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        if (otherArgs.length != 2) {

            System.err.println("Usage: <in> <out>");

            System.exit(2);

        }


        Job job = new Job(conf, "WebLogMessageSizeAggregator");

        job.setJarByClass(WebLogMessageSizeAggregator.class);

        job.setMapperClass(AMapper.class);

        job.setReducerClass(AReducer.class);

        job.setMapOutputKeyClass(Text.class);

        job.setMapOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));

        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }

}

  

运行结果:

Mean 13221

Max 10240000

Min 1


热点排行