ubuntu9.04+hadoop0.20.2+eclipse环境搭建
看hadoop也有一段时间了,今天花了一些时间把整个开发环境搭起来了,期间遇到了不小的麻烦,经过查阅大量资料,终于搞定了!
由于我的电脑配置不好,所以在实验室ubuntu服务器上搭建了单机的环境,然后在我的电脑上用eclipse上传编写好的程序。
1.安装JDK6
这个不用多说,下一个bin文件,修改一下权限,配置一下环境变量就可以了。
2. 配置SSH
新增hadoop组及同名用户:
$ sudo addgroup hadoop
$ sudo adduser --ingroup hadoop hadoop
接下来做些特别的工作:
$ su
$ chmod u+x /etc/sudoers
$ vim /etc/sudoers
在 root ALL=(ALL)的下一行加上:
hadoop ALL=(ALL) ALL
$ chmod u-x /etc/sudoers
$ exit
安装ssh-server:
$ sudo apt-get install openssh-server
建立SSH KEY:
$ su - hadoop
$ ssh-keygen -t rsa -P ""
Generating public/private rsa key pair.
Enter file in which to save the key (/home/hadoop/.ssh/id_rsa):
Created directory '/home/hadoop/.ssh'.
Your identification has been saved in /home/hadoop/.ssh/id_rsa.
Your public key has been saved in /home/hadoop/.ssh/id_rsa.pub.
The key fingerprint is:
9d:47:ab:d7:22:54:f0:f9:b9:3b:64:93:12:75:81:27 hadoop@ubuntu
让其不输入密码就能登录:
hadoop@ubuntu:~$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
$ sudo /etc/init.d/ssh reload
使用:
$ ssh localhost
看看是不是直接ok了。
3.安装Hadoop0.20.2
将包中内容解压到/usr/local/hadoop,并改变其所有者:
$ sudo chown -R hadoop:hadoop hadoop
配置Hadoop:
$ cd /usr/local/hadoop
$ vim conf/core-site.xml
将内容改为:
<!-- conf/core-site.xml — fs.default.name points the client at the NameNode;
     hadoop.tmp.dir is the base for Hadoop's local scratch/metadata files.
     NOTE(review): dfs.replication is an HDFS property that conventionally lives
     in conf/hdfs-site.xml; it still takes effect here because all *-site files
     are merged into one Configuration, but confirm before reorganizing. -->
<?xml version="1.0"?><?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!-- Put site-specific property overrides in this file. --><configuration><property><name>fs.default.name</name><value>hdfs://59.72.109.206:9000</value></property><property><name>dfs.replication</name><value>1</value></property><property><name>hadoop.tmp.dir</name><value>/home/hadoop/tmp</value></property></configuration>
<!-- conf/mapred-site.xml — mapred.job.tracker tells task clients where the
     JobTracker listens (host:port); matches the NameNode host configured above. -->
<?xml version="1.0"?><?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!-- Put site-specific property overrides in this file. --><configuration><property><name>mapred.job.tracker</name><value>59.72.109.206:9001</value></property></configuration>
package examples;import java.io.IOException;import java.util.Random;import java.util.StringTokenizer;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.io.WritableComparator;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.Reducer.Context;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.GenericOptionsParser;public class AdvancedWordCount {public static class TokenizerMapper extendsMapper<Object, Text, Text, IntWritable> {private final static IntWritable one = new IntWritable(1);private Text word = new Text();private String pattern = "[^\\w]";@Overrideprotected void map(Object key, Text value, Context context)throws IOException, InterruptedException {String line = value.toString();System.out.println("-------line todo: " + line);line = line.replaceAll(pattern, " ");System.out.println("-------line done: " + line);StringTokenizer itr = new StringTokenizer(line.toString());while (itr.hasMoreTokens()) {word.set(itr.nextToken());context.write(word, one);}}}public static class IntSumReducer extendsReducer<Text, IntWritable, Text, IntWritable> {private IntWritable result = new IntWritable();@Overrideprotected void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {// TODO Auto-generated method stubint sum = 0;for (IntWritable val : values) {sum += val.get();}result.set(sum);context.write(key, result);}}public static class MyInverseMapper extendsMapper<Object, Text, IntWritable, Text> {@Overrideprotected void map(Object key, Text value, Context context)throws IOException, InterruptedException {String[] keyAndValue = 
value.toString().split("\t");System.out.println("---------------->" + value);System.out.println("--------0------->" + keyAndValue[0]);System.out.println("--------1------->" + keyAndValue[1]);context.write(new IntWritable(Integer.parseInt(keyAndValue[1])), new Text(keyAndValue[0]));}}public static class IntWritableDecreasingComparator extendsIntWritable.Comparator {@Overridepublic int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {// TODO Auto-generated method stubreturn -super.compare(b1, s1, l1, b2, s2, l2);}public int compare(WritableComparator a, WritableComparator b) {// TODO Auto-generated method stubreturn -super.compare(a, b);}}public static boolean countingJob(Configuration conf, Path in, Path out) throws IOException, InterruptedException, ClassNotFoundException {Job job = new Job(conf, "wordcount");job.setJarByClass(AdvancedWordCount.class);job.setMapperClass(TokenizerMapper.class);job.setCombinerClass(IntSumReducer.class);job.setReducerClass(IntSumReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);FileInputFormat.addInputPath(job, in);FileOutputFormat.setOutputPath(job, out);return job.waitForCompletion(true);}public static boolean sortingJob(Configuration conf, Path in, Path out) throws IOException, InterruptedException, ClassNotFoundException {Job job = new Job(conf, "sort");job.setJarByClass(AdvancedWordCount.class);job.setMapperClass(MyInverseMapper.class);job.setOutputKeyClass(IntWritable.class);job.setOutputValueClass(Text.class);job.setSortComparatorClass(IntWritableDecreasingComparator.class);FileInputFormat.addInputPath(job, in);FileOutputFormat.setOutputPath(job, out);return job.waitForCompletion(true);}public static void main(String[] args) {Configuration conf = new Configuration();String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();Path temp = new Path("wordcount-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));boolean a = false, b = false;Path 
in = new Path(otherArgs[0]);Path out = new Path(otherArgs[1]);if(otherArgs.length != 2)System.exit(2);try {a = AdvancedWordCount.countingJob(conf, in, temp);b = AdvancedWordCount.sortingJob(conf, temp, out);} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (InterruptedException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (ClassNotFoundException e) {// TODO Auto-generated catch blocke.printStackTrace();} finally {try {FileSystem.get(conf).delete(temp, true);} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}if (!a || !b)try {FileSystem.get(conf).delete(out, true);} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}}}