从零开始学习Hadoop--第5章 压缩
package com.brianchen.hadoop;

import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.util.ReflectionUtils;
public classDcprsF2F{
public staticvoid main(String[] args) throws Exception{
if(args.length != 3){
System.err.println("Usage:CprsF2F cmps_name src target");
System.exit(2);
}
Class<?>codecClass = Class.forName(args[0]);
Configurationconf = new Configuration();
CompressionCodeccodec = (CompressionCodec)ReflectionUtils.newInstance(codecClass,conf);
InputStreamin = null;
OutputStreamout = null;
FileSystem fs= FileSystem.get(URI.create(args[1]), conf);
try{
in =codec.createInputStream(fs.open(new Path(args[1])),codec.createDecompressor());
out =fs.create(new Path(args[2]));
IOUtils.copyBytes(in,out, conf);
}finally{
IOUtils.closeStream(in);
IOUtils.closeStream(out);
}
}
}
“cd ~/dcprsf2f”
“javac -cp /home/brian/usr/hadoop/hadoop-1.2.1/hadoop-core-1.2.1.jar -d ./class/ src/*.java”
“jar -cvf dcprsf2f.jar -C ./class/ .”
“cd ~/usr/hadoop/hadoop-1.2.1”
“./bin/hadoop jar /home/brian/dcprsf2f/dcprsf2f.jar com.brianchen.hadoop.DcprsF2F org.apache.hadoop.io.compress.GzipCodec hello.txt hello_dec.txt”
“./bin/hadoop fs -cat hello.txt”
“./bin/hadoop fs -cat hello_dec.txt”
首先需要确认Hadoop已经启动。压缩文件hello.txt是5.3节创建的,在这里直接使用。将压缩文件hello.txt解压缩到文件hello_dec.txt。“org.apache.hadoop.io.compress.GzipCodec”是Hadoop的Gzip压缩器类的类名。解压缩完毕之后,执行cat检查hello.txt的内容,这时候显示的是乱码,因为它是压缩文件。然后再执行“./bin/hadoop fs -cat hello_dec.txt”,这次会显示出“hello, world!”。