JDK、Apache Commons Lang 和 Guava 字符串 split 效率测试
最近要做一个数据采集分析系统,每五分钟采集约 6 GB 数据,采集到的数据每一行是一条完整记录,需要对每条记录做 split 处理。为了找到最优的 split 方法,这里对 JDK(String.split)、Apache Commons Lang(StringUtils.split)和 Guava(Splitter)三种字符串 split 实现的效率做了对比测试。
测试读取一个约 250 MB 的文件,共 100 万行数据,测试数据对比:
@AxisRange(min = 0)@BenchmarkMethodChart(filePrefix = "split-benchmark-barchart")public class SplitTest {@Rulepublic BenchmarkRule benchmarkRun = new BenchmarkRule();private static final Splitter splitter = Splitter.on('|').omitEmptyStrings();@Testpublic void jdkSplitTest() throws IOException {File file = new File("G:/huawu/PS_FileInnerMon1_20130723170104-14076.dat");Files.readLines(file, Charsets.UTF_8, new LineProcessor<Integer>() {int count;@Overridepublic boolean processLine(String line) throws IOException {count++;line.split("|");return true;}@Overridepublic Integer getResult() {return count;}});}@Testpublic void langSplitTest() throws IOException {File file = new File("G:/huawu/PS_FileInnerMon1_20130723170003-9836.dat");Files.readLines(file, Charsets.UTF_8, new LineProcessor<Integer>() {int count;@Overridepublic boolean processLine(String line) throws IOException {count++;StringUtils.split(line, '|');return true;}@Overridepublic Integer getResult() {return count;}});}@Testpublic void guavaSplitTest() throws IOException {File file = new File("G:/huawu/PS_FileInnerMon1_20130723170003-9836.dat");Files.readLines(file, Charsets.UTF_8, new LineProcessor<Integer>() {int count;@Overridepublic boolean processLine(String line) throws IOException {count++;splitter.split(line);return true;}@Overridepublic Integer getResult() {return count;}});}}