还是命名为StreamParser较贴切些(狂踩之下必有勇夫)
参看 http://iwinyeah.iteye.com/admin/blogs/171514,一声~救~命招致众人狂踩,愤怒中爆发了以下代码。这样也不用管它什么编码模式了,只要JVM支持就可以运作。(这里没有判断编码格式,使用前你必须预知编码格式或先行检查。)
// 测试public void testTestStreamParser() {if(StreamParser.isSupportEncode("UTF-16LE")){System.out.println("The System support UTF-16LE");}StreamParser sp = new StreamParser("/res/txt/classmate.txt", "UTF-16LE");sp.addSplitString("\r\n");sp.addSplitString("@");System.out.println(sp.next());System.out.println(sp.next());assertEquals("TCW谭察崴",sp.next());assertEquals("--------------------",sp.next());assertEquals("广州先烈东路",sp.next());assertTrue(sp.skipString("广州市第一人民医院"));assertTrue(sp.skipString("13802808384"));sp.next();assertEquals("gdtcw",sp.next());assertEquals("163.com",sp.next());assertTrue(!sp.skipString("午餐肉"));assertTrue(sp.eos());sp.close();}
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;

/**
 * Input stream parser: cuts a byte stream into fields at caller-supplied
 * delimiter strings.
 *
 * <p>Each delimiter is converted to bytes in the stream's encoding and is then
 * searched for byte by byte with the KMP string-matching algorithm, so any
 * encoding the JVM supports can be used. The encoding is never detected: the
 * caller must know it in advance (or probe it with
 * {@link #isSupportEncode(String)}).
 *
 * <p>NOTE(review): matching works on raw bytes and is not aligned to character
 * boundaries, so in multi-byte encodings a delimiter could in principle match
 * across two adjacent characters - confirm this is acceptable for your data.
 *
 * @author iwinyeah (Li Yongchao)
 * @version 1.0.0
 */
public class StreamParser {

    /** Maximum number of delimiter strings that can be registered. */
    private static final int SPLIT_SIZE = 8;

    /** Capacity of the field buffer at the start of every field, in bytes. */
    private static final int INIT_SIZE = 80;

    /** Smallest amount the field buffer grows by, in bytes. */
    private static final int EXPAND_SIZE = 16;

    /** Probe text used to find out whether an encoding name is supported. */
    private static final String TEST_STRING = "iwinyeah";

    /** The stream being parsed; {@code null} only when no stream is available. */
    private final InputStream is;

    /** Name of the encoding used by the stream. */
    private final String encode;

    /** Number of delimiters registered so far. */
    private int splitCount = 0;

    /** Registered delimiters, already converted to stream bytes. */
    private final byte[][] splitBytes = new byte[SPLIT_SIZE][];

    /** KMP failure table of each registered delimiter. */
    private final int[][] splitShifts = new int[SPLIT_SIZE][];

    /** Per delimiter: how many of its bytes are currently matched. */
    private final int[] splitChecks = new int[SPLIT_SIZE];

    /** Bytes of the field currently being read. */
    private byte[] byteBuffer = new byte[INIT_SIZE];

    /** Number of valid bytes in {@link #byteBuffer}. */
    private int bytesRead;

    /** Set once the end of the stream was reached (or the stream failed). */
    private boolean eos = false;

    /**
     * Tests whether the running JVM supports the given encoding.
     *
     * @param enc encoding name; {@code null} is reported as unsupported
     * @return {@code true} if supported, {@code false} otherwise
     */
    public static boolean isSupportEncode(String enc) {
        if (enc == null) {
            return false;
        }
        try {
            TEST_STRING.getBytes(enc);
            return true;
        } catch (UnsupportedEncodingException e) {
            return false;
        }
    }

    /**
     * Creates a parser over a classpath resource.
     *
     * <p>If the resource cannot be found the parser starts out at end of
     * stream, so {@link #next()} returns {@code null} straight away.
     *
     * @param rsName resource name, including its path
     * @param enc    encoding of the resource
     */
    public StreamParser(String rsName, String enc) {
        InputStream istream = (rsName == null) ? null : this.getClass().getResourceAsStream(rsName);
        is = istream;
        encode = enc;
        eos = (istream == null);
    }

    /**
     * Creates a parser over an already opened stream.
     *
     * @param iStream stream to parse; a {@code null} stream is treated as an
     *                empty one that is already at its end
     * @param enc     encoding of the stream
     */
    public StreamParser(InputStream iStream, String enc) {
        is = iStream;
        encode = enc;
        // Without this guard the first read would dereference null.
        eos = (iStream == null);
    }

    /**
     * Tells whether the end of the stream has been reached.
     *
     * @return {@code true} once the stream is exhausted, {@code false} before
     */
    public boolean eos() {
        return eos;
    }

    /** Closes the underlying stream; a failure to close is ignored. */
    public void close() {
        if (is != null) {
            try {
                is.close();
            } catch (IOException ignored) {
                // Best effort: nothing useful can be done if closing fails.
            }
        }
    }

    /**
     * Registers one more delimiter string.
     *
     * @param splitString delimiter as text; it is converted to bytes in the
     *                    stream's encoding
     * @return the number of delimiters now registered, or {@code -1} if the
     *         limit of {@value #SPLIT_SIZE} is reached, the delimiter is
     *         {@code null} or empty, or it cannot be encoded
     */
    public int addSplitString(String splitString) {
        if (splitCount >= SPLIT_SIZE) {
            return -1;
        }
        byte[] bytes = toStreamBytes(splitString);
        // An empty delimiter has no byte to compare against and would make
        // the matcher index past the end of its array.
        if (bytes == null || bytes.length == 0) {
            return -1;
        }
        splitChecks[splitCount] = 0;
        splitBytes[splitCount] = bytes;
        splitShifts[splitCount] = getShift(bytes);
        splitCount++;
        return splitCount;
    }

    /**
     * Consumes the stream up to and including the next occurrence of a string.
     *
     * <p>When the string is not found the whole remaining stream is consumed
     * and the parser is left at end of stream.
     *
     * @param skip string to skip past; {@code null} or empty means there is
     *             nothing to skip
     * @return {@code true} if the string was found (or there was nothing to
     *         skip); {@code false} if the end of the stream was reached first,
     *         the stream had already ended, reading failed, or the string
     *         cannot be encoded
     */
    public boolean skipString(String skip) {
        if (skip == null) {
            return true;
        }
        byte[] skipBytes = toStreamBytes(skip);
        if (skipBytes == null) {
            return false;
        }
        if (skipBytes.length == 0) {
            return true;
        }
        if (eos) {
            // Nothing is left to search in.
            return false;
        }

        int[] shift = getShift(skipBytes);
        int matched = 0;
        try {
            int c;
            // Stop BEFORE processing the -1 sentinel: cast to a byte it would
            // be 0xFF and could be mistaken for real data.
            while ((c = is.read()) != -1) {
                byte b = (byte) c;
                while (matched > 0 && b != skipBytes[matched]) {
                    matched = shift[matched - 1];
                }
                if (b == skipBytes[matched]) {
                    matched++;
                }
                if (matched == skipBytes.length) {
                    return true;
                }
            }
        } catch (IOException e) {
            // A failed stream is treated like an exhausted one (see below).
        }
        eos = true;
        return false;
    }

    /**
     * Reads the next field from the stream.
     *
     * <p>A field ends at the first registered delimiter (which is consumed but
     * not returned) or at the end of the stream. When the end of the stream is
     * hit, whatever was read so far - possibly an empty string - is returned
     * and {@link #eos()} becomes {@code true}. A read error is handled the same
     * way, so callers looping on {@code eos()} always terminate.
     *
     * @return the field text, or {@code null} if the stream had already ended
     *         before this call or the bytes cannot be decoded
     */
    public String next() {
        if (eos) {
            return null;
        }
        bytesRead = 0;
        // Drop a buffer that was enlarged for an earlier, longer field.
        if (byteBuffer.length > INIT_SIZE) {
            byteBuffer = new byte[INIT_SIZE];
        }
        for (int i = 0; i < splitCount; i++) {
            splitChecks[i] = 0;
        }

        try {
            int c;
            // The -1 sentinel is neither stored nor matched.
            while ((c = is.read()) != -1) {
                append(c);
                int delimiterLength = matchDelimiter((byte) c);
                if (delimiterLength > 0) {
                    // The delimiter's bytes are at the end of the buffer.
                    bytesRead -= delimiterLength;
                    return getField();
                }
            }
        } catch (IOException e) {
            // Fall through: return what was read and mark the stream ended.
        }
        eos = true;
        return getField();
    }

    /**
     * Advances the KMP state of every delimiter by one input byte.
     *
     * @param b the byte just read
     * @return the byte length of the first registered delimiter that is now
     *         completely matched, or {@code 0} if none is
     */
    private int matchDelimiter(byte b) {
        for (int i = 0; i < splitCount; i++) {
            byte[] delimiter = splitBytes[i];
            int state = splitChecks[i];
            while (state > 0 && b != delimiter[state]) {
                state = splitShifts[i][state - 1];
            }
            if (b == delimiter[state]) {
                state++;
            }
            splitChecks[i] = state;
            if (state == delimiter.length) {
                return delimiter.length;
            }
        }
        return 0;
    }

    /** Decodes the buffered bytes; returns {@code null} if that is impossible. */
    private String getField() {
        if (encode == null) {
            return null;
        }
        try {
            return new String(byteBuffer, 0, bytesRead, encode);
        } catch (UnsupportedEncodingException e) {
            return null;
        }
    }

    /** Converts text to bytes in the stream's encoding; {@code null} on failure. */
    private byte[] toStreamBytes(String text) {
        if (text == null || encode == null) {
            return null;
        }
        try {
            return text.getBytes(encode);
        } catch (UnsupportedEncodingException e) {
            return null;
        }
    }

    /** Appends one byte to the field buffer, enlarging the buffer when full. */
    private void append(int c) {
        if (bytesRead >= byteBuffer.length) {
            // Grow by the current size (at least EXPAND_SIZE) so that long
            // fields do not trigger a copy every few bytes.
            int growth = Math.max(byteBuffer.length, EXPAND_SIZE);
            byte[] bigger = new byte[byteBuffer.length + growth];
            System.arraycopy(byteBuffer, 0, bigger, 0, bytesRead);
            byteBuffer = bigger;
        }
        byteBuffer[bytesRead] = (byte) c;
        bytesRead++;
    }

    /**
     * Builds the KMP failure table of a pattern: entry {@code i} is the length
     * of the longest proper prefix of {@code pattern[0..i]} that is also a
     * suffix of it, i.e. the state to fall back to after a mismatch.
     *
     * @param pattern pattern bytes
     * @return failure table with one entry per pattern byte
     */
    private static int[] getShift(byte[] pattern) {
        int[] shifts = new int[pattern.length];
        int k = 0;
        for (int i = 1; i < pattern.length; i++) {
            while (k > 0 && pattern[i] != pattern[k]) {
                k = shifts[k - 1];
            }
            if (pattern[i] == pattern[k]) {
                k++;
            }
            shifts[i] = k;
        }
        return shifts;
    }
}
// Reader comment #1 (XMLDB, 2008-03-19): I don't quite see what you are trying
// to do; if the goal is only to read a file in a given encoding into the JVM's
// internal string form, this is a needlessly roundabout, redundant way to do it.