判断编码问题
自己写了一个判断文件编码的方法(下面第一段代码),后来客户又提供了一个基于 juniversalchardet(UniversalDetector)的实现(第二段代码),在此一并分享。
public String GetCharset(File file) {String charset = "SJIS";byte[] first3Bytes = new byte[3];try {boolean checked = false;BufferedInputStream bis = new BufferedInputStream(new FileInputStream(file));bis.mark(0);int read = bis.read(first3Bytes, 0, 3);if (read == -1)return charset;if (first3Bytes[0] == (byte) 0xFF && first3Bytes[1] == (byte) 0xFE) {charset = "UTF-16LE";checked = true;} else if (first3Bytes[0] == (byte) 0xFE&& first3Bytes[1] == (byte) 0xFF) {charset = "UTF-16BE";checked = true;} else if (first3Bytes[0] == (byte) 0xEF&& first3Bytes[1] == (byte) 0xBB&& first3Bytes[2] == (byte) 0xBF) {charset = "UTF-8";checked = true;}bis.reset();if (!checked) {int loc = 0;while ((read = bis.read()) != -1) {loc++;if (read >= 0xF0)break;if (0x80 <= read && read <= 0xBF) break;if (0xC0 <= read && read <= 0xDF) {read = bis.read();if (0x80 <= read && read <= 0xBF) continue;elsebreak;} else if (0xE0 <= read && read <= 0xEF) {read = bis.read();if (0x80 <= read && read <= 0xBF) {read = bis.read();if (0x80 <= read && read <= 0xBF) {charset = "UTF-8";break;} elsebreak;} elsebreak;}}}bis.close();} catch (Exception e) {e.printStackTrace();}return charset;}
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;

import org.mozilla.universalchardet.UniversalDetector;

/**
 * Command-line demo that feeds a file to juniversalchardet's {@link UniversalDetector}
 * and prints the charset name it reports.
 */
public class EncodingTest {

    /**
     * Detects and prints the encoding of a file.
     *
     * <p>If the file cannot be opened, read or closed, the stack trace is printed and
     * nothing else is written.
     *
     * @param args optional; {@code args[0]} is the file to inspect, defaulting to
     *     "source.txt" when no argument is given
     * @throws UnsupportedEncodingException declared for compatibility with existing callers
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        String sourceFilename = (args != null && args.length > 0) ? args[0] : "source.txt";
        UniversalDetector detector = new UniversalDetector(null);

        // try-with-resources closes the stream even when a read fails part-way through.
        try (FileInputStream fis = new FileInputStream(sourceFilename)) {
            byte[] bytes = new byte[1024]; // one buffer reused for every chunk
            int nread;
            // Stop at end of file or as soon as the detector reports it is done.
            while (!detector.isDone() && (nread = fis.read(bytes)) > 0) {
                detector.handleData(bytes, 0, nread);
            }
            detector.dataEnd();
        } catch (IOException e) {
            // Also covers FileNotFoundException from opening the file.
            e.printStackTrace();
            return;
        }

        String encoding = detector.getDetectedCharset();
        System.out.println("Encoding = " + encoding);
    }
}