Java 程序的乱码问题
这段代码可以直接运行,但输入和输出都存在乱码问题,请高手指点一下。
package com.robot.analyzer;
import java.io.*;
import java.util.TreeMap;
import java.util.TreeSet;
/**
 * Dictionary-based Chinese word segmenter.
 *
 * <p>The segmenter is a lazily created singleton (see {@link #getSegmenter()}).
 * On construction it loads three lexicon files relative to the working
 * directory: {@code ..//Number.txt}, {@code ..//Foreign.txt} and
 * {@code ..//Dictionary.txt}. A missing or unreadable file is reported on the
 * console and leaves the corresponding lexicon empty; it does not abort
 * construction.
 *
 * <p>All three files are decoded as UTF-8. NOTE(review): this assumes
 * {@code Dictionary.txt} is stored in the same encoding as the other two
 * lexicon files -- confirm against the actual data files.
 */
public class SegCn {
    /** Charset name used to decode every lexicon file. */
    private static final String LEXICON_CHARSET = "UTF-8";

    /** Text inserted between adjacent segmented words. */
    private String separator = " ";

    /** The shared instance, or {@code null} until first requested / after {@link #reset()}. */
    private static SegCn segmenter = null;

    /**
     * Lexicon: maps a string to {@code true} when it is a complete dictionary
     * word, or to {@code false} when it is only a leading fragment of a longer
     * dictionary word.
     */
    private TreeMap<String, Boolean> cnWords;

    /** Single-character strings loaded from the foreign-name and number lexicon files. */
    private TreeSet<String> cForeign, cNumbers;

    /** Loads the number, foreign-character and word lexicons. */
    private SegCn() {
        cForeign = new TreeSet<String>();
        cNumbers = new TreeSet<String>();
        loadset(cNumbers, "..//Number.txt");
        loadset(cForeign, "..//Foreign.txt");
        System.out.print("Loading Lexicon");
        cnWords = new TreeMap<String, Boolean>();
        BufferedReader in = null;
        try {
            // Decode with an explicit charset: relying on the platform default
            // garbles the dictionary keys whenever the default is not UTF-8.
            in = new BufferedReader(new InputStreamReader(
                    new FileInputStream(new File("..//Dictionary.txt")), LEXICON_CHARSET));
            int lineCount = 0;
            String word;
            while ((word = in.readLine()) != null) {
                // Progress indicator: one dot per 10000 lines read.
                if (++lineCount % 10000 == 0) {
                    System.out.print('.');
                }
                // Lines containing '#' and words of five or more characters are skipped.
                if (word.indexOf("#") == -1 && word.length() < 5) {
                    addWord(word);
                }
            }
            System.out.println();
            System.out.println("词典加载成功");
            System.out.println("load words number is " + lineCount);
        } catch (IOException e) {
            System.out.println("Loading Lexicon failuer");
            e.printStackTrace();
        } finally {
            closeQuietly(in);
        }
    }

    /**
     * Registers {@code word} as a complete word and registers each of its
     * prefixes of length 2 up to {@code word.length() - 1} as a fragment,
     * unless that prefix is already present in the lexicon.
     */
    private void addWord(String word) {
        cnWords.put(word, Boolean.TRUE);
        for (int len = 2; len < word.length(); len++) {
            String prefix = word.substring(0, len);
            if (!cnWords.containsKey(prefix)) {
                cnWords.put(prefix, Boolean.FALSE);
            }
        }
    }

    /** Closes {@code reader} if it is non-null, ignoring any failure to close. */
    private static void closeQuietly(Reader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing a fully consumed reader fails.
        }
    }

    /** Discards the shared instance so the next {@link #getSegmenter()} reloads the lexicons. */
    public synchronized static void reset() {
        SegCn.segmenter = null;
    }

    /**
     * Returns the shared segmenter, creating it (and loading the lexicons) on
     * first use.
     *
     * @return the shared {@code SegCn} instance
     */
    public synchronized static SegCn getSegmenter() {
        if (SegCn.segmenter == null) {
            SegCn.segmenter = new SegCn();
        }
        return SegCn.segmenter;
    }

    /**
     * Reads {@code sourcefile} as UTF-8 and adds every line that is non-empty
     * and contains no '#' to {@code targetset}. Any failure is reported on
     * standard error and otherwise ignored, leaving whatever was read so far.
     *
     * @param targetset  set that receives the loaded lines
     * @param sourcefile path of the file to read
     */
    private void loadset(TreeSet<String> targetset, String sourcefile) {
        BufferedReader in = null;
        try {
            in = new BufferedReader(new InputStreamReader(
                    new FileInputStream(new File(sourcefile)), LEXICON_CHARSET));
            String dataline;
            while ((dataline = in.readLine()) != null) {
                if (dataline.indexOf("#") > -1 || dataline.length() == 0) {
                    continue;
                }
                targetset.add(dataline);
            }
        } catch (Exception e) {
            // Best effort: a missing lexicon file must not prevent construction.
            System.err.println("Exception loading data file" + sourcefile + " "
                    + e);
            e.printStackTrace();
        } finally {
            closeQuietly(in);
        }
    }

    /** Returns {@code true} when every single character of {@code word} is in {@code charset}. */
    private static boolean allCharsIn(TreeSet<String> charset, String word) {
        for (int i = 0; i < word.length(); i++) {
            if (!charset.contains(word.substring(i, i + 1))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests whether the text consists only of number characters.
     *
     * @param testword text to test
     * @return {@code true} if every character is in the number lexicon
     *         (vacuously {@code true} for the empty string)
     */
    public boolean isNumber(String testword) {
        return allCharsIn(cNumbers, testword);
    }

    /**
     * Tests whether the text consists only of foreign-name characters.
     *
     * @param testword text to test
     * @return {@code true} if every character is in the foreign lexicon
     *         (vacuously {@code true} for the empty string)
     */
    public boolean isAllForeign(String testword) {
        return allCharsIn(cForeign, testword);
    }

    /**
     * Tests whether a character is a Chinese ideograph.
     *
     * @param c character to test
     * @return {@code true} if {@code c} lies in the CJK Unified Ideographs block
     */
    public static boolean isCn(char c) {
        return Character.UnicodeBlock.of(c)
                == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS;
    }

    /**
     * Segments a line of text, inserting the configured separator between the
     * words it recognises. Characters that are neither CJK ideographs nor
     * number characters are copied through unchanged.
     *
     * @param cline the text to segment
     * @return the text with separators inserted at word boundaries
     */
    public String segment(String cline) {
        StringBuilder currentword = new StringBuilder();
        StringBuilder outline = new StringBuilder();
        int clength = cline.length();
        for (int i = 0; i < clength; i++) {
            char currentchar = cline.charAt(i);
            String currentstr = String.valueOf(currentchar);
            if (isCn(currentchar) || isNumber(currentstr)) {
                if (currentword.length() == 0) {
                    // Start of a new word: separate it from preceding non-blank text.
                    if (i > 0 && !Character.isWhitespace(cline.charAt(i - 1))) {
                        outline.append(separator);
                    }
                    currentword.append(currentchar);
                } else {
                    String current = currentword.toString();
                    String candidate = current + currentchar;
                    Boolean status = cnWords.get(candidate);
                    if (Boolean.TRUE.equals(status)) {
                        // The extended word is a complete lexicon entry.
                        currentword.append(currentchar);
                    } else if (isAllForeign(current)
                            && cForeign.contains(currentstr)
                            && i + 2 < clength
                            && !cnWords.containsKey(cline.substring(i, i + 2))) {
                        // Possibly a transliteration of a foreign name.
                        currentword.append(currentchar);
                    } else if (isNumber(current) && cNumbers.contains(currentstr)) {
                        // Keep all consecutive number characters together.
                        currentword.append(currentchar);
                    } else if (Boolean.FALSE.equals(status)
                            && i + 1 < clength
                            && cnWords.containsKey(candidate + cline.charAt(i + 1))) {
                        // The extended word is a fragment that the next
                        // character turns into a lexicon entry.
                        currentword.append(currentchar);
                    } else {
                        // Flush the finished word and start a new one.
                        outline.append(current);
                        if (!Character.isWhitespace(currentchar)) {
                            outline.append(separator);
                        }
                        currentword.setLength(0);
                        currentword.append(currentchar);
                    }
                }
            } else {
                // Neither a Chinese nor a number character: flush any pending
                // word and copy it through.
                if (currentword.length() > 0) {
                    outline.append(currentword);
                    if (!Character.isWhitespace(currentchar)) {
                        outline.append(separator);
                    }
                    currentword.setLength(0);
                }
                outline.append(currentchar);
            }
        }
        outline.append(currentword);
        return outline.toString();
    }

    /**
     * Sets the text inserted between segmented words.
     *
     * @param separator the new separator
     */
    public void setSeparator(String separator) {
        this.separator = separator;
    }

    /**
     * Interactive driver: prompts with ":", echoes each line read from
     * standard input and prints its segmentation, until end of input.
     *
     * @param args unused
     * @throws Exception if reading standard input fails
     */
    public static void main(String[] args) throws Exception {
        SegCn seg = SegCn.getSegmenter();
        // One reader for the whole session; creating a new BufferedReader per
        // line can throw away input that the previous one had read ahead.
        // The reader already decodes console bytes into a String, so the line
        // must not be re-encoded and decoded again with a different charset.
        // NOTE(review): this decodes with the JVM default charset; if the
        // console uses a different encoding, start the JVM with a matching
        // -Dfile.encoding setting.
        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
        while (true) {
            System.out.print(":");
            String s = br.readLine();
            if (s == null) {
                // End of input.
                break;
            }
            System.out.println(s);
            System.out.println(seg.segment(s));
        }
    }
}
[解决办法]
编码属性设置好了没有?请检查源文件、词典文件和控制台使用的编码是否一致(是 GBK 还是 UTF-8)。