关于千万笔String处理效能
al9是一个ArrayList,存有一千多万笔String数据,对它进行如下过滤工作会跑特别久。
怎么修改以下过滤工作可以加快处理(任何可以加快的方法都可以)
// Baseline filter from the question (reported as very slow on ~10 million rows).
// For the first record of every (field 2, field 3) pair not yet listed in `filter`,
// the whole list is rescanned and every record whose fields 1, 2 and 3 all match
// is appended to bomal2.
String filter = "";
for (String bom : al9) {
    String[] boma = bom.split("\\$");

    // Every entry appended below ends with ";", so the filter is empty only
    // while the very first record is being handled.
    boolean firstRecord = filter.length() == 0;
    if (!firstRecord && filter.contains(boma[2] + "$" + boma[3] + ";")) {
        continue; // pair already listed in the filter text
    }

    // Full rescan: each candidate is split again on every pass.
    for (String bom2 : al9) {
        String[] bom2a = bom2.split("\\$");
        boolean sameGroup = bom2a[2].equals(boma[2])
                && bom2a[1].equals(boma[1])
                && bom2a[3].equals(boma[3]);
        if (sameGroup) {
            bomal2.add(bom2);
        }
    }

    // The first entry additionally carries field 0 in front of the pair.
    if (firstRecord) {
        filter = filter + boma[0] + "$" + boma[2] + "$" + boma[3] + ";";
    } else {
        filter = filter + boma[2] + "$" + boma[3] + ";";
    }
}
// Baseline filter from the question, together with the two lists it works on.
// al9 holds the "$"-delimited input records; bomal2 receives the selected ones.
ArrayList<String> al9 = new ArrayList<String>();
ArrayList<String> bomal2 = new ArrayList<String>();

// For the first record of every (field 2, field 3) pair not yet listed in `filter`,
// rescan the whole list and copy each record whose fields 1, 2 and 3 all match.
String filter = "";
for (String bom : al9) {
    String[] boma = bom.split("\\$");

    // Every entry appended below ends with ";", so the filter is empty only
    // while the very first record is being handled.
    boolean firstRecord = filter.length() == 0;
    if (!firstRecord && filter.contains(boma[2] + "$" + boma[3] + ";")) {
        continue; // pair already listed in the filter text
    }

    // Full rescan: each candidate is split again on every pass.
    for (String bom2 : al9) {
        String[] bom2a = bom2.split("\\$");
        boolean sameGroup = bom2a[2].equals(boma[2])
                && bom2a[1].equals(boma[1])
                && bom2a[3].equals(boma[3]);
        if (sameGroup) {
            bomal2.add(bom2);
        }
    }

    // The first entry additionally carries field 0 in front of the pair.
    if (firstRecord) {
        filter = filter + boma[0] + "$" + boma[2] + "$" + boma[3] + ";";
    } else {
        filter = filter + boma[2] + "$" + boma[3] + ";";
    }
}
[解决办法]
if (filter.equals(""))
{
}
else if (filter.contains(boma[2] + "$" + boma[3] + ";"))
{
}
else
{
}
这段把第一个IF删除,直接
if (filter.contains(boma[2] + "$" + boma[3] + ";"))
{
}
else
{
}
这样逻辑的复杂性降低了一点,代码更好懂。
[解决办法]
我已经把程序简化成这样了:
/**
 * Simplified form of the filter loop: a single key check replaces the
 * separate "filter is empty" branch.
 *
 * <p>Both lists are created empty here, so the loop body never runs as written;
 * the method only shows the shape of the simplified logic.
 */
public static void main(String[] args) {
    ArrayList<String> list = new ArrayList<String>();
    ArrayList<String> bomal2 = new ArrayList<String>();
    String filter = "";

    for (String record : list) {
        String[] fields = record.split("\\$");
        String key = fields[2] + "$" + fields[3] + ";";
        if (filter.contains(key)) {
            continue; // key text already present in the filter
        }

        // Rescan the whole list for records agreeing on fields 1, 2 and 3.
        for (String candidate : list) {
            String[] candidateFields = candidate.split("\\$");
            boolean sameGroup = candidateFields[1].equals(fields[1])
                    && candidateFields[2].equals(fields[2])
                    && candidateFields[3].equals(fields[3]);
            if (sameGroup) {
                bomal2.add(candidate);
            }
        }
        filter = filter + key;
    }
}
[解决办法]
多线程吧,跑几万个还很快,千万级的还没试过
[解决办法]
这里有个错误:
if (filter.contains(key))
filter里的值是 12$13;14$15;...
这样以后遇到 2$13; 时,因为它是 12$13; 的子串,contains() 也会返回 true,不知道这是需求还是BUG
是BUG的话可以使filter初始为";", 比较时这样:
if (filter.contains(";" + key))
在key前加个分号。当然,楼主这样的做法必须保证值是不会有分号。
我觉得contains还是用hashSet快些
[解决办法]
以下是我的测试程序,test4比test性能大概提升了15倍。
/**
 * Micro-benchmark comparing four implementations of the same filter.
 *
 * <p>{@link #main} generates 10,000 random records of the form {@code $a$b$c}
 * (each part in 0..99), runs every variant on them and prints the elapsed
 * milliseconds, the result size and the result list.
 *
 * <p>Every variant walks the list once; for the first record carrying a not yet
 * seen key (fields 2 and 3) it rescans the list and copies the matching records
 * into {@code bomal2}, then prints its filter.
 */
public class Test {

    public static void main(String[] args) {
        final ArrayList<String> list = new ArrayList<String>(10000);
        ArrayList<String> bomal2 = new ArrayList<String>(10000);

        Random random = new Random();
        for (int n = 0; n < 10000; n++) {
            String record = "$" + random.nextInt(100) + "$" + random.nextInt(100) + "$" + random.nextInt(100);
            list.add(record);
        }
        System.out.println("start ... ");

        long begin = System.currentTimeMillis();
        test(list, bomal2);
        report("\ntest spend: ", begin, bomal2);

        bomal2.clear();
        begin = System.currentTimeMillis();
        test2(list, bomal2);
        report("\ntest2 spend: ", begin, bomal2);

        bomal2.clear();
        begin = System.currentTimeMillis();
        test3(list, bomal2);
        report("\ntest3 spend: ", begin, bomal2);

        bomal2.clear();
        begin = System.currentTimeMillis();
        test4(list, bomal2);
        report("\ntest4 spend: ", begin, bomal2);
    }

    /** Prints the elapsed time since {@code begin}, then the size and content of the result. */
    private static void report(String header, long begin, ArrayList<String> bomal2) {
        long elapsed = System.currentTimeMillis() - begin;
        System.out.println(header + elapsed);
        System.out.println("size: " + bomal2.size());
        System.out.println(bomal2.toString());
    }

    /**
     * Variant 1: the seen keys are kept in one ";"-delimited string, and records
     * are compared after splitting them on "$".
     *
     * @param list   input records
     * @param bomal2 receives the selected records
     */
    public static void test(final ArrayList<String> list, ArrayList<String> bomal2) {
        String filter = ";";
        for (String record : list) {
            String[] fields = record.split("\\$");
            String key = fields[2] + "$" + fields[3] + ";";
            // The leading ";" anchors the lookup at an entry boundary.
            if (filter.contains(";" + key)) {
                continue;
            }
            collectBySplit(list, fields, bomal2);
            filter = filter + key;
        }
        System.out.println(filter);
    }

    /**
     * Variant 2: same comparison as {@link #test}, but the seen keys live in a hash set.
     *
     * @param list   input records
     * @param bomal2 receives the selected records
     */
    public static void test2(final ArrayList<String> list, ArrayList<String> bomal2) {
        Set<String> filter = new HashSet<String>(list.size() / 2);
        for (String record : list) {
            String[] fields = record.split("\\$");
            String key = fields[2] + "$" + fields[3];
            if (filter.contains(key)) {
                continue;
            }
            collectBySplit(list, fields, bomal2);
            filter.add(key);
        }
        System.out.println(filter);
    }

    /**
     * Variant 3: no splitting. The key is everything after the second "$", and a
     * record matches when it ends with the reference record's text from its first
     * "$" onwards.
     *
     * <p>NOTE(review): this looks equivalent to the field comparison only for
     * records shaped like the generated ones - confirm before using other data.
     *
     * @param list   input records
     * @param bomal2 receives the selected records
     */
    public static void test3(final ArrayList<String> list, ArrayList<String> bomal2) {
        Set<String> filter = new HashSet<String>(list.size() / 2);
        for (String record : list) {
            int firstSep = record.indexOf("$");
            int secondSep = record.indexOf("$", firstSep + 1);
            String key = record.substring(secondSep + 1);
            if (filter.contains(key)) {
                continue;
            }
            String suffix = record.substring(firstSep);
            for (String candidate : list) {
                if (candidate.endsWith(suffix)) {
                    bomal2.add(candidate);
                }
            }
            filter.add(key);
        }
        System.out.println(filter);
    }

    /**
     * Variant 4: like {@link #test3}, but the rescan starts at the current index.
     * An earlier record with the same suffix would already have registered the
     * key, so nothing before the current position can match.
     *
     * @param list   input records
     * @param bomal2 receives the selected records
     */
    public static void test4(final ArrayList<String> list, ArrayList<String> bomal2) {
        Set<String> filter = new HashSet<String>(list.size() / 2);
        for (int current = 0; current < list.size(); current++) {
            String record = list.get(current);
            int firstSep = record.indexOf("$");
            int secondSep = record.indexOf("$", firstSep + 1);
            String key = record.substring(secondSep + 1);
            if (filter.contains(key)) {
                continue;
            }
            String suffix = record.substring(firstSep);
            for (int other = current; other < list.size(); other++) {
                String candidate = list.get(other);
                if (candidate.endsWith(suffix)) {
                    bomal2.add(candidate);
                }
            }
            filter.add(key);
        }
        System.out.println(filter);
    }

    /** Appends every record of {@code list} whose fields 1, 2 and 3 equal those in {@code fields}. */
    private static void collectBySplit(ArrayList<String> list, String[] fields, ArrayList<String> bomal2) {
        for (String candidate : list) {
            String[] candidateFields = candidate.split("\\$");
            if (candidateFields[1].equals(fields[1])
                    && candidateFields[2].equals(fields[2])
                    && candidateFields[3].equals(fields[3])) {
                bomal2.add(candidate);
            }
        }
    }
}
[解决办法]
// Single pass over al9 instead of rescanning (and re-splitting) the whole list
// for every new key.
//
// Selection rule, unchanged: the first record carrying a given (field 2, field 3)
// key decides which field 1 is kept for that key; every record with the same
// fields 1, 2 and 3 is selected, records sharing only the key are dropped.
//
// bomal2 receives the same records in the same order as the rescanning version:
// groups in order of their first record, records inside a group in list order.
HashSet<String> filter = new HashSet<String>();
// Insertion-ordered so the groups can be emitted in first-seen order.
// Fully qualified because no import block is visible for these two types.
java.util.Map<String, ArrayList<String>> groups =
        new java.util.LinkedHashMap<String, ArrayList<String>>();

for (String bom : al9) {
    String[] boma = bom.split("\\$");
    String key = boma[2] + "$" + boma[3];
    // Fields come from split("\\$") and so contain no "$": the joined key is unambiguous.
    String groupKey = boma[1] + "$" + key;

    ArrayList<String> group = groups.get(groupKey);
    if (group == null) {
        // add() returns false when the key already belongs to a group with another field 1.
        if (!filter.add(key)) {
            continue;
        }
        group = new ArrayList<String>();
        groups.put(groupKey, group);
    }
    group.add(bom);
}

for (ArrayList<String> group : groups.values()) {
    bomal2.addAll(group);
}
[解决办法]
// Benchmark members comparing a regex-based filter (prossessByRegEx) with the
// original split-based filter (process) on the same generated list.

// Record layout matched: f0$f1$f2$f3$f4, letters only in every part.
// Capturing groups: 2 = "f0$", 3 = "f1$f2$f3$", 4 = "f2$f3".
static String charReguExp = "(([a-zA-Z]*\\$)([a-zA-Z]*\\$([a-zA-Z]*\\$[a-zA-Z]*)\\$)[a-zA-Z]*)";
static Pattern pattern = Pattern.compile(charReguExp);
// Scratch buffer reused by every getString call (shared mutable static state).
static StringBuffer sb = new StringBuffer("");

// Input data. A four-element sample holding one copy of each of the literals
// below was the commented-out alternative to the generated list.
static List<String> al9 = new ArrayList<String>();

static {
    for (int i = 1; i <= 5000000; i++) {
        for (int j = 1; j <= 4; j++) {
            al9.add("aaa$bbb$ccc$ddd$");
            al9.add("bbb$hhh$ccc$ddd$ttt");
            al9.add("ttt$jj$nnn$ddd$ooo");
            al9.add("bbb$hhh$cc$ddd$ttt");
        }
    }
}

/** Runs the regex-based variant, prints a separator, then runs the split-based variant. */
public static void main(String[] args) {
    prossessByRegEx(al9);
    System.out.println("============");
    process(al9);
}

/**
 * Regex-based filter. Prints the elapsed milliseconds, the number of selected
 * records and the final filter text.
 *
 * <p>The first record is always selected and recorded with its field 0 in front;
 * later records are handled only when their "f2$f3;" text is not yet part of the
 * filter. An empty list is accepted and selects nothing (the previous version
 * read element 0 unconditionally and threw).
 *
 * @param al9 records to filter
 */
public static void prossessByRegEx(List<String> al9) {
    long start = System.currentTimeMillis();
    ArrayList<String> bomal2 = new ArrayList<String>();
    StringBuffer filter = new StringBuffer("");

    boolean firstRecord = true;
    for (String record : al9) {
        if (firstRecord) {
            addValueForList(record, al9, bomal2);
            filter.append(getString(record, 2, 4));
            firstRecord = false;
        } else if (!contains(filter.toString(), record)) {
            addValueForList(record, al9, bomal2);
            filter.append(getString(record, 4));
        }
    }

    long end = System.currentTimeMillis();
    System.out.println(end - start);
    System.out.println(bomal2.size());
    System.out.println(filter.toString());
}

/**
 * Concatenates the requested capturing groups of the first match in
 * {@code strValue} and appends ";" when the result is non-empty.
 *
 * @param strValue record to match
 * @param groups   capturing-group numbers, appended in the given order
 * @return the joined groups followed by ";", or "" when nothing was collected
 */
private static String getString(String strValue, int... groups) {
    sb.delete(0, sb.length());
    Matcher regexMatcher = pattern.matcher(strValue);
    if (regexMatcher.find()) {
        for (int i : groups) {
            sb.append(regexMatcher.group(i));
        }
    }
    if (sb.length() > 0) {
        sb.append(";");
    }
    return sb.toString();
}

/** Returns whether the "f2$f3;" text of {@code strValue} occurs anywhere in {@code filter}. */
private static boolean contains(String filter, String strValue) {
    return filter.contains(getString(strValue, 4));
}

/**
 * Appends to {@code bomal2} every record of {@code al9} whose group 3
 * ("f1$f2$f3$") equals that of {@code strValue}.
 */
private static void addValueForList(String strValue, List<String> al9, List<String> bomal2) {
    String temp = getString(strValue, 3);
    for (String val : al9) {
        if (temp.equals(getString(val, 3))) {
            bomal2.add(val);
        }
    }
}

/**
 * Split-based baseline filter. Prints the elapsed milliseconds, the number of
 * selected records and the final filter text.
 *
 * <p>For the first record of every (field 2, field 3) pair not yet listed in the
 * filter text, the whole list is rescanned and every record whose fields 1, 2
 * and 3 all match is selected.
 *
 * @param al9 records to filter
 */
public static void process(List<String> al9) {
    long start = System.currentTimeMillis();
    ArrayList<String> bomal2 = new ArrayList<String>();
    String filter = "";

    for (String bom : al9) {
        String[] boma = bom.split("\\$");

        // Every appended entry ends with ";", so the filter is empty only for the first record.
        boolean firstRecord = filter.length() == 0;
        if (!firstRecord && filter.contains(boma[2] + "$" + boma[3] + ";")) {
            continue;
        }

        for (String bom2 : al9) {
            String[] bom2a = bom2.split("\\$");
            if ((bom2a[2].equals(boma[2])) && (bom2a[1].equals(boma[1])) && (bom2a[3].equals(boma[3]))) {
                bomal2.add(bom2);
            }
        }

        // The first entry additionally carries field 0 in front of the pair.
        if (firstRecord) {
            filter = filter + boma[0] + "$" + boma[2] + "$" + boma[3] + ";";
        } else {
            filter = filter + boma[2] + "$" + boma[3] + ";";
        }
    }

    long end = System.currentTimeMillis();
    System.out.println(end - start);
    System.out.println(bomal2.size());
    System.out.println(filter);
}
[解决办法]