// 代理IP访问 二 (Proxy IP access, part 2)
package spider.xxxxxx.common;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Scrapes a list of HTTP proxies from the page at {@code Constants.PROXY_URL},
 * checks each one by fetching a test page through it, and records the result
 * in the properties file at {@code Constants.PROXY_PATH}.
 *
 * <p>The scraped proxies are kept in a single static map that is shared by all
 * callers and grows across calls; this class does no synchronization of its own.
 *
 * @author jxialiang
 */
public class Proxy {

    /** Address of the page that lists the candidate proxies. */
    private static final String PROXY_LIST_URL = Constants.PROXY_URL;

    /** Properties file in which usable proxies are recorded. */
    private static final String PROXY_FILE_PATH = Constants.PROXY_PATH;

    /** Page fetched through each candidate proxy to decide whether it works. */
    private static final String PROXY_TEST_URL = "http://cs.anjiwu.com/join.html";

    /** Row attribute of the proxy list page that carries the {@code host:port} text. */
    private static final String ROW_ATTRIBUTE = "onDblClick";

    private static final String HTTP_PROXY_HOST = "http.proxyHost";
    private static final String HTTP_PROXY_PORT = "http.proxyPort";

    /** Upper bound on proxy list download attempts, so a dead site cannot hang the caller. */
    private static final int MAX_FETCH_ATTEMPTS = 5;

    /** Pause between two download attempts, in milliseconds. */
    private static final long RETRY_DELAY_MILLIS = 1000L;

    /** Shared accumulator: proxy host mapped to proxy port. */
    private static final Map<String, String> ipMap = new HashMap<String, String>();

    /**
     * Downloads the proxy list page and adds every {@code host:port} pair found
     * in a {@code tr[onDblClick]} row to the shared map.
     *
     * <p>The download is attempted at most {@link #MAX_FETCH_ATTEMPTS} times. If
     * every attempt fails, the map is returned as it was before the call.
     *
     * @return the shared host-to-port map (the live static instance, not a copy)
     */
    public static Map<String, String> getProxyMap() {
        Document page = fetchProxyListPage();
        if (page == null) {
            return ipMap;
        }
        Elements rows = page.select("tr[" + ROW_ATTRIBUTE + "]");
        for (Element row : rows) {
            addProxy(row.attr(ROW_ATTRIBUTE));
        }
        return ipMap;
    }

    /**
     * Checks every known proxy and updates the properties file: a proxy that
     * returns non-empty content for the test page is written, any other proxy
     * is removed. A failure while handling one proxy does not stop the checks
     * of the remaining ones.
     */
    public void init() {
        // Iterate over a private snapshot so the shared map can change underneath us.
        Map<String, String> candidates = new HashMap<String, String>(getProxyMap());
        for (Map.Entry<String, String> candidate : candidates.entrySet()) {
            verifyAndRecord(candidate.getKey(), candidate.getValue());
        }
        System.out.println("已经获取IP:" + candidates.size());
    }

    /**
     * Fetches the content of a URL through the given HTTP proxy.
     *
     * <p>The proxy is selected through the JVM-wide {@code http.proxyHost} and
     * {@code http.proxyPort} system properties. Their previous values are put
     * back before this method returns, so the tested proxy does not leak into
     * later requests. Because these properties are process-wide, concurrent
     * calls can still interfere with each other.
     *
     * @param url1 address of the page to fetch
     * @param host proxy host
     * @param port proxy port
     * @return the parsed document rendered as a string, or an empty string when
     *         the request fails for any reason
     */
    public static String getContent(String url1, String host, String port) {
        String content = "";
        String previousHost = null;
        String previousPort = null;
        boolean captured = false;
        try {
            previousHost = System.getProperty(HTTP_PROXY_HOST);
            previousPort = System.getProperty(HTTP_PROXY_PORT);
            captured = true;
            System.setProperty(HTTP_PROXY_HOST, host);
            System.setProperty(HTTP_PROXY_PORT, port);
            Document doc = Jsoup.connect(url1)
                    .userAgent("Mozilla") // set the User-Agent header
                    .cookie("auth", "token") // set the cookie
                    .timeout(3000) // connection timeout in milliseconds
                    .get();
            content = doc.toString();
        } catch (Exception e) {
            // A failing proxy is the expected outcome for many candidates;
            // the empty result is the signal the caller acts on.
            System.out.println("代理IP测试请求超时");
        } finally {
            if (captured) {
                restoreSystemProperty(HTTP_PROXY_HOST, previousHost);
                restoreSystemProperty(HTTP_PROXY_PORT, previousPort);
            }
        }
        return content;
    }

    /** Downloads the proxy list page, retrying a bounded number of times; returns null if all attempts fail. */
    private static Document fetchProxyListPage() {
        for (int attempt = 1; attempt <= MAX_FETCH_ATTEMPTS; attempt++) {
            try {
                return Jsoup.connect(PROXY_LIST_URL)
                        .userAgent("Mozilla") // set the User-Agent header
                        .cookie("auth", "token") // set the cookie
                        .timeout(3000) // connection timeout in milliseconds
                        .get();
            } catch (Exception e) {
                System.out.println("代理请求超时");
                System.err.println("Proxy list fetch attempt " + attempt + "/"
                        + MAX_FETCH_ATTEMPTS + " failed: " + e);
            }
            if (attempt < MAX_FETCH_ATTEMPTS) {
                try {
                    Thread.sleep(RETRY_DELAY_MILLIS);
                } catch (InterruptedException interrupted) {
                    // Preserve the interrupt for the caller and stop retrying.
                    Thread.currentThread().interrupt();
                    return null;
                }
            }
        }
        return null;
    }

    /** Extracts {@code host:port} from a row's onDblClick script text and stores it; malformed text is skipped. */
    private static void addProxy(String onDblClick) {
        // Strip the JavaScript wrapper (clip(...), alert(...), quotes, semicolons)
        // so that only the host:port text remains.
        String hostAndPort = onDblClick
                .replace("clip", "")
                .replace("已拷贝到剪贴板!", "")
                .replace("alert", "")
                .replace("'", "")
                .replace(";", "")
                .replace("(", "")
                .replace(")", "");
        String[] parts = hostAndPort.split(":");
        if (parts.length < 2) {
            return; // no host:port pair in this row
        }
        String host = parts[0].trim();
        String port = parts[1].trim();
        if (host.length() == 0 || port.length() == 0) {
            return;
        }
        ipMap.put(host, port);
    }

    /** Tests one proxy and writes it to, or removes it from, the properties file. */
    private void verifyAndRecord(String ip, String port) {
        try {
            System.out.println("获取新的待检验的:" + ip + "=" + port);
            String content = getContent(PROXY_TEST_URL, ip, port);
            PropertiesUtil propertiesUtil = new PropertiesUtil();
            if (content != null && !content.equals("")) {
                propertiesUtil.writeProperties(PROXY_FILE_PATH, ip, port);
            } else {
                propertiesUtil.removeProperty(PROXY_FILE_PATH, ip);
            }
        } catch (Exception e) {
            System.out.println("代理IP测试失效");
            System.err.println("Proxy check failed for " + ip + ":" + port + ": " + e);
        }
    }

    /** Puts a system property back to its earlier value, clearing it when it was previously unset. */
    private static void restoreSystemProperty(String key, String previousValue) {
        if (previousValue == null) {
            System.clearProperty(key);
        } else {
            System.setProperty(key, previousValue);
        }
    }
}