首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > 其他教程 > 互联网 >

抓取网上信息,抓取人人网院校

2012-10-06 
抓取网上信息,抓取人人网院校。这是我自己写的对HttpClient的一个改进,代码如下:import java.io.IOException;……

抓取网上信息,抓取人人网院校
这是我自己写的对HttpClient的一个改进。
代码如下:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.entity.BufferedHttpEntity;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpParams;
import org.apache.http.protocol.HTTP;

public class HttpUtil {
private HttpClient httpClient ;
private HttpResponse response ;
public HttpUtil(){
httpClient = new DefaultHttpClient() ;
}
public HttpUtil(String ip ,int port){
httpClient = new DefaultHttpClient() ;
this.setProxy(ip, port) ;
}
public String getMethodHt(String url){
HttpGet get = new HttpGet(url) ;
ResponseHandler<String> responseHandler = new BasicResponseHandler();
String html = "" ;
try {
html = httpClient.execute(get,responseHandler) ;
} catch (ClientProtocolException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
get.abort() ;
}
return html ;
}
public HttpResponse getMethodRe(String url){
HttpGet get = new HttpGet(url) ;
try {
response = httpClient.execute(get) ;
} catch (ClientProtocolException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
get.abort() ;
}
return response ;
}
public String getMethodHt(String url,Map<String,String> params){
HttpGet get = new HttpGet(url) ;
Set<String> set = params.keySet() ;
HttpParams basicParams = new BasicHttpParams() ;
for(String key: set){
basicParams.setParameter(key,params.get(key)) ;
}
get.setParams(basicParams) ;
ResponseHandler<String> responseHandler = new BasicResponseHandler();
String html = "" ;
try {
html = httpClient.execute(get,responseHandler) ;
} catch (ClientProtocolException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
get.abort() ;
}
return html ;
}
public HttpResponse getMethodRe(String url,Map<String,String> params){

HttpGet get = new HttpGet(url) ;
Set<String> set = params.keySet() ;
HttpParams basicParams = new BasicHttpParams() ;
for(String key: set){
basicParams.setParameter(key,params.get(key)) ;
}
get.setParams(basicParams) ;
try {
response = httpClient.execute(get) ;
} catch (ClientProtocolException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
get.abort() ;
}
return response ;
}
public String postMethodHt(String url){
HttpPost post = new HttpPost(url) ;
ResponseHandler<String> responseHandler = new BasicResponseHandler();
String html = "" ;
try {
html = httpClient.execute(post,responseHandler) ;
} catch (ClientProtocolException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
post.abort() ;
}
return html ;
}
public HttpResponse postMethodRe(String url){
HttpPost post = new HttpPost(url) ;
try {
response = httpClient.execute(post) ;
} catch (ClientProtocolException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
post.abort() ;
}
return response ;
}
public String postMethodHt(String url , Map<String,String> params){
HttpPost post = new HttpPost(url) ;
List<BasicNameValuePair> qparams = new ArrayList<BasicNameValuePair>() ;
Set<String> set =  params.keySet() ;
for(String key : set){
qparams.add(new BasicNameValuePair(key,params.get(key))) ;
}
ResponseHandler<String> responseHandler = new BasicResponseHandler();
String html = "" ;
try {
post.setEntity(new UrlEncodedFormEntity(qparams, HTTP.UTF_8));
html = httpClient.execute(post,responseHandler) ;
} catch (ClientProtocolException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
post.abort() ;
}
return html ;
}
public HttpResponse postMethodRe(String url,Map<String,String> params ){
HttpPost post = new HttpPost(url) ;
List<BasicNameValuePair> qparams = new ArrayList<BasicNameValuePair>() ;
Set<String> set =  params.keySet() ;
for(String key : set){
qparams.add(new BasicNameValuePair(key,params.get(key))) ;
}
try {
post.setEntity(new UrlEncodedFormEntity(qparams, HTTP.UTF_8));
response = httpClient.execute(post) ;
} catch (ClientProtocolException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
post.abort() ;
}
return response ;
}
public void setProxy(String ip ,int port){
HttpHost proxy = new HttpHost(ip,port) ;
httpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, 
                proxy) ;
}
public void close(){
if(httpClient!=null)
httpClient.getConnectionManager().shutdown();
}
}
利用这个类可以更方便地进行HTTP操作,但是它还不完善。
其实抓取信息主要涉及三个方面:
             1. 了解HTTP协议
             2. 抓取网页内容
             3. 正则表达式

热点排行