【爬虫】使用Post方法爬取网页,但是获取到的是原网页
从cnblogs的站内搜索网页提交post表单,爬取cnblogs返回的博客链接。将带有搜索结果的页面保存到文件中。
package postMethod;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.OutputStream;import java.nio.file.Path;import org.apache.commons.httpclient.HttpClient;import org.apache.commons.httpclient.HttpException;import org.apache.commons.httpclient.HttpStatus;import org.apache.commons.httpclient.NameValuePair;import org.apache.commons.httpclient.methods.PostMethod;public class Post { /** * @param args */ private static HttpClient httpClient = new HttpClient(); public static void main(String[] args) throws IOException { String path = "http://zzk.cnblogs.com/"; InputStream input = null; OutputStream output = null; // TODO Auto-generated method stub //得到post方法 PostMethod postMethod = new PostMethod(path); //设置post方法的参数 NameValuePair[] postData = new NameValuePair[1]; postData[0] = new NameValuePair("w","java"); postMethod.addParameters(postData); //执行,返回状态码 int statusCode = 0; try { statusCode = httpClient.executeMethod(postMethod); } catch (HttpException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } System.out.println(statusCode); //针对状态码进行处理 if(statusCode == HttpStatus.SC_OK) { input = postMethod.getResponseBodyAsStream(); String filename = "aaaa.html"; output = new FileOutputStream(filename); int tempByte = -1; while ((tempByte = input.read()) > 0) { output.write(tempByte); } if(input != null) { input.close(); } if(output != null) { output.close(); } } }}
private static HttpClient httpClient = new HttpClient(); public static void main(String[] args) throws IOException { String path = "http://zzk.cnblogs.com/s?w=java"; InputStream input = null; OutputStream output = null; // TODO Auto-generated method stub //得到post方法 GetMethod postMethod = new GetMethod(path); //设置post方法的参数// NameValuePair[] postData = new NameValuePair[1];// postData[0] = new NameValuePair("w","java");// postMethod.addParameters(postData); //执行,返回状态码 int statusCode = 0; try { statusCode = httpClient.executeMethod(postMethod); } catch (HttpException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } System.out.println(statusCode); //针对状态码进行处理 if(statusCode == HttpStatus.SC_OK) { input = postMethod.getResponseBodyAsStream(); String filename = "D:/aaaa.html"; output = new FileOutputStream(filename); int tempByte = -1; while ((tempByte = input.read()) > 0) { output.write(tempByte); } if(input != null) { input.close(); } if(output != null) { output.close(); } } }
[解决办法]
前些日子也遇到同样的问题。
解决方法是:将隐藏传递的参数直接添加到请求地址中,然后手动将请求方式设置为 post。