Android 用 HttpClient 抓取 HTML 页面内容的方法
private String getHtmlContent(final String url) { String result = "";// 返回的结果 StringBuffer resultBuffer = new StringBuffer(); // 构造HttpClient的实例 HttpClient httpClient = new HttpClient(); // 创建GET方法的实例 GetMethod getMethod = new GetMethod(url); // 使用系统提供的默认的恢复策略 getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler()); // getMethod.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET,"GB2312"); getMethod.getParams().setContentCharset("GB2312"); try { // 执行getMethod int statusCode = httpClient.executeMethod(getMethod); if (statusCode != HttpStatus.SC_OK) { System.err.println("Method failed: "+ getMethod.getStatusLine()); } // 流式读取 // 读取内容 // byte[] responseBody = getMethod.getResponseBody(); // 处理内容 // String result = new String(responseBody,"GBK"); // result = getMethod.getResponseBodyAsString(); // System.out.println(result); // System.out.println(getMethod.getResponseCharSet()); // 推荐做法 BufferedReader in = new BufferedReader(new InputStreamReader( getMethod.getResponseBodyAsStream(), getMethod.getResponseCharSet())); String inputLine = null; while ((inputLine = in.readLine()) != null) { resultBuffer.append(inputLine); resultBuffer.append("\n"); } result = new String(resultBuffer); return result; } catch (HttpException e) { // 发生致命的异常,可能是协议不对或者返回的内容有问题 System.out.println("Please check your provided http address!"); e.printStackTrace(); } catch (IOException e) { // 发生网络异常 e.printStackTrace(); } finally { // 释放连接 getMethod.releaseConnection(); } return result; }}