自动抓取网页内容,并写入 Excel 文档
?
今天需要把北京各物业公司的名称和电话整理出来。爱帮网上的数据比较多,一条一条手工摘取太麻烦,于是写了一段代码自动抓取,并把结果写入 Excel 文档。主要用到 Jsoup 和 jxl 两个库,很方便。
?
代码如下:
?
package com.bes.st.buz.website;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import com.bes.core.StringUtil;
import com.bes.st.rw.WriteFacade;
public class FetchUrl {
?/**
? * @param url
? * @return
? */
?public static List<WebsiteBean> fetchData(final String url) {
??List<WebsiteBean> result = new ArrayList<WebsiteBean>();
??try {
???Document doc = Jsoup.connect(url).timeout(10000).get();
???Elements es = doc.select("div.aside");
???for (int i = 0; i < es.size(); i++) {
????Elements aArr = es.get(i).select("a.title");
????Elements pArr = es.get(i).select("div.part1 p");
????Elements tArr = es.get(i).select("span.biztel");
????String str = StringUtil.nvl(aArr.attr("title"));
????String tel = StringUtil.nvl(tArr.text());
????if(str.length() == 0 || tel.length() == 0){
?????continue;
????}
????WebsiteBean bean = new WebsiteBean();
????bean.setName(aArr.attr("title"));
????if(pArr.size()>0){
?????bean.setAddress(pArr.get(0).text());
????}else{
?????bean.setAddress(pArr.text());
????}
????bean.setTel(tel);
????result.add(bean);
???}
???
??} catch (Exception e) {
???e.printStackTrace();
??}
??return result;
?}
?
?public static void writeFetchData(List<String> urls){
??List<WebsiteBean> result = new ArrayList<WebsiteBean>();
??if(urls != null){
???for(String url : urls){
????result.addAll(fetchData(url));
???}
??}
??String output = "c:/tmp";
??File file = new File(output, "tt.xls");
??if (file.exists()) {
???file.delete();
??}
??OutputStream out = null;
??try{
??file.createNewFile();
??out = new FileOutputStream(file);
??WriteFacade.writeExcel(out, "物业公司", result);
??}catch(Exception ex){
???ex.printStackTrace();
??}finally{
???try{
????if(out != null){
?????out.close();
????}
???}catch(Exception ex){}
??}
??
?}
?public static void main(String[] args) {
??List<String> urls = new ArrayList<String>();
??urls.add("http://www.aibang.com/?addr=%E5%85%A8%E5%B8%82&what=%E7%89%A9%E4%B8%9A&area=bizsearch2&cmd=noscript&script=false&city=%E5%8C%97%E4%BA%AC");
??for(int i=2;i<20;i++){
???urls.add("http://www.aibang.com/?area=bizsearch2&cmd=bigmap&city=%E5%8C%97%E4%BA%AC&a=&q=%E7%89%A9%E4%B8%9A&as=5000&ufcate=&rc=1&zone=&quan=&fm=&p=" + i);
??}
??writeFetchData(urls);
//??fetchData("http://www.aibang.com/?addr=%E5%85%A8%E5%B8%82&what=%E7%89%A9%E4%B8%9A&area=bizsearch2&cmd=noscript&script=false&city=%E5%8C%97%E4%BA%AC");
?}
}
?