应用dom4j 和xpath 通过url读取xml数据的实例

2012-09-20

使用dom4j 和xpath 通过url读取xml数据的实例：我这个实例是在我开发过程中用到的，本人是菜鸟，希望高手勿喷！

使用dom4j 和xpath 通过url读取xml数据的实例

我这个实例是在我开发过程中用到的，本人是菜鸟，希望高手勿喷！

首先，使用dom4j 需要在项目中添加dom4j 的jar 包，调用xpath 还需要额外添加jaxen 的jar 包（dom4j 的xpath 功能依赖它），具体的添加方法我这里就不多说了，直接说一下我这个代码的具体思路：

1、根据一定的规则，创建读取xml的url （由于我这个代码是根据用户不同的选择读取不同的xml）

2、检查url的合法性，主要是看url是否是有效链接

3、创建saxReader实例，接着创建doc实例

4、通过selectNodes 方法配合xpath 表达式读取出数据（selectNodes 其实是dom4j 的方法，但它需要xpath 库的支持）

5、打包数据，挂在arraylist中，最后写入数据库

代码如下：

public class ParseXML {private ImportNameForm dataform;private int type;private Academic ac;private int count = 0; // 返回文章的数量private StringBuffer finalStrBuffer = new StringBuffer();ArrayList<Academic> ar = new ArrayList<Academic>();// 把年份和账号通过构造函数传过来public ParseXML(ImportNameForm dataform, int type) {this.dataform = dataform;this.type = type;}// 返回文章的数量public int getCount() {return count;}public ArrayList<Academic> xml2arraylist() {this.parsing(1);return ar;}public String xml2string() {this.parsing(2);return finalStrBuffer.toString();}// 根据地址在xml中把数据读出，当flag等于1，把数据放在academic中，然后再放在arraylist中// 当flag=2 把数据转化成html放在buffer中//flag等于1是为了方便存储，flag等于2是为了在选择名字后的显示public void parsing(int flag) {switch (type) {case 1:this.getPaperXML(flag);break;case 2:this.getPublicationXML(flag);break;case 3:this.getProjectXML(flag);break;case 4:this.getPatentXML(flag);break;}}public String xzmTemplate(int i) {String str;String[] xzm = dataform.getXzm();str = "<tr><td bgcolor='#dddddd'>学者：";str = str + xzm[i] + " 的相关信息</td></tr>";return str;}public String checkboxTemplate(Academic ac) {String str;str = "<tr><td><input type='checkbox' name='ckb' value='";str = str + ac.getId() + "'/>";str = str + ac.getTitle() + "</td></tr>";return str;}// 根据用户的账号和选择的年份生成URLpublic String[] createURL() {String year1 = dataform.getYear1();String year2 = dataform.getYear2();String[] ckb = dataform.getCkb();String[] str = new String[ckb.length];String strType;switch (type) {case 1:strType = "Paper";break;case 2:strType = "Publication";break;case 3:strType = "Project";break;case 4:strType = "Patent";break;default:strType = "Paper";break;}for (int i = 0; i < ckb.length; i++) {str[i] = "http://www.XXXXXX.com/rest/";str[i] = str[i] + strType + "/" + ckb[i].trim() + "/" + year1 + "-"+ year2;}return str;}// 检查URL是否有效private boolean isConnect(String url) {boolean flag = false;int counts = 0;if (url == null || url.length() <= 0) {return flag;}while (counts < 5) {try {HttpURLConnection connection = (HttpURLConnection) 
new URL(url).openConnection();int state = connection.getResponseCode();if (state == 200) {flag = true;}break;} catch (Exception ex) {counts++;continue;}}return flag;}public void getPaperXML(int flag) {System.out.println("Paper_parsing");String[] strURL = this.createURL();SAXReader saxReader = new SAXReader();try {for (int i = 0; i < strURL.length; i++) {if (isConnect(strURL[i])) {URL url;url = new URL(strURL[i]);Document doc = saxReader.read(url);List paperids = doc.selectNodes("/scholarPapers/scholarPaper/id");List titles = doc.selectNodes("/scholarPapers/scholarPaper/title");List authors = doc.selectNodes("/scholarPapers/scholarPaper/authors");List dates = doc.selectNodes("/scholarPapers/scholarPaper/date");List sources = doc.selectNodes("/scholarPapers/scholarPaper/source");List types = doc.selectNodes("/scholarPapers/scholarPaper/type");List citations = doc.selectNodes("/scholarPapers/scholarPaper/citation");if (flag == 2) {finalStrBuffer.append(xzmTemplate(i));}for (int j = 0; j < paperids.size(); j++) {count++;ac = new Academic();ac.setAcademic_class(String.valueOf(type));ac.setAuthor(((Element) authors.get(j)).getText());ac.setPaper(((Element) sources.get(j)).getText());ac.setPtype(((Element) types.get(j)).getText());ac.setTitle(((Element) titles.get(j)).getText());ac.setId(Integer.parseInt(((Element) paperids.get(j)).getText()));ac.setYear(Integer.parseInt(((Element) dates.get(j)).getText().substring(0, 4)));ac.setContent(((Element)citations.get(j)).getText());if (flag == 1) {ar.add(ac);} elsefinalStrBuffer.append(checkboxTemplate(ac));}}}} catch (MalformedURLException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (DocumentException e) {// TODO Auto-generated catch blocke.printStackTrace();}}}

热点排行

XML SOAP

应用dom4j 和xpath 通过url读取xml数据的实例