// Fetching a website's HTML with Java (java抓网站html)
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;

/**
 * Fetches the HTML source of a web resource through a URL and prints it to
 * standard output.
 *
 * <p>Two variants are provided: {@link #getHtmlbyurl()} reads through
 * {@link URL#openStream()}, and {@link #gethtmlbyurlcon()} reads through an
 * explicit {@link URLConnection}. Both decode the response as UTF-8.
 */
public class GetHtmlByUrl {

    /** Number of characters read from the stream per iteration. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Downloads {@code http://www.iteye.com} via {@link URL#openStream()} and
     * prints the decoded HTML to {@code System.out}.
     *
     * <p>Failures are reported on {@code System.err}; no exception is thrown.
     */
    public void getHtmlbyurl() {
        try {
            URL url = new URL("http://www.iteye.com");
            // try-with-resources closes the stream even when reading fails.
            try (InputStream in = url.openStream()) {
                System.out.println(readHtml(in));
            }
        } catch (MalformedURLException e) {
            System.err.println("Invalid URL: " + e.getMessage());
        } catch (IOException e) {
            System.err.println("Failed to fetch HTML: " + e);
        }
    }

    /**
     * Downloads {@code http://www.csdn.org} via an explicit
     * {@link URLConnection} and prints the decoded HTML to {@code System.out}.
     *
     * <p>Failures are reported on {@code System.err}; no exception is thrown.
     */
    public static void gethtmlbyurlcon() {
        try {
            URL url = new URL("http://www.csdn.org");
            URLConnection urlcon = url.openConnection();
            // Read from the connection that was opened, rather than opening a
            // second, unrelated stream with url.openStream().
            try (InputStream in = urlcon.getInputStream()) {
                System.out.println(readHtml(in));
            }
        } catch (MalformedURLException e) {
            System.err.println("Invalid URL: " + e.getMessage());
        } catch (IOException e) {
            System.err.println("Failed to fetch HTML: " + e);
        }
    }

    /**
     * Reads the given stream to its end and decodes the bytes as UTF-8.
     *
     * <p>Decoding goes through a {@link Reader}, so only the characters
     * actually read in each iteration are appended, and multi-byte sequences
     * that span two reads are decoded correctly. The stream is not closed
     * here; the caller owns it.
     *
     * @param in the stream to read
     * @return the decoded content, or an empty string if the stream is empty
     * @throws IOException if reading from the stream fails
     */
    public static String readHtml(InputStream in) throws IOException {
        Reader reader = new InputStreamReader(in, StandardCharsets.UTF_8);
        StringBuilder html = new StringBuilder();
        char[] buffer = new char[BUFFER_SIZE];
        int count;
        while ((count = reader.read(buffer, 0, buffer.length)) != -1) {
            // Append only the characters filled by this read, never the whole buffer.
            html.append(buffer, 0, count);
        }
        return html.toString();
    }

    /**
     * Runs the {@link URLConnection}-based variant.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        GetHtmlByUrl.gethtmlbyurlcon();
    }
}