问题描述
/**
 * Small utility for downloading the text of a web page.
 *
 * <p>Both fetch methods open the URL with {@link URL#openStream()} and decode
 * the response using the {@code gb2312} charset.
 */
public class Test {

    /** Charset name used to decode every fetched page. */
    private static final String PAGE_CHARSET = "gb2312";

    /**
     * Fetches a fixed local test page and prints its source to standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String content = getHtmlSource("http://localhost:8080/TestDom4j/test.htm");
        System.out.println(content);
        // A follow-up step that was left disabled in the original located the
        // <div ... id="contentText" ...> opening tag in the page and printed only
        // the text between the end of that tag and the next "</div>".
    }

    /**
     * Downloads a page and returns its text, one platform line separator after
     * every line. Each line is also echoed to standard output as it is read.
     *
     * <p>This method is best-effort: on any failure the stack trace is printed
     * and whatever was read so far (possibly the empty string) is returned.
     *
     * @param url address of the page to download
     * @return the downloaded text; never {@code null}
     */
    public static String getHtmlSource(String url) {
        String lineSeparator = System.getProperty("line.separator");
        StringBuilder htmlSource = new StringBuilder();
        // try-with-resources guarantees the stream is closed even when reading fails.
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(new URL(url).openStream(), PAGE_CHARSET))) {
            String htmlLine;
            while ((htmlLine = in.readLine()) != null) {
                System.out.println(htmlLine);
                htmlSource.append(htmlLine).append(lineSeparator);
            }
        } catch (Exception e) {
            // Deliberately broad: callers rely on getting a (partial) result
            // instead of an exception.
            e.printStackTrace();
        }
        return htmlSource.toString();
    }

    /**
     * Downloads a page and returns all of its lines concatenated without any
     * line separators.
     *
     * @param htmlurl address of the page to download
     * @return the page text with line terminators removed
     * @throws IOException if the URL is malformed (a
     *         {@link MalformedURLException}, reported on standard output first)
     *         or if the page cannot be opened or read (stack trace is printed
     *         before rethrowing)
     */
    public static String getOneHtml(final String htmlurl) throws IOException {
        final StringBuilder sb = new StringBuilder();
        // The reader is closed automatically on both the success and failure paths.
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(new URL(htmlurl).openStream(), PAGE_CHARSET))) {
            // Read the entire page content.
            String temp;
            while ((temp = in.readLine()) != null) {
                sb.append(temp);
            }
        } catch (final MalformedURLException me) {
            System.out.println("你输入的URL格式有问题!请仔细输入");
            throw me;
        } catch (final IOException e) {
            e.printStackTrace();
            throw e;
        }
        return sb.toString();
    }
}