问题描述
帮童鞋做的一个自动登录网站的程序,但是不成功,求帮忙分析下这个网页。
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using mshtml;

namespace web
{
    /// <summary>
    /// Form hosting a WebBrowser control that fills in and submits the login
    /// form once the login page has finished loading.
    /// </summary>
    public partial class Form1 : Form
    {
        // Exact address of the login page on which the auto-login is triggered.
        private const string LoginPageUrl =
            "http://gm.sdo.com/login/default.aspx?returnurl=http%3a%2f%2fgm.sdo.com%2fhome.aspx";

        public Form1()
        {
            InitializeComponent();
            (webBrowser1.ActiveXInstance as SHDocVw.WebBrowser).NavigateComplete2 +=
                new SHDocVw.DWebBrowserEvents2_NavigateComplete2EventHandler(Form1_NavigateComplete2);
        }

        /// <summary>
        /// Runs after each navigation and neutralises the script functions that
        /// would pop up dialogs or windows and block the automated flow.
        /// </summary>
        /// <param name="pDisp">Browser object that raised the event (unused).</param>
        /// <param name="URL">Address that was navigated to (unused).</param>
        private void Form1_NavigateComplete2(object pDisp, ref object URL)
        {
            IHTMLDocument2 doc =
                (webBrowser1.ActiveXInstance as SHDocVw.WebBrowser).Document as IHTMLDocument2;
            if (doc == null || doc.parentWindow == null)
            {
                return;
            }

            // Assign no-op functions rather than null: with null, any page script
            // that calls alert()/confirm()/... throws and the rest of that script
            // (often the login logic itself) never runs.
            doc.parentWindow.execScript("window.alert=function(){}", "javascript");
            doc.parentWindow.execScript("window.confirm=function(){return true;}", "javascript");
            doc.parentWindow.execScript("window.open=function(){}", "javascript");
            doc.parentWindow.execScript("window.showModalDialog=function(){}", "javascript");
            doc.parentWindow.execScript("window.close=function(){}", "javascript");
        }

        /// <summary>
        /// When the login page has loaded, writes the credentials into the form
        /// fields and clicks the login button.
        /// </summary>
        private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            HtmlDocument doc = this.webBrowser1.Document;
            if (doc == null || doc.Url == null)
            {
                return;
            }

            if (!string.Equals(doc.Url.AbsoluteUri, LoginPageUrl, StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            HtmlElement account = doc.GetElementById("check_account");
            HtmlElement password = doc.GetElementById("check_password");
            HtmlElement loginButton = doc.GetElementById("btn_login");
            if (account == null || password == null || loginButton == null)
            {
                // The page does not expose the expected element ids; nothing to automate.
                return;
            }

            // Input elements carry their text in the "value" attribute; assigning
            // InnerText does not fill the field, so the form was submitted empty.
            account.SetAttribute("value", "xxxxx");  // account
            password.SetAttribute("value", "xxxxx"); // password
            loginButton.InvokeMember("click");
        }

        private void Form1_Load(object sender, EventArgs e)
        {
        }
    }
}
解决方案
解决方案二:
需求:客户的数据同时存在在另外一个不可控的系统中,需要和当前系统同步。思路:自动登录另外一个系统,然后抓取数据,同步到本系统中。技术点:模拟用户登录;保存登录状态;抓取数据程序非常简单:

/// <summary>
/// Posts form data to the target URL and returns the response body as text.
/// </summary>
/// <param name="targetURL">Address to POST to.</param>
/// <param name="cc">Cookie container used to keep cookies and sessions across calls.</param>
/// <param name="param">Key/value pairs sent as the URL-encoded form body.</param>
/// <returns>The HTML page returned by the server.</returns>
public static string PostAndGetHTML(string targetURL, CookieContainer cc, Hashtable param)
{
    // Build the form body; keys and values are escaped so that characters such
    // as '&', '=' or non-ASCII text do not corrupt the request.
    StringBuilder formData = new StringBuilder();
    foreach (DictionaryEntry de in param)
    {
        if (formData.Length > 0)
        {
            formData.Append('&');
        }

        formData.Append(Uri.EscapeDataString(Convert.ToString(de.Key)));
        formData.Append('=');
        formData.Append(Uri.EscapeDataString(Convert.ToString(de.Value)));
    }

    byte[] data = Encoding.ASCII.GetBytes(formData.ToString());

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(targetURL);
    request.Method = "POST";
    request.ContentType = "application/x-www-form-urlencoded";
    request.ContentLength = data.Length;
    request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 2.0.1124)";

    // The cookie container must be attached before the request body is written;
    // attaching it afterwards is too late for the cookies to be sent.
    request.CookieContainer = cc;

    using (Stream requestStream = request.GetRequestStream())
    {
        requestStream.Write(data, 0, data.Length);
    }

    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        if (cc != null)
        {
            cc.Add(response.Cookies);
        }

        // NOTE(review): the body is decoded with the system default code page, as
        // in the original snippet; confirm this matches the target site's charset.
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, System.Text.Encoding.Default))
        {
            return reader.ReadToEnd();
        }
    }
}

这一个是调用的例子:先登录,在查询。实际中这个逻辑可能有很多步骤

private void button2_Click(object sender, EventArgs e)
{
    CookieContainer cc = new CookieContainer(); // keeps the session and cookies between requests
    Hashtable param = new Hashtable();          // holds the data to post

    string urlLogin = "http://demo.server//login.asp";

    // Find the required field names by checking the source of the login page.
    param.Add("User", "xxx");
    param.Add("Password", "xxxx");

    string result = PostAndGetHTML(urlLogin, cc, param);
    // Check result here to see whether the login succeeded.

    // If the login succeeded, go to the target url and post some values.
    string url2 = "http://demo.server/query.asp?id=1"; // needs changing: site-specific logic
    param.Clear();
    // param.Add("SearchAreaId", "JobId")
    result = PostAndGetHTML(url2, cc, new Hashtable());

    // ConvertToDT the html or do something else with it.
}

这是一个简单的抓取网页数据的函数(针对Table内的,直接转化成DataTable)

/// <summary>
/// Copies the cell text of an HTML table into rows of <paramref name="dt"/>.
/// Parsing starts after the second "&lt;tr" when there is one, and each row is
/// filled with as many cells as the table has columns.
/// </summary>
/// <param name="dt">Table that receives the rows; its columns must already be defined.</param>
/// <param name="tableHTML">HTML of the table to read.</param>
/// <returns>The same <paramref name="dt"/> instance, with the parsed rows appended.</returns>
private DataTable ConvertToDT(DataTable dt, string tableHTML)
{
    if (dt == null || dt.Columns.Count == 0 || string.IsNullOrEmpty(tableHTML))
    {
        // Nothing to fill; a zero-column table would also never advance the cursor.
        return dt;
    }

    StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;

    int lastTD = tableHTML.LastIndexOf("</td>", ignoreCase);
    if (lastTD < 0)
    {
        return dt;
    }

    int firstRow = tableHTML.IndexOf("<tr", ignoreCase) + 3;        // just after the first "<tr"
    int index = tableHTML.IndexOf("<tr", firstRow, ignoreCase) + 3; // just after the second "<tr"

    bool malformed = false;
    while (!malformed && index < lastTD)
    {
        DataRow dr = dt.NewRow();
        bool hasCell = false;

        for (int i = 0; i < dt.Columns.Count; i++)
        {
            int openTD = tableHTML.IndexOf("<td", index, ignoreCase);
            int endTD = openTD < 0 ? -1 : tableHTML.IndexOf("</td>", openTD + 3, ignoreCase);
            if (endTD < 0)
            {
                // No complete cell left: stop instead of re-scanning the same text forever.
                malformed = true;
                break;
            }

            int startTD = openTD + 3; // just after "<td"
            string tdStr = tableHTML.Substring(startTD, endTD - startTD);

            // Strip padding. NOTE(review): the posted snippet removed " ", "t" and
            // "r", which deletes letters from the data; it appears to have lost its
            // escapes, so this assumes "&nbsp;", "\t" and "\r" were intended.
            tdStr = tdStr.Replace("&nbsp;", "").Replace("\t", "").Replace("\r", "");

            // After splitting on '<' and '>', the odd positions hold the text that
            // sits outside tags; take the first non-empty one as the cell value.
            string value = "";
            string[] v = tdStr.Split('<', '>');
            for (int j = 1; j < v.Length; j += 2)
            {
                string text = v[j].Trim();
                if (text != "")
                {
                    value = text;
                    break;
                }
            }

            dr[i] = value;
            hasCell = true;
            index = endTD;
        }

        if (hasCell)
        {
            dt.Rows.Add(dr);
        }
    }

    return dt;
}

注:对于有验证码登录系统的无效。(如果该系统的验证码放到cookie中存储的例外,这个容易破解)
解决方案三:
用cookie或者数据库模式去操作