一个带采集远程文章内容,保存图片,生成文件等完整的采集功能

本文提供了一套完整的ASP采集功能函数,包含提取地址的原字符,保存远程的文件到本地模拟登录,获取网页源码等功能函数,阿里西西站长推荐收藏!

'==================================================
'函数名:GetHttpPage
'作  用:获取网页源码
'参  数:HttpUrl ------网页地址
'==================================================
Function GetHttpPage(HttpUrl)
   If IsNull(HttpUrl)=True Or Len(HttpUrl)<18 Or HttpUrl="$False$" Then
      GetHttpPage="$False$"
      Exit Function
   End If
   Dim Http
   Set Http=server.createobject("MSX" & "ML2.XM" & "LHT" & "TP")
   Http.open "GET",HttpUrl,False
   Http.Send()
   If Http.Readystate<>4 then
      Set Http=Nothing
      GetHttpPage="$False$"
      Exit function
   End if
   GetHTTPPage=bytesToBSTR(Http.responseBody,"GB2312")
   GetHTTPPage=replace(replace(GetHTTPPage , vbCr,""),vbLf,"")
   Set Http=Nothing
   If Err.number<>0 then
      Err.Clear
   End If
End Function

'==================================================
'函数名:BytesToBstr
'作  用:将获取的源码转换为中文
'参  数:Body ------要转换的变量
'参  数:Cset ------要转换的类型
'==================================================
Function BytesToBstr(Body,Cset)
   Dim Objstream
   Set Objstream = Server.CreateObject("ad" & "odb.str" & "eam")
   objstream.Type = 1
   objstream.Mode =3
   objstream.Open
   objstream.Write body
   objstream.Position = 0
   objstream.Type = 2
   objstream.Charset = Cset
   BytesToBstr = objstream.ReadText
   objstream.Close
   set objstream = nothing
End Function

'==================================================
'函数名:PostHttpPage
'作  用:登录
'==================================================
Function PostHttpPage(RefererUrl,PostUrl,PostData)
    Dim xmlHttp
    Dim RetStr     
    Set xmlHttp = CreateObject("Msx" & "ml2.XM" & "LHT" & "TP") 
    xmlHttp.Open "POST", PostUrl, False
    XmlHTTP.setRequestHeader "Content-Length",Len(PostData)
    xmlHttp.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
    xmlHttp.setRequestHeader "Referer", RefererUrl
    xmlHttp.Send PostData
    If Err.Number <> 0 Then
        Set xmlHttp=Nothing
        PostHttpPage = "$False$"
        Exit Function
    End If
    PostHttpPage=bytesToBSTR(xmlHttp.responseBody,"GB2312")
    Set xmlHttp = nothing
End Function

'==================================================
'函数名:UrlEncoding
'作  用:转换编码
'==================================================
Function UrlEncoding(DataStr)
    Dim StrReturn,Si,ThisChr,InnerCode,Hight8,Low8
    StrReturn = ""
    For Si = 1 To Len(DataStr)
        ThisChr = Mid(DataStr,Si,1)
        If Abs(Asc(ThisChr)) < &HFF Then
            StrReturn = StrReturn & ThisChr
        Else
            InnerCode = Asc(ThisChr)
            If InnerCode < 0 Then
               InnerCode = InnerCode + &H10000
            End If
            Hight8 = (InnerCode  And &HFF00)\ &HFF
            Low8 = InnerCode And &HFF
            StrReturn = StrReturn & "%" & Hex(Hight8) &  "%" & Hex(Low8)
        End If
    Next
    UrlEncoding = StrReturn
End Function

'==================================================
'函数名:GetBody
'作  用:截取字符串
'参  数:ConStr ------将要截取的字符串
'参  数:StartStr ------开始字符串
'参  数:OverStr ------结束字符串
'参  数:IncluL ------是否包含StartStr
'参  数:IncluR ------是否包含OverStr
'==================================================
Function GetBody(ConStr,StartStr,OverStr,IncluL,IncluR)
   If ConStr="$False$" or ConStr="" or IsNull(ConStr)=True Or StartStr="" or IsNull(StartStr)=True Or OverStr="" or IsNull(OverStr)=True Then
      GetBody="$False$"
      Exit Function
   End If
   Dim ConStrTemp
   Dim Start,Over
   ConStrTemp=Lcase(ConStr)
   StartStr=Lcase(StartStr)
   OverStr=Lcase(OverStr)
   Start = InStrB(1, ConStrTemp, StartStr, vbBinaryCompare)
   If Start<=0 then
      GetBody="$False$"
      Exit Function
   Else
      If IncluL=False Then
         Start=Start+LenB(StartStr)
      End If
   End If
   Over=InStrB(Start,ConStrTemp,OverStr,vbBinaryCompare)
   If Over<=0 Or Over<=Start then
      GetBody="$False$"
      Exit Function
   Else
      If IncluR=True Then
         Over=Over+LenB(OverStr)
      End If
   End If
   GetBody=MidB(ConStr,Start,Over-Start)
End Function

'==================================================
'函数名:GetArray
'作  用:提取链接地址,以$Array$分隔
'参  数:ConStr ------提取地址的原字符
'参  数:StartStr ------开始字符串
'参  数:OverStr ------结束字符串
'参  数:IncluL ------是否包含StartStr
'参  数:IncluR ------是否包含OverStr
'==================================================
Function GetArray(Byval ConStr,StartStr,OverStr,IncluL,IncluR)
   If ConStr="$False$" or ConStr="" Or IsNull(ConStr)=True or StartStr="" Or OverStr="" or  IsNull(StartStr)=True Or IsNull(OverStr)=True Then
      GetArray="$False$"
      Exit Function
   End If
   Dim TempStr,TempStr2,objRegExp,Matches,Match
   TempStr=""
   Set objRegExp = New Regexp
   objRegExp.IgnoreCase = True
   objRegExp.Global = True
   objRegExp.Pattern = "("&StartStr&").+?("&OverStr&")"
   Set Matches =objRegExp.Execute(ConStr)
   For Each Match in Matches
      TempStr=TempStr & "$Array$" & Match.Value
   Next
   Set Matches=nothing

   If TempStr="" Then
      GetArray="$False$"
      Exit Function
   End If
   TempStr=Right(TempStr,Len(TempStr)-7)
   If IncluL=False then
      objRegExp.Pattern =StartStr
      TempStr=objRegExp.Replace(TempStr,"")
   End if
   If IncluR=False then
      objRegExp.Pattern =OverStr
      TempStr=objRegExp.Replace(TempStr,"")
   End if
   Set objRegExp=nothing
   Set Matches=nothing
  
   TempStr=Replace(TempStr,"""","")
   TempStr=Replace(TempStr,"'","")
   TempStr=Replace(TempStr," ","")
   TempStr=Replace(TempStr,"(","")
   TempStr=Replace(TempStr,")","")

   If TempStr="" then
      GetArray="$False$"
   Else
      GetArray=TempStr
   End if
End Function

'==================================================
'函数名:DefiniteUrl
'作  用:将相对地址转换为绝对地址
'参  数:PrimitiveUrl ------要转换的相对地址
'参  数:ConsultUrl ------当前网页地址
'==================================================
Function DefiniteUrl(Byval PrimitiveUrl,Byval ConsultUrl)
   Dim ConTemp,PriTemp,Pi,Ci,PriArray,ConArray
   If PrimitiveUrl="" or ConsultUrl="" or PrimitiveUrl="$False$" or ConsultUrl="$False$" Then
      DefiniteUrl="$False$"
      Exit Function
   End If
   If Left(Lcase(ConsultUrl),7)<>"http://" Then
      ConsultUrl= "http://" & ConsultUrl
   End If
   ConsultUrl=Replace(ConsultUrl,"\","/")
   ConsultUrl=Replace(ConsultUrl,"://",":\\")
   PrimitiveUrl=Replace(PrimitiveUrl,"\","/")

   If Right(ConsultUrl,1)<>"/" Then
      If Instr(ConsultUrl,"/")>0 Then
         If Instr(Right(ConsultUrl,Len(ConsultUrl)-InstrRev(ConsultUrl,"/")),".")>0 then  
         Else
            ConsultUrl=ConsultUrl & "/"
         End If
      Else
         ConsultUrl=ConsultUrl & "/"
      End If
   End If
   ConArray=Split(ConsultUrl,"/")

   If Left(LCase(PrimitiveUrl),7) = "http://" then
      DefiniteUrl=Replace(PrimitiveUrl,"://",":\\")
   ElseIf Left(PrimitiveUrl,1) = "/" Then
      DefiniteUrl=ConArray(0) & PrimitiveUrl
   ElseIf Left(PrimitiveUrl,2)="./" Then
      PrimitiveUrl=Right(PrimitiveUrl,Len(PrimitiveUrl)-2)
      If Right(ConsultUrl,1)="/" Then  
         DefiniteUrl=ConsultUrl & PrimitiveUrl
      Else
         DefiniteUrl=Left(ConsultUrl,InstrRev(ConsultUrl,"/")) & PrimitiveUrl
      End If
   ElseIf Left(PrimitiveUrl,3)="../" then
      Do While Left(PrimitiveUrl,3)="../"
         PrimitiveUrl=Right(PrimitiveUrl,Len(PrimitiveUrl)-3)
         Pi=Pi+1
      Loop           
      For Ci=0 to (Ubound(ConArray)-1-Pi)
         If DefiniteUrl<>"" Then
            DefiniteUrl=DefiniteUrl & "/" & ConArray(Ci)
         Else
            DefiniteUrl=ConArray(Ci)
         End If
      Next
      DefiniteUrl=DefiniteUrl & "/" & PrimitiveUrl
   Else
      If Instr(PrimitiveUrl,"/")>0 Then
         PriArray=Split(PrimitiveUrl,"/")
         If Instr(PriArray(0),".")>0 Then
            If Right(PrimitiveUrl,1)="/" Then
               DefiniteUrl="http:\\" & PrimitiveUrl
            Else
               If Instr(PriArray(Ubound(PriArray)-1),".")>0 Then
                  DefiniteUrl="http:\\" & PrimitiveUrl
               Else
                  DefiniteUrl="http:\\" & PrimitiveUrl & "/"
               End If
            End If     
         Else
            If Right(ConsultUrl,1)="/" Then  
               DefiniteUrl=ConsultUrl & PrimitiveUrl
            Else
               DefiniteUrl=Left(ConsultUrl,InstrRev(ConsultUrl,"/")) & PrimitiveUrl
            End If
         End If
      Else
         If Instr(PrimitiveUrl,".")>0 Then
            If Right(ConsultUrl,1)="/" Then
               If right(LCase(PrimitiveUrl),3)=".cn" or right(LCase(PrimitiveUrl),3)="com" or right(LCase(PrimitiveUrl),3)="net" or right(LCase(PrimitiveUrl),3)="org" Then
                  DefiniteUrl="http:\\" & PrimitiveUrl & "/"
               Else
                  DefiniteUrl=ConsultUrl & PrimitiveUrl
               End If
            Else
               If right(LCase(PrimitiveUrl),3)=".cn" or right(LCase(PrimitiveUrl),3)="com" or right(LCase(PrimitiveUrl),3)="net" or right(LCase(PrimitiveUrl),3)="org" Then
                  DefiniteUrl="http:\\" & PrimitiveUrl & "/"
               Else
                  DefiniteUrl=Left(ConsultUrl,InstrRev(ConsultUrl,"/")) & "/" & PrimitiveUrl
               End If
            End If
         Else
            If Right(ConsultUrl,1)="/" Then
               DefiniteUrl=ConsultUrl & PrimitiveUrl & "/"
            Else
               DefiniteUrl=Left(ConsultUrl,InstrRev(ConsultUrl,"/")) & "/" & PrimitiveUrl & "/"
            End If        
         End If
      End If
   End If
   If Left(DefiniteUrl,1)="/" then
     DefiniteUrl=Right(DefiniteUrl,Len(DefiniteUrl)-1)
   End if
   If DefiniteUrl<>"" Then
      DefiniteUrl=Replace(DefiniteUrl,"//","/")
      DefiniteUrl=Replace(DefiniteUrl,":\\","://")
   Else
      DefiniteUrl="$False$"
   End If
End Function

'==================================================
'函数名:ReplaceSaveRemoteFile
'作  用:替换、保存远程图片
'参  数:ConStr ------ 要替换的字符串
'参  数:SaveTf ------ 是否保存文件,False不保存,True保存
'参  数: TistUrl------ 当前网页地址
'==================================================
Function ReplaceSaveRemoteFile(ConStr,InstallPath,strChannelDir,SaveTf,TistUrl)
   If ConStr="$False$" or ConStr="" or InstallPath="" or strChannelDir="" Then
      ReplaceSaveRemoteFile=ConStr
      Exit Function
   End If
   Dim TempStr,TempStr2,TempStr3,Re,Matches,Match,Tempi,TempArray,TempArray2

   Set Re = New Regexp
   Re.IgnoreCase = True
   Re.Global = True
   Re.Pattern ="<img.+?>"
   Set Matches =Re.Execute(ConStr)
   For Each Match in Matches
      If TempStr<>"" then
         TempStr=TempStr & "$Array$" & Match.Value
      Else
         TempStr=Match.Value
      End if
   Next
   If TempStr<>"" Then
      TempArray=Split(TempStr,"$Array$")
      TempStr=""
      For Tempi=0 To Ubound(TempArray)
         Re.Pattern ="src\s*=\s*.+?\.(gifjpgbmpjpegpsdpngsvgdxfwmftiff)"
         Set Matches =Re.Execute(TempArray(Tempi))
         For Each Match in Matches
            If TempStr<>"" then
               TempStr=TempStr & "$Array$" & Match.Value
            Else
               TempStr=Match.Value
            End if
         Next
      Next
   End if
   If TempStr<>"" Then
      Re.Pattern ="src\s*=\s*"
      TempStr=Re.Replace(TempStr,"")
   End If
   Set Matches=nothing
   Set Re=nothing
   If TempStr="" or IsNull(TempStr)=True Then
      ReplaceSaveRemoteFile=ConStr
      Exit function
   End if
   TempStr=Replace(TempStr,"""","")
   TempStr=Replace(TempStr,"'","")
   TempStr=Replace(TempStr," ","")
   Dim RemoteFileurl,SavePath,PathTemp,DtNow,strFileName,strFileType,ArrSaveFileName,RanNum,Arr_Path
   DtNow=Now()
 '***********************************
   If SaveTf=True then
      SavePath=InstallPath&strChannelDir
         If CheckDir(InstallPath & strChannelDir)=False Then
            If Not CreateMultiFolder(InstallPath & strChannelDir) Then
            response.Write InstallPath & strChannelDir&"目录创建失败"
               SaveTf=False
            End If
         End If
   End If

   '去掉重复图片开始
   TempArray=Split(TempStr,"$Array$")
   TempStr=""
   For Tempi=0 To Ubound(TempArray)
      If Instr(Lcase(TempStr),Lcase(TempArray(Tempi)))<1 Then
         TempStr=TempStr & "$Array$" & TempArray(Tempi)
      End If
   Next
   TempStr=Right(TempStr,Len(TempStr)-7)
   TempArray=Split(TempStr,"$Array$")
   '去掉重复图片结束

 response.Write "<br>发现图片:<br>"&Replace(TempStr,"$Array$","<br>")

   '转换相对图片地址开始
   TempStr=""
   For Tempi=0 To Ubound(TempArray)
      TempStr=TempStr & "$Array$" & DefiniteUrl(TempArray(Tempi),TistUrl)
   Next
   TempStr=Right(TempStr,Len(TempStr)-7)
   TempStr=Replace(TempStr,Chr(0),"")
   TempArray2=Split(TempStr,"$Array$")
   TempStr=""
   '转换相对图片地址结束

   '图片替换/保存
   Set Re = New Regexp
   Re.IgnoreCase = True
   Re.Global = True

   For Tempi=0 To Ubound(TempArray2)
'********************************
      RemoteFileUrl=TempArray2(Tempi)
      If RemoteFileUrl<>"$False$" And SaveTf=True Then'保存图片
         ArrSaveFileName = Split(RemoteFileurl,".")
   strFileType=Lcase(ArrSaveFileName(Ubound(ArrSaveFileName)))'文件类型
         If strFileType="asp" or strFileType="asa" or strFileType="aspx" or strFileType="cer" or strFileType="cdx" or strFileType="exe" or strFileType="rar" or strFileType="zip" then
            UploadFiles=""
            ReplaceSaveRemoteFile=ConStr
            Exit Function
         End If

         Randomize
         RanNum=Int(900*Rnd)+100
   strFileName = year(DtNow) & right("0" & month(DtNow),2) & right("0" & day(DtNow),2) & right("0" & hour(DtNow),2) & right("0" & minute(DtNow),2) & right("0" & second(DtNow),2) & ranNum & "." & strFileType
         Re.Pattern =TempArray(Tempi)
   response.Write "<br>保存到本地地址:"&InstallPath & strChannelDir & strFileName
   If SaveRemoteFile(InstallPath & strChannelDir & strFileName,RemoteFileUrl,RemoteFileUrl)=True Then
   response.Write "<font color=blue>成功</font><br>"
            PathTemp=InstallPath & strChannelDir & strFileName
            ConStr=Re.Replace(ConStr,PathTemp)
            Re.Pattern=InstallPath&strChannelDir
            UploadFiles=UploadFiles & "" & InstallPath & strChannelDir & strFileName
         Else
            PathTemp=RemoteFileUrl
            ConStr=Re.Replace(ConStr,PathTemp)
         End If
      ElseIf RemoteFileurl<>"$False$" and SaveTf=False Then'不保存图片
         Re.Pattern =TempArray(Tempi)
         ConStr=Re.Replace(ConStr,RemoteFileUrl)
      End If
'********************************
   Next  
   Set Re=nothing
   ReplaceSaveRemoteFile=ConStr
End function

'==================================================
'函数名:ReplaceSwfFile
'作  用:解析动画路径
'参  数:ConStr ------ 要替换的字符串
'参  数: TistUrl------ 当前网页地址
'==================================================
Function ReplaceSwfFile(ConStr,TistUrl)
   If ConStr="$False$" or ConStr="" or TistUrl="" or TistUrl="$False$" Then
      ReplaceSwfFile=ConStr
      Exit Function
   End If
   Dim TempStr,TempStr2,TempStr3,Re,Matches,Match,Tempi,TempArray,TempArray2

   Set Re = New Regexp
   Re.IgnoreCase = True
   Re.Global = True
   Re.Pattern ="<object.+?[^\>]>"
   Set Matches =Re.Execute(ConStr)
   For Each Match in Matches
      If TempStr<>"" then
         TempStr=TempStr & "$Array$" & Match.Value
      Else
         TempStr=Match.Value
      End if
   Next
   If TempStr<>"" Then
      TempArray=Split(TempStr,"$Array$")
      TempStr=""
      For Tempi=0 To Ubound(TempArray)
         Re.Pattern ="value\s*=\s*.+?\.swf"
         Set Matches =Re.Execute(TempArray(Tempi))
         For Each Match in Matches
            If TempStr<>"" then
               TempStr=TempStr & "$Array$" & Match.Value
            Else
               TempStr=Match.Value
            End if
         Next
      Next
   End if
   If TempStr<>"" Then
      Re.Pattern ="value\s*=\s*"
      TempStr=Re.Replace(TempStr,"")
   End If
   If TempStr="" or IsNull(TempStr)=True Then
      ReplaceSwfFile=ConStr
      Exit function
   End if
   TempStr=Replace(TempStr,"""","")
   TempStr=Replace(TempStr,"'","")
   TempStr=Replace(TempStr," ","")

   Set Matches=nothing
   Set Re=nothing

   '去掉重复文件开始
   TempArray=Split(TempStr,"$Array$")
   TempStr=""
   For Tempi=0 To Ubound(TempArray)
      If Instr(Lcase(TempStr),Lcase(TempArray(Tempi)))<1 Then
         TempStr=TempStr & "$Array$" & TempArray(Tempi)
      End If
   Next
   TempStr=Right(TempStr,Len(TempStr)-7)
   TempArray=Split(TempStr,"$Array$")
   '去掉重复文件结束

   '转换相对地址开始
   TempStr=""
   For Tempi=0 To Ubound(TempArray)
      TempStr=TempStr & "$Array$" & DefiniteUrl(TempArray(Tempi),TistUrl)
   Next
   TempStr=Right(TempStr,Len(TempStr)-7)
   TempStr=Replace(TempStr,Chr(0),"")
   TempArray2=Split(TempStr,"$Array$")
   TempStr=""
   '转换相对地址结束

   '替换
   Set Re = New Regexp
   Re.IgnoreCase = True
   Re.Global = True
   For Tempi=0 To Ubound(TempArray2)
      RemoteFileUrl=TempArray2(Tempi)
      Re.Pattern =TempArray(Tempi)
      ConStr=Re.Replace(ConStr,RemoteFileUrl)
   Next  
   Set Re=nothing
   ReplaceSwfFile=ConStr
End function

'==================================================
'过程名:SaveRemoteFile
'作  用:保存远程的文件到本地
'参  数:LocalFileName ------ 本地文件名
'参  数:RemoteFileUrl ------ 远程文件URL
'参  数:Referer ------ 远程调用文件(对付防采集的,用内容页地址,没有防的留空)
'==================================================
Function SaveRemoteFile(LocalFileName,RemoteFileUrl,Referer)
    SaveRemoteFile=True
 dim Ads,Retrieval,GetRemoteData
 Set Retrieval = Server.CreateObject("Microsoft.XMLHTTP")
 With Retrieval
  .Open "Get", RemoteFileUrl, False, "", ""
  if Referer<>"" then .setRequestHeader "Referer",Referer
  .Send
        If .Readystate<>4 then
            SaveRemoteFile=False
            Exit Function
        End If
  GetRemoteData = .ResponseBody
 End With
 Set Retrieval = Nothing
 Set Ads = Server.CreateObject("Adodb.Stream")
 With Ads
  .Type = 1
  .Open
  .Write GetRemoteData
  .SaveToFile server.MapPath(LocalFileName),2
  .Cancel()
  .Close()
 End With
 Set Ads=nothing
end Function

'==================================================
'函数名:GetPaing
'作  用:获取分页
'==================================================
Function GetPaing(Byval ConStr,StartStr,OverStr,IncluL,IncluR)
If ConStr="$False$" or ConStr="" Or StartStr="" Or OverStr="" or IsNull(ConStr)=True or IsNull(StartStr)=True Or IsNull(OverStr)=True Then
   GetPaing="$False$"
   Exit Function
End If

Dim Start,Over,ConTemp,TempStr
TempStr=LCase(ConStr)
StartStr=LCase(StartStr)
OverStr=LCase(OverStr)
Over=Instr(1,TempStr,OverStr)
If Over<=0 Then
   GetPaing="$False$"
   Exit Function
Else
   If IncluR=True Then
      Over=Over+Len(OverStr)
   End If
End If
TempStr=Mid(TempStr,1,Over)
Start=InstrRev(TempStr,StartStr)
If IncluL=False Then
   Start=Start+Len(StartStr)
End If

If Start<=0 Or Start>=Over Then
   GetPaing="$False$"
   Exit Function
End If
ConTemp=Mid(ConStr,Start,Over-Start)

ConTemp=Trim(ConTemp)
'ConTemp=Replace(ConTemp," ","")
ConTemp=Replace(ConTemp,",","")
ConTemp=Replace(ConTemp,"'","")
ConTemp=Replace(ConTemp,"""","")
ConTemp=Replace(ConTemp,">","")
ConTemp=Replace(ConTemp,"<","")
ConTemp=Replace(ConTemp," ;","")
GetPaing=ConTemp
End Function

'*************************************************
'函数名:gotTopic
'作  用:截字符串,汉字一个算两个字符,英文算一个字符
'参  数:str   ----原字符串
'        strlen ----截取长度
'返回值:截取后的字符串
'*************************************************
function gotTopic(str,strlen)
 if str="" then
  gotTopic=""
  exit function
 end if
 dim l,t,c, i
 str=replace(replace(replace(replace(str," "," "),""",chr(34)),">",">"),"<","<")
 l=len(str)
 t=0
 for i=1 to l
  c=Abs(Asc(Mid(str,i,1)))
  if c>255 then
   t=t+2
  else
   t=t+1
  end if
  if t>=strlen then
   gotTopic=left(str,i) & "…"
   exit for
  else
   gotTopic=str
  end if
 next
 gotTopic=replace(replace(replace(replace(gotTopic," "," "),chr(34),"""),">",">"),"<","&lt;")
end function

'***********************************************
'函数名:JoinChar
'作  用:向地址中加入 ? 或 &
'参  数:strUrl  ----网址
'返回值:加了 ? 或 & 的网址
'***********************************************
function JoinChar(strUrl)
 if strUrl="" then
  JoinChar=""
  exit function
 end if
 if InStr(strUrl,"?")<len(strUrl) then
  if InStr(strUrl,"?")>1 then
   if InStr(strUrl,"&")<len(strUrl) then
    JoinChar=strUrl & "&"
   else
    JoinChar=strUrl
   end if
  else
   JoinChar=strUrl & "?"
  end if
 else
  JoinChar=strUrl
 end if
end function

'**************************************************
'函数名:CreateKeyWord
'作  用:由给定的字符串生成关键字
'参  数:Constr---要生成关键字的原字符串
'返回值:生成的关键字
'**************************************************
Function CreateKeyWord(byval Constr,Num)
   If Constr="" or IsNull(Constr)=True or Constr="$False$" Then
      CreateKeyWord="$False$"
      Exit Function
   End If
   If Num="" or IsNumeric(Num)=False Then
      Num=2
   End If
   Constr=Replace(Constr,CHR(32),"")
   Constr=Replace(Constr,CHR(9),"")
   Constr=Replace(Constr," ","")
   Constr=Replace(Constr," ","")
   Constr=Replace(Constr,"(","")
   Constr=Replace(Constr,")","")
   Constr=Replace(Constr,"<","")
   Constr=Replace(Constr,">","")
   Constr=Replace(Constr,"""","")
   Constr=Replace(Constr,"?","")
   Constr=Replace(Constr,"*","")
   Constr=Replace(Constr,"","")
   Constr=Replace(Constr,",","")
   Constr=Replace(Constr,".","")
   Constr=Replace(Constr,"/","")
   Constr=Replace(Constr,"\","")
   Constr=Replace(Constr,"-","")
   Constr=Replace(Constr,"@","")
   Constr=Replace(Constr,"#","")
   Constr=Replace(Constr,"$","")
   Constr=Replace(Constr,"%","")
   Constr=Replace(Constr,"&","")
   Constr=Replace(Constr,"+","")
   Constr=Replace(Constr,":","")
   Constr=Replace(Constr,":","")  
   Constr=Replace(Constr,"‘","")
   Constr=Replace(Constr,"“","")
   Constr=Replace(Constr,"”","")        
   Dim i,ConstrTemp
   For i=1 To Len(Constr)
      ConstrTemp=ConstrTemp & "" & Mid(Constr,i,Num)
   Next
   If Len(ConstrTemp)<254 Then
      ConstrTemp=ConstrTemp & ""
   Else
      ConstrTemp=Left(ConstrTemp,254) & ""
   End If
   CreateKeyWord=ConstrTemp
End Function

'==================================================
'函数名:CheckUrl
'作  用:检查Url
'参  数:strUrl ------ 要检查Url
'==================================================
Function CheckUrl(strUrl)
   Dim Re
   Set Re=new RegExp
   Re.IgnoreCase =true
   Re.Global=True
   Re.Pattern="http://([\w-]+\.)+[\w-]+(/[\w-./?%&=]*)?"
   If Re.test(strUrl)=True Then
      CheckUrl=strUrl
   Else
      CheckUrl="$False$"
   End If
   Set Rs=Nothing
End Function

'==================================================
'函数名:ScriptHtml
'作  用:过滤html标记
'参  数:ConStr ------ 要过滤的字符串
'==================================================
Function ScriptHtml(Byval ConStr,TagName,FType)
    Dim Re
    Set Re=new RegExp
    Re.IgnoreCase =true
    Re.Global=True
    Select Case FType
    Case 1
       Re.Pattern="<" & TagName & "([^>])*>"
       ConStr=Re.Replace(ConStr,"")
    Case 2
       Re.Pattern="<" & TagName & "([^>])*>.*?</" & TagName & "([^>])*>"
       ConStr=Re.Replace(ConStr,"")
    Case 3
       Re.Pattern="<" & TagName & "([^>])*>"
       ConStr=Re.Replace(ConStr,"")
       Re.Pattern="</" & TagName & "([^>])*>"
       ConStr=Re.Replace(ConStr,"")
    End Select
    ScriptHtml=ConStr
    Set Re=Nothing
End Function

'==================================================
'函数名:RemoveHTML
'作  用:完全去除html标记
'参  数:strHTML ------ 要过滤的字符串
'==================================================
Function RemoveHTML(strHTML)
Dim objRegExp, Match, Matches
Set objRegExp = New Regexp

objRegExp.IgnoreCase = True
objRegExp.Global = True
'取闭合的<>
objRegExp.Pattern = "<.+?>"
'进行匹配
Set Matches = objRegExp.Execute(strHTML)

' 遍历匹配集合,并替换掉匹配的项目
For Each Match in Matches
strHtml=Replace(strHTML,Match.Value,"")
Next
RemoveHTML=strHTML
Set objRegExp = Nothing
End Function

'==================================================
'函数名:CheckDir
'作  用:检查文件夹是否存在
'参  数:FolderPath ------ 文件夹路径
'==================================================
Function CheckDir(byval FolderPath)
 dim fso
 Set fso = Server.CreateObject("Scripting.FileSystemObject")
 If fso.FolderExists(Server.MapPath(folderpath)) then
 '存在
  CheckDir = True
 Else
 '不存在
  CheckDir = False
 End if
 Set fso = nothing
End Function

'==================================================
'函数名:MakeNewsDir
'作  用:创建文件夹
'参  数:foldername ------ 文件夹名
'==================================================
Function MakeNewsDir(byval foldername)
 dim fso
 Set fso = Server.CreateObject("Scri" & "pti" & "ng.Fil" & "eSyst" & "emOb" & "ject")
        fso.CreateFolder(Server.MapPath(foldername))
        If fso.FolderExists(Server.MapPath(foldername)) Then
           MakeNewsDir = True
        Else
           MakeNewsDir = False
        End If
 Set fso = nothing
End Function

'==================================================
'函数名:DelDir
'作  用:创建文件夹
'参  数:foldername ------ 文件夹名
'==================================================
Function DelDir(byval foldername)
 dim fso
    Set fso = Server.CreateObject("Scri" & "pti" & "ng.Fil" & "eSyst" & "emOb" & "ject")
    If fso.FolderExists(Server.MapPath(foldername)) Then         '判断文件夹是否存在
       fso.DeleteFolder (Server.MapPath(foldername))            '删除文件夹
    End If
 Set fso = nothing
End Function

'**************************************************
'函数名:IsObjInstalled
'作  用:检查组件是否已经安装
'参  数:strClassString ----组件名
'返回值:True  ----已经安装
'        False ----没有安装
'**************************************************
Function IsObjInstalled(strClassString)
 IsObjInstalled = False
 Err = 0
 Dim xTestObj
 Set xTestObj = Server.CreateObject(strClassString)
 If 0 = Err Then IsObjInstalled = True
 Set xTestObj = Nothing
 Err = 0
End Function

'**************************************************
'函数名:strLength
'作  用:求字符串长度。汉字算两个字符,英文算一个字符。
'参  数:str  ----要求长度的字符串
'返回值:字符串长度
'**************************************************
function strLength(str)
 ON ERROR RESUME NEXT
 dim WINNT_CHINESE
 WINNT_CHINESE    = (len("中国")=2)
 if WINNT_CHINESE then
        dim l,t,c
        dim i
        l=len(str)
        t=l
        for i=1 to l
         c=asc(mid(str,i,1))
            if c<0 then c=c+65536
            if c>255 then
                t=t+1
            end if
        next
        strLength=t
    else
        strLength=len(str)
    end if
    if err.number<>0 then err.clear
end function

'****************************************************
'函数名:CreateMultiFolder
'作  用:创建多级目录,可以创建不存在的根目录
'参  数:要创建的目录名称,可以是多级
'返回逻辑值:True成功,False失败
'创建目录的根目录从当前目录开始
'****************************************************
Function CreateMultiFolder(ByVal CFolder)
 Dim objFSO,PhCreateFolder,CreateFolderArray,CreateFolder
 Dim i,ii,CreateFolderSub,PhCreateFolderSub,BlInfo
 BlInfo = False
 CreateFolder = CFolder
 On Error Resume Next
 Set objFSO = Server.CreateObject("Scri" & "pti" & "ng.Fil" & "eSyst" & "emOb" & "ject")
 If Err Then
  Err.Clear()
  Exit Function
 End If
 CreateFolder = Replace(CreateFolder,"\","/")
 If Left(CreateFolder,1)="/" Then
  'CreateFolder = Right(CreateFolder,Len(CreateFolder)-1)
 End If
 If Right(CreateFolder,1)="/" Then
  CreateFolder = Left(CreateFolder,Len(CreateFolder)-1)
 End If
 CreateFolderArray = Split(CreateFolder,"/")
 For i = 0 to UBound(CreateFolderArray)
  CreateFolderSub = ""
  For ii = 0 to i
   CreateFolderSub = CreateFolderSub & CreateFolderArray(ii) & "/"
  Next
  PhCreateFolderSub = Server.MapPath(CreateFolderSub)

'response.Write PhCreateFolderSub&"<br>"

  If Not objFSO.FolderExists(PhCreateFolderSub) Then
   objFSO.CreateFolder(PhCreateFolderSub)
  End If
 Next
 If Err Then
  Err.Clear()
 Else
  BlInfo = True
 End If
 Set objFSO=nothing
 CreateMultiFolder = BlInfo
End Function

'**************************************************
'函数名:FSOFileRead
'作  用:使用FSO读取文件内容的函数
'参  数:filename  ----文件名称
'返回值:文件内容
'**************************************************
  function FSOFileRead(filename)
  Dim objFSO,objCountFile,FiletempData
  Set objFSO = Server.CreateObject("Scripting.FileSystemObject")
  Set objCountFile = objFSO.OpenTextFile(Server.MapPath(filename),1,True)
  FSOFileRead = objCountFile.ReadAll
  objCountFile.Close
  Set objCountFile=Nothing
  Set objFSO = Nothing
  End Function

'**************************************************
'函数名:FSOlinedit
'作  用:使用FSO读取文件某一行的函数
'参  数:filename  ----文件名称
'        lineNum   ----行数
'返回值:文件该行内容
'**************************************************
  function FSOlinedit(filename,lineNum)
  if linenum < 1 then exit function
  dim fso,f,temparray,tempcnt
  set fso = server.CreateObject("scripting.filesystemobject")
  if not fso.fileExists(server.mappath(filename)) then exit function
  set f = fso.opentextfile(server.mappath(filename),1)
  if not f.AtEndofStream then
  tempcnt = f.readall
  f.close
  set f = nothing
  temparray = split(tempcnt,chr(13)&chr(10))
  if lineNum>ubound(temparray)+1 then
  exit function
  else
  FSOlinedit = temparray(lineNum-1)
  end if
  end if
  end function

'**************************************************
'函数名:FSOlinewrite
'作  用:使用FSO写文件某一行的函数
'参  数:filename    ----文件名称
'        lineNum     ----行数
'        Linecontent ----内容
'返回值:无
'**************************************************
  function FSOlinewrite(filename,lineNum,Linecontent)
  if linenum < 1 then exit function
  dim fso,f,temparray,tempCnt
  set fso = server.CreateObject("scripting.filesystemobject")
  if not fso.fileExists(server.mappath(filename)) then exit function
  set f = fso.opentextfile(server.mappath(filename),1)
  if not f.AtEndofStream then
  tempcnt = f.readall
  f.close
  temparray = split(tempcnt,chr(13)&chr(10))
  if lineNum>ubound(temparray)+1 then
  exit function
  else
  temparray(lineNum-1) = lineContent
  end if
  tempcnt = join(temparray,chr(13)&chr(10))
  set f = fso.createtextfile(server.mappath(filename),true)
  f.write tempcnt
  end if
  f.close
  set f = nothing
  end function

'**************************************************
'函数名:Htmlmake
'作  用:使用FSO创建文件
'参  数:HtmlFolder    ----路径
'        HtmlFilename     ----文件名
'        HtmlContent ----内容
'**************************************************
function Htmlmake(HtmlFolder,HtmlFilename,HtmlContent)
On Error Resume Next
     dim filepath,fso,fout
     filepath = HtmlFolder&"/"&HtmlFilename
     Set fso = Server.CreateObject("Scripting.FileSystemObject")
           If fso.FolderExists(HtmlFolder) Then
           Else
           CreateMultiFolder(HtmlFolder)
         &, ;nbs, p; End If
     Set fout = fso.Createtextfile(server.mappath(filepath),true)
     fout.writeline HtmlContent
     fout.close
     set fso=nothing
  Set fso = Server.CreateObject("Scripting.FileSystemObject")
  If fso.fileexists(Server.MapPath(filepath)) Then
  Response.Write "文件<font color=red>"&HtmlFilename&"</font>已生成!<br>"
  Else
  'Response.Write Server.MapPath(filepath)
  Response.Write "文件<font color=red>"&HtmlFilename&"</font>未生成!<br>"
  End If
  Set fso = nothing
End function

'**************************************************
'函数名:Htmldel
'作  用:使用FSO删除文件
'参  数:HtmlFolder    ----路径
'        HtmlFilename     ----文件名
'**************************************************
Sub Htmldel(HtmlFolder,HtmlFilename)
     dim filepath,fso
     filepath = HtmlFolder&"/"&HtmlFilename
     Set fso = CreateObject("Scripting.FileSystemObject")
     fso.DeleteFile(Server.mappath(filepath))
     Set fso = nothing
  Set fso = Server.CreateObject("Scripting.FileSystemObject")
  If fso.fileexists(Server.MapPath(filepath)) Then
  Response.Write "文件<font color=red>"&HtmlFilename&"</font>未删除!<br>"
  Else
  'Response.Write Server.MapPath(filepath)
  Response.Write "文件<font color=red>"&HtmlFilename&"</font>已删除!<br>"
  End If
  Set fso = nothing
End Sub

'=================================================
'过程名:HTMLEncode
'作  用:过滤HTML格式
'参  数:fString ----转换内容
'=================================================
function HTMLEncode(ByVal fString)
If IsNull(fString)=False or fString<>"" or fString<>"$False$" Then
fString = Replace(fString, ">", ">")
fString = Replace(fString, "<", "<")
fString = Replace(fString, Chr(32), " ")
fString = Replace(fString, Chr(9), " ")
fString = Replace(fString, Chr(34), """)
fString = Replace(fString, Chr(39), "'")
fString = Replace(fString, Chr(13), "")
fString = Replace(fString, " ", " ")
fString = Replace(fString, CHR(10) & CHR(10), "</P><P>")
fString = Replace(fString, Chr(10), "<br /> ")
HTMLEncode = fString
else
HTMLEncode = "$False$"
end if
end function

'=================================================
'过程名:unHTMLEncode
'作  用:还原HTML格式
'参  数:fString ----转换内容
'=================================================
function unHTMLEncode(ByVal fString)
If IsNull(fString)=False or fString<>"" or fString<>"$False$" Then
fString = Replace(fString, ">", ">")
fString = Replace(fString, "<", "<")
fString = Replace(fString, " ", Chr(32))
fString = Replace(fString, """, Chr(34))
fString = Replace(fString, "'", Chr(39))
fString = Replace(fString, "", Chr(13))
fString = Replace(fString, " ", " ")
fString = Replace(fString, "</P><P>" , CHR(10) & CHR(10))
fString = Replace(fString, "<br> ", Chr(10))
unHTMLEncode = fString
else
unHTMLEncode = "$False$"
end if
end function

function unhtmllist(content)
unhtmllist=content
if content <> "" then
unhtmllist=replace(unhtmllist,"'","&quot;")
unhtmllist=replace(unhtmllist,chr(10),"")
unHtmllist=replace(unHtmllist,chr(13),"<br>")
end if
end function

function unhtmllists(content)
unhtmllists=content
if content <> "" then
unhtmllists=replace(unhtmllists,"""","&quot;")
unhtmllists=replace(unhtmllists,"'","&quot;")
unhtmllists=replace(unhtmllists,chr(10),"")
unHtmllists=replace(unHtmllists,chr(13),"<br>")
end if
end function

function htmllists(content)
htmllists=content
if content <> "" then
htmllists=replace(htmllists,"‘’","""")
htmllists=replace(htmllists,"&quot;","'")
htmllists=replace(htmllists,"<br>",chr(13)&chr(10))
end if
end function

function uhtmllists(content)
uhtmllists=content
if content <> "" then
uhtmllists=replace(uhtmllists,"""","‘’")
uhtmllists=replace(uhtmllists,"'","&quot;")
uhtmllists=replace(uhtmllists,chr(10),"")
uHtmllists=replace(uHtmllists,chr(13),"<br>")
end if
end function

'=================================================
'过程: Sleep
'功能: 程序在此晢停几秒
'参数: iSeconds    要暂停的秒数
'=================================================
Sub Sleep(iSeconds)
    response.Write "<font color=blue>开始暂停 "&iSeconds&" 秒</font><br>"
    Dim t:t=Timer()
    While(Timer()<t+iSeconds)
        'Do Nothing
    Wend
    response.Write "<font color=blue>暂停 "&iSeconds&" 秒结束</font><br>"
End Sub

'==================================================
'函数名:MyArray
'作  用:提取标签,以分隔
'参  数:ConStr ------提取地址的原字符
'==================================================
Function MyArray(Byval ConStr)
   Set objRegExp = New Regexp
   objRegExp.IgnoreCase = True
   objRegExp.Global = True
   objRegExp.Pattern = "({).+?(})"
   Set Matches =objRegExp.Execute(ConStr)
   For Each Match in Matches
      TempStr=TempStr & "" & Match.Value
   Next
   Set Matches=nothing
  
      TempStr=Right(TempStr,Len(TempStr)-1)
      objRegExp.Pattern ="{"
      TempStr=objRegExp.Replace(TempStr,"")
      objRegExp.Pattern ="}"
      TempStr=objRegExp.Replace(TempStr,"")
   Set objRegExp=nothing
   Set Matches=nothing
  
   TempStr=Replace(TempStr,"$","")

   If TempStr="" then
      MyArray="在代码中没有可提取的东西"
   Else
      MyArray=TempStr
   End if
End Function

'==================================================
'函数名:randm
'作  用:产生6位随机数
'==================================================
Function randm
randomize
randm=Int((900000*rnd)+100000)
End Function
%>

时间: 2024-10-03 06:37:45

一个带采集远程文章内容,保存图片,生成文件等完整的采集功能的相关文章

php几种采集远程服务器内容代码

//方法一模仿用户访问网页  代码如下 复制代码 function readpr($link,$url) {  $fp = fsockopen ($url, 80, $errno, $errstr, 30);  if (!$fp)  {   echo "$errstr ($errno) ";   exit(1);  }  else  {   $out = "get $link http/1.0 ";   $out .= "host: $url "

php curl采集远程页面内容演示代码

例  代码如下 复制代码 $curlPost = 'a=1&b=2';//模拟POST数据 $ch = curl_init(); curl_setopt($ch, CURLOPT_HTTPHEADER, array('X-FORWARDED-FOR:0.0.0.0', 'CLIENT-IP:0.0.0.0'));  //构造IP curl_setopt($ch, CURLOPT_REFERER, "http://www.111cn.net/");   //构造来路 curl_s

php 利用curl模仿用户采集远程网站内容

 代码如下 复制代码 $timestamp = time(); function open($file,$type=''){         global $fromurl,$referer;         $cachename=$file;                 if($type){                         $file='/'.$type.'/'.$file;                 }else{                         $f

ajax获取远程文章内容代码

function $(elementId) {  //获取指定id的object  if (document.getElementById) {   return document.getElementById(elementId);  } else if (document.all) {   return document.all[elementId];  } else if (document.layers) {   return document.layers[elementId];  }

帝国cms用火车头采集文章内容分页教程

火车头采集:文章内容分页采集教程 采集文章的时候,难免遇到文章有分页,本教程讲解下内容分页的采集. 在规则的第二步:采集内容规则左下方有个"分页获取规则"选项卡,用来设置分页.如下图: 全部列出模式适用于分页地址全部显示出来如下图: 上下页上n页下n页适用用分页地址列出一部分如下图 我们先说下上下页模式的分页,以采集这个地址http://www.zzarea.com/2013/0117/1358391833627.html为例 在第一页我们查看分页代码的情况如下: 在第三页分页代码的情

求如何让网页根据文章内容自己自动生成关键字

问题描述 各位大侠么,我想让网页能自动根据里面的内容自动生成关键字,就是那个keywords和description.各位大侠能不能指点下 解决方案 解决方案二:这需要算法的,算法规则是人定的.比如,你可以使用一些分词器,统计里面出现字数最多的词语,作为关键字,这个还是比较麻烦的,一个是分词器比较麻烦,另外,对于关键字的选择,也比较麻烦.解决方案三:引用1楼lisliefor的回复: 这需要算法的,算法规则是人定的.比如,你可以使用一些分词器,统计里面出现字数最多的词语,作为关键字,这个还是比较

php判断远程文章是否存在程序代码

今天突发奇想, 我在给一客户写一案例时,有一个地方要求网友填写url但是,要保存是正确可访问的,虽然客户没要求但是我们还是想到了这样做,就是用php 的file_get_contents ,ob_get_contents() 来读取远程文章内容,如果有就真否则就假了,下面是我举的两个实例了.关于ob_get_contents() 我们要在php.ini里修改几个地方curl_开头的地方前面的;去了就OK了. <?php $url='http://www.111cn.net'; if(@file_

分享一个PHP采集远程图片

  <?php  /*使用PHP实现采集远程图片功能.基本流程:    1.获取目标网站图片地址.    2.读取图片内容.    3.创建要保存图片的路径并命名图片名称.    4.写入图片内容.    5.完成.    我们通过写几个函数来实现这一过程.*/    //函数make_dir()建立目录.判断要保存的图片文件目录是否存在,如果不存在则创建目录,并且将目录设置为可写权限.  function make_dir($path){       if(!file_exists($path

php文章内容分页并生成相应的htm静态页面代码_php技巧

复制代码 代码如下: <?php $url='test.php?1=1'; $contents="fjka;fjsa;#page#批量生成分成文件并且加上分页代码"; $ptext ='#page#'; ContentsPages($url,$contents,$ptext) /** * 函数名:ContentsPages * 功能:文章内容分页 * 参数:$url文章内容页URL 类型string,$contents文章内容页内容 类型string,$ptext分页标识 类型s