问题描述
- 爬虫 在网页定位验证码,然后显示出来,手动输入验证码,求代码
- 爬虫,在网页定位验证码,然后不管在哪里显示出来都行,然后手动输入验证码,继续爬,不需要登录,但是需要搜索功能,跪求完整代码,可以想象爬有验证码的百度。
解决方案一:
- 我已经搞定了:直接复制验证码图片的链接,放在 img 的 src 里,然后写个输入框,把输入的验证码通过 POST 一起提交过去。
解决方案二:
以下是部分python代码:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Log in to a captcha-protected site by hand and download a range of pages.

Workflow: the captcha image is downloaded to a local file, the operator looks
at it and types the code on the console, the login form is posted with that
code, and the session cookie obtained is then used to fetch record pages.

The script runs on both Python 2 and Python 3.
"""
import os
import xml.etree.ElementTree as ET

try:  # Python 2
    import cookielib
    import urllib2
    from urllib import urlencode
except ImportError:  # Python 3
    import http.cookiejar as cookielib
    import urllib.request as urllib2
    from urllib.parse import urlencode

try:  # Python 2
    _read_line = raw_input
except NameError:  # Python 3
    _read_line = input


# -----------------------------------------------------------------------------
# Login in www.***.com.cn
def ChinaBiddingLogin(url, username, password):
    """Log in with a manually entered captcha code.

    Installs a cookie-aware opener as the process-wide default (so later
    ``urlopen`` calls reuse the session), downloads the captcha image through
    ``DownloadFile``, prompts on the console for the code and posts the login
    form to ``url``.

    Args:
        url: Address the login form is posted to.
        username: Value sent as ``login_id``.
        password: Value sent as ``login_passwd``.

    Returns:
        True when the returned page does not mention ``login.jsp``, False
        otherwise (the site is assumed to bounce failed logins back to the
        login page).
    """
    # Enable cookie support for urllib2
    cookiejar = cookielib.CookieJar()
    urlopener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
    urllib2.install_opener(urlopener)

    urlopener.addheaders.append(('Referer', 'http://www.chinabidding.com.cn/zbw/login/login.jsp'))
    urlopener.addheaders.append(('Accept-Language', 'zh-CN'))
    urlopener.addheaders.append(('Host', 'www.chinabidding.com.cn'))
    urlopener.addheaders.append(('User-Agent', 'Mozilla/5.0 (compatible; MISE 9.0; Windows NT 6.1); Trident/5.0'))
    urlopener.addheaders.append(('Connection', 'Keep-Alive'))

    print('XXX Login......')

    # The captcha must be fetched with the same opener so that the image and
    # the login request share one session cookie.
    imgurl = r'http://www.*****.com.cn/zbw/login/image.jsp'
    DownloadFile(imgurl, urlopener)
    authcode = _read_line('Please enter the authcode:')
    # Alternative to typing the code by hand (needs the OCR service):
    # authcode = VerifyingCodeRecognization(r"http://192.168.0.106/images/code.jpg")

    # Send login/password to the site and get the session cookie
    values = {
        'login_id': username,
        'opl': 'op_login',
        'login_passwd': password,
        'login_check': authcode,
    }
    # POST data has to be bytes on Python 3; urlencode output is pure ASCII.
    post_data = urlencode(values).encode('ascii')
    urlcontent = urlopener.open(urllib2.Request(url, post_data))
    try:
        page = urlcontent.read(500000)
    finally:
        urlcontent.close()

    # Make sure we are logged in, check the returned page content
    if page.find(b'login.jsp') != -1:
        print('Login failed with username=%s password=%s and authcode=%s'
              % (username, password, authcode))
        return False
    print('Login succeeded!')
    return True


# -----------------------------------------------------------------------------
# Download from fileUrl then save to fileToSave
# Note: the fileUrl must be a valid file
def DownloadFile(fileUrl, urlopener, fileToSave=r'/var/www/images/code.jpg'):
    """Download ``fileUrl`` with ``urlopener`` and store it at ``fileToSave``.

    Args:
        fileUrl: Address of the file to fetch; an empty value is rejected.
        urlopener: Object whose ``open(request)`` returns a readable,
            closable response (normally the cookie-aware opener).
        fileToSave: Destination path; defaults to the location the captcha
            image was originally hard-coded to.

    Returns:
        True if the file was fetched and written, False otherwise. This is a
        best-effort helper: failures are reported on the console, not raised.
    """
    if not fileUrl:
        print('ERROR: fileUrl is NULL!')
        return False

    try:
        response = urlopener.open(urllib2.Request(fileUrl))
        try:
            content = response.read()
        finally:
            response.close()
        # Binary mode: the payload is an image, not text.
        with open(fileToSave, 'wb') as outfile:
            outfile.write(content)
    except Exception as err:  # keep the original "never raise" contract
        print('ERROR: download of %s failed (%s)' % (fileUrl, err))
        return False
    return True


# ------------------------------------------------------------------------------
# Verifying code recognization
def VerifyingCodeRecognization(imgurl):
    """Ask the OCR web service to recognise the captcha at ``imgurl``.

    Args:
        imgurl: Address of the captcha image, reachable by the OCR service.

    Returns:
        The ``data`` attribute of the ``AddField`` element in the XML reply,
        or None when the service cannot be reached or the reply has no such
        element/attribute.
    """
    values = {
        'user': 'admin',
        'pwd': 'admin',
        'model': 'ocr',
        'ocrfile': 'cbi',
        'imgurl': imgurl,
    }
    url = r'http://192.168.0.119:800/api?' + urlencode(values)

    try:
        urlcontent = urllib2.urlopen(url)
    except IOError:
        print('***ERROR: invalid URL (%s)' % url)
        return None

    try:
        page = urlcontent.read(500000)
    finally:
        urlcontent.close()

    # Parse the xml data and get the verifying code
    root = ET.fromstring(page)
    node_find = root.find('AddField')
    if node_find is None:
        return None
    return node_find.attrib.get('data')


# ------------------------------------------------------------------------------
# Read users from configure file
def ReadUsersFromFile(filename):
    """Read ``username password`` pairs from a whitespace-separated file.

    Lines that do not consist of exactly two fields are ignored.

    Args:
        filename: Path of the configuration file.

    Returns:
        Dict mapping each username to its password.
    """
    users = {}
    with open(filename, 'r') as conf:
        for eachLine in conf:
            info = eachLine.split()
            if len(info) == 2:
                users[info[0]] = info[1]
    return users


# ------------------------------------------------------------------------------
def main():
    """Download records ``start_id``..``end_id``, three pages per login.

    Accounts from ``users.conf`` are used in rotation; each successful login
    fetches up to three consecutive records into ``./download``.
    """
    # NOTE(review): the host part of these URLs is redacted in the source;
    # fill in the real addresses before running.
    login_page = r'http://www.***.com.cnlogin/login.jsp'
    download_page = r'http://www.***.com.cn***/***?record_id='
    start_id = 8593330
    end_id = 8595000  # treated as inclusive -- TODO confirm
    now_id = start_id

    Users = ReadUsersFromFile('users.conf')
    if not Users:
        # Without this guard the rotation loop below would spin forever.
        print('ERROR: no users found in users.conf')
        return

    download_dir = './download'
    if not os.path.isdir(download_dir):
        os.makedirs(download_dir)

    while now_id <= end_id:
        for key in Users:
            if now_id > end_id:
                break
            if not ChinaBiddingLogin(login_page, key, Users[key]):
                continue
            for _ in range(3):
                if now_id > end_id:
                    break
                pageUrl = download_page + '%d' % now_id
                # Uses the opener installed by ChinaBiddingLogin, so the
                # request carries the session cookie.
                urlcontent = urllib2.urlopen(pageUrl)
                try:
                    content = urlcontent.read(500000)
                finally:
                    urlcontent.close()
                filepath = './download/%s.html' % now_id
                with open(filepath, 'wb') as f:
                    f.write(content)
                now_id += 1


# ------------------------------------------------------------------------------
if __name__ == '__main__':
    main()
解决方案三:
如果对方在页面中只是显示验证码图片,那么你需要用正则表达式获取对应的区域,再将其中的图片输出流显示在你想显示的地方。
目前要攻破验证码还是比较困难的,因为服务端可以使用 KEY 来做校验,页面中完全不需要包含验证码的数据,只有图片流而已。
解决方案四:
http://www.uuwise.com/
识别验证码,你要拿到验证码的图片,然后发送给上面这个网址的地方,他们会返回一个字符串的验证码给你。
解决方案五:
验证码是可以机器识别的
时间: 2024-10-15 21:31:24