# -*- coding: utf8 -*-
'''Fetch a fresh HTTP proxy from a proxy API, then exercise it by
requesting a test page through it, in a loop.'''
import urllib2
import cookielib
import time
import json
class Spide:
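    '''A minimal crawler that can route its requests through an HTTP
    proxy fetched from a proxy API; cookies are kept in an LWPCookieJar.'''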
    def __init__(self, proxy_ip, proxy_type, proxy_port, use_proxy=False):
        print 'using the proxy info:', proxy_ip
        self.proxy = urllib2.ProxyHandler({proxy_type: proxy_ip + ":" + str(proxy_port)})
        self.usercode = ""
        self.userid = ""
        self.cj = cookielib.LWPCookieJar()
        # Keep the cookie processor even when a proxy is used, so proxied
        # requests still carry cookies.
        handlers = [urllib2.HTTPCookieProcessor(self.cj)]
        if use_proxy:
            handlers.append(self.proxy)
        self.opener = urllib2.build_opener(*handlers)
        urllib2.install_opener(self.opener)
    # Fetch a fresh proxy from the proxy API.
    def get_proxy(self):
        proxy_info_json = None
        try:
            reqRequest_proxy = urllib2.Request('http://gXiXmXmXeXpXrXoXxXy.com/api/getProxy')
            reqRequest_proxy.add_header('Accept', '*/*')
            reqRequest_proxy.add_header('Accept-Language', 'zh-CN,zh;q=0.8')
            reqRequest_proxy.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36')
            reqRequest_proxy.add_header('Content-Type', 'application/x-www-form-urlencoded')
            proxy_info = urllib2.urlopen(reqRequest_proxy).read()
            print proxy_info
            # Assumed response shape (values illustrative, not from the real service):
            # {"protocol": "http", "ip": "1.2.3.4", "port": "3128"}
            proxy_info_json = json.loads(proxy_info)
        except Exception as e:
            print 'failed to fetch a proxy:', e
        # Returns the parsed dict on success, None on failure.
        return proxy_info_json
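    # Fetch http://www.503error.com through the installed opener; this
    # serves as a simple connectivity check for the current proxy.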
    def chrome(self):
        try:
            reqRequest = urllib2.Request('http://www.503error.com')
            reqRequest.add_header('Accept', '*/*')
            reqRequest.add_header('Accept-Language', 'zh-CN,zh;q=0.8')
            reqRequest.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36')
            reqRequest.add_header('Content-Type', 'application/x-www-form-urlencoded')
            content = urllib2.urlopen(reqRequest).read()
        except Exception as e:
            print 'request failed:', e
        print 'done'
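
# Driver: fetch a fresh proxy over a direct connection, then request the
# test page through that proxy, pausing 5 seconds between rounds.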
if __name__ == "__main__":
    for count in range(100):
        print '################################:', count
        print 'Getting the new proxy info:'
        # This instance connects directly (use_proxy=False); it only queries
        # the proxy API, so the proxy arguments are placeholders.
        test = Spide(proxy_ip='test', proxy_type='http', proxy_port='3128', use_proxy=False)
        proxy_list = test.get_proxy()
        if proxy_list is None:
            time.sleep(5)
            continue
        print 'start to chrome'
        spide1 = Spide(proxy_ip=proxy_list['ip'], proxy_type=proxy_list['protocol'],
                       proxy_port=proxy_list['port'], use_proxy=True)
        spide1.chrome()
        time.sleep(5)