Python 模拟登录网站并抓取信息

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#coding=utf-8
import re
import urllib
import urllib2
import cookielib
import urllib,urllib2,cookielib
import re
class xiaobai:
    """Small cookie-aware HTTP helper for logging in to a site and fetching pages.

    Creating an instance installs a global urllib2 opener backed by a cookie
    jar, so cookies set by the login response are sent with later requests
    made through urllib2.urlopen.
    """

    # URL-encoded form body submitted by login(); callers assign it before
    # calling login().
    post_data = ""

    def __init__(self):
        """Build a cookie-handling opener and install it as the urllib2 default."""
        cj = cookielib.CookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        opener.addheaders = [('User-agent', 'Opera/9.23')]
        # install_opener replaces the module-wide opener, so every later
        # urllib2.urlopen call shares this cookie jar and User-agent header.
        urllib2.install_opener(opener)

    def login(self, loginurl, bianma):
        """Submit self.post_data to loginurl and return the decoded response body.

        loginurl -- URL of the login form handler.
        bianma   -- name of the encoding used to decode the response bytes.
        """
        # Supplying a data argument makes urllib2 send the request as a POST.
        req = urllib2.Request(loginurl, self.post_data)
        return self._fetch(req, bianma)

    def getpagehtml(self, pageurl, bianma):
        """Fetch pageurl and return its body decoded with the bianma encoding.

        pageurl -- URL of the page to download.
        bianma  -- name of the encoding used to decode the response bytes.
        """
        return self._fetch(urllib2.Request(pageurl), bianma)

    def _fetch(self, req, bianma):
        """Open req, read the whole body, decode it with bianma and return it."""
        response = urllib2.urlopen(req)
        try:
            return response.read().decode(bianma)
        finally:
            # Release the connection even when reading or decoding fails.
            response.close()
if __name__=="__main__":
x=xiaobai()
#参递一个post参数
x.post_data=urllib.urlencode({'uname':'张三','pass':'123','op':'login'})
y=x.login("http://www.lvye.org/user.php","utf-8")#登陆
#获取一个页面的html并输出
print x.getpagehtml("http://www.lvye.org/userinfo.php?uid=111111","utf-8")

点击并拖拽以移动