1
http://www.heibanke.com/lesson/crawler_ex00/
1 | import urllib.request |
2
http://www.heibanke.com/lesson/crawler_ex01/
1 | import requests |
3
http://www.heibanke.com/lesson/crawler_ex02/
import re

import requests

# Solver script for crawler_ex02: log in, then POST candidate passwords
# 1, 2, 3, ... to the exercise page until the response no longer contains
# the "wrong password" marker.
url1 = 'http://www.heibanke.com/accounts/login/?next=/lesson/crawler_ex02/'
url2 = 'http://www.heibanke.com/lesson/crawler_ex02/'

# A single session is reused so cookies set by one request are sent with
# the following ones.
temp = requests.session()

# Fetch the login page first; the script then reads the 'csrftoken' cookie
# from the session and echoes it back in the login form.
temp.get(url1)
token = temp.cookies['csrftoken']
data = {'username': 'admin', 'password': 123456, 'csrfmiddlewaretoken': token}
temp.post(url1, data)

# Re-read the token after visiting the exercise page and build the form
# that the guessing loop will resubmit.
temp.get(url2)
token = temp.cookies['csrftoken']
data = {'username': 'admin', 'password': 0, 'csrfmiddlewaretoken': token}

index = 0
sol = 1  # truthy sentinel so the loop body runs at least once
while sol:
    index += 1
    data['password'] = index
    html = temp.post(url2, data).text
    # findall returns a non-empty list while the page still reports
    # "密码错误" (wrong password).
    # NOTE(review): any response lacking the marker (including an error
    # page) also ends the loop -- check the printed HTML before trusting it.
    sol = re.findall(r'密码错误', html)
    print(index)

# First candidate whose response lacked the marker, plus that response body.
# NOTE(review): loop-body extent was reconstructed from a flattened listing.
print(index, html)
4
http://www.heibanke.com/lesson/crawler_ex03/
import re
import time
from threading import Thread

import requests

# Setup for the crawler_ex03 solver: log in and prepare a session that the
# worker function can use to fetch the password-list page.
url1 = 'http://www.heibanke.com/accounts/login/?next=/lesson/crawler_ex03/'
url2 = 'http://www.heibanke.com/lesson/crawler_ex03/'
url3 = 'http://www.heibanke.com/lesson/crawler_ex03/pw_list/'

# A single session is reused so cookies set by one request are sent with
# the following ones.
temp = requests.session()

# Fetch the login page first; the script then reads the 'csrftoken' cookie
# from the session and echoes it back in the login form.
temp.get(url1)
token = temp.cookies['csrftoken']
data = {'username': 'admin', 'password': '123456', 'csrfmiddlewaretoken': token}
temp.post(url1, data)

# Refresh the token after visiting the exercise page.
temp.get(url2)
token = temp.cookies['csrftoken']
data['csrfmiddlewaretoken'] = token

# NOTE(review): not referenced anywhere in the visible script; kept in case
# other code relies on the name.
password = {}
def loop(passwd):
    """Fetch the password-list page once and record the digits it shows.

    Each (position, value) pair scraped from the page is written into
    ``passwd`` at index ``position - 1``.

    Args:
        passwd: List of strings that is mutated in place; slots still
            holding '' have not been discovered yet.
    """
    html = temp.get(url3)
    pos = re.findall(r'password_pos">([0-9]*)</td>', html.text)
    val = re.findall(r'password_val">([0-9]*)</td>', html.text)
    for position, value in zip(pos, val):
        # The pattern allows an empty capture; int('') would raise
        # ValueError, so skip rows without a position.
        if not position:
            continue
        passwd[int(position) - 1] = value
    print(passwd)
    # Prints the list length (not the number of filled slots).
    print(len(passwd))
# Driver: keep fetching the password-list page from worker threads until
# every one of the 100 slots has been filled, then print the joined result.
passwd = ['' for i in range(100)]
T = ['t1', 't2']  # one worker thread is started per entry
while '' in passwd:
    started = []
    for _ in T:
        # Pass the callable and its argument separately; writing
        # target=loop(passwd) would run loop() right here in the main thread
        # and hand Thread a target of None.
        t = Thread(target=loop, args=(passwd,))
        t.start()
        started.append(t)
        # NOTE(review): presumably staggers the requests; the exact placement
        # of this sleep was reconstructed from a flattened listing.
        time.sleep(8)
    # Wait for every worker of this round before re-checking for gaps.
    for t in started:
        t.join()
passwd = ''.join(passwd)
print(passwd)
github
GitHub: https://github.com/ChrisX2016/Web_crawler/tree/master/chuangguan