Python code

代理ip代码

代理地址如下几个可以更换
http://www.xicidaili.com/nn/

https://www.kuaidaili.com/free/inha/
可以继续添加

（此处原为复制代码时带入的行号 1–46，系排版残留，已清理。）
from bs4 import BeautifulSoup
import requests
import random

def get_ip_list(url, headers):
    """Scrape proxy "ip:port" strings from the first 25 listing pages.

    Args:
        url: Base URL of the free-proxy listing (page number is appended),
             e.g. 'https://www.kuaidaili.com/free/inha/'.
        headers: HTTP headers dict (must include a User-Agent, or the site
             may reject the request).

    Returns:
        List of 'ip:port' strings accumulated across ALL pages.

    Note:
        The original version reset ip_list inside the page loop, so only
        the last page's proxies were returned; it also mutated `url` in
        place and undid it with fragile slicing. Both are fixed here.
    """
    ip_list = []
    for page in range(1, 26):
        # Build the page URL fresh each iteration instead of mutating `url`.
        page_url = url + str(page)
        print(page_url)
        web_data = requests.get(page_url, headers=headers)
        soup = BeautifulSoup(web_data.text, 'lxml')
        rows = soup.find_all('tr')
        # Skip the header row; column indices 1 and 2 assume the site's
        # table layout (presumably IP and port) -- confirm against the page.
        for row in rows[1:]:
            tds = row.find_all('td')
            if len(tds) > 2:
                ip_list.append(tds[1].text + ':' + tds[2].text)
    return ip_list

def get_random_ip(ip_list):
    """Pick one proxy at random and wrap it as a requests `proxies` dict.

    Args:
        ip_list: List of 'ip:port' strings.

    Returns:
        Dict with 'http' and 'https' keys, both pointing at the same
        randomly chosen 'http://ip:port' proxy URL.
    """
    # Choosing from ip_list first, then prefixing, is equivalent to
    # building the full proxy list before choosing.
    chosen = 'http://' + random.choice(ip_list)
    return {'http': chosen, 'https': chosen}

if __name__ == '__main__':
    url = 'https://www.kuaidaili.com/free/inha/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
    }
    # Harvest proxies, then pick one at random for use with requests.
    ip_list = get_ip_list(url, headers=headers)
    proxies = get_random_ip(ip_list)
    # Fixed: 'F:ip_list.txt' lacked a path separator, making it a
    # drive-relative path on Windows (written to the current directory
    # of drive F:, not the drive root).
    with open('F:/ip_list.txt', 'w') as f:
        # The `with` block closes the file; no explicit f.close() needed.
        f.write(str(ip_list))
    print(proxies)

以下是主代码

    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'}
    params = {'': ''}
    cookies = {'': ''}
    proxies = {'': ''}  # 代理ip

    def get_html(url):
        r = requests.get(url, headers=headers, params=params, cookies=cookies, proxies=proxies)
        r.encoding = r.apparent_encoding
        print(r.status_code)
        html = r.text