
使用socket和asyncio库爬取数据
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
|
import asyncio
import socket
from urllib.parse import urlparse
async def get_url(url):
    """Fetch *url* over a raw asyncio TCP stream and return the response text
    split on the blank line separating HTTP headers from the body.

    Sends a minimal HTTP/1.1 GET with ``Connection: close`` on port 80
    (plain HTTP only — no TLS). Prints the number of header/body sections
    and returns them as a list of strings.

    NOTE(review): the original scraped text had lost its backslashes
    (``rn`` was ``\r\n``) and the function name itself; both are restored
    here — ``main`` calls this as ``get_url``.
    """
    parsed = urlparse(url)
    host = parsed.netloc
    path = parsed.path
    if path == "":
        path = "/"

    reader, writer = await asyncio.open_connection(host, 80)
    try:
        request = "GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(path, host)
        writer.write(request.encode("utf8"))
        await writer.drain()  # ensure the request is actually flushed to the socket
        all_lines = []
        async for raw_line in reader:
            all_lines.append(raw_line.decode("utf8"))
        html = "\n".join(all_lines)
        html_data = html.split("\r\n\r\n")
        print(len(html_data))
        return html_data
    finally:
        # Original leaked the connection; close the writer explicitly.
        writer.close()
        await writer.wait_closed()
async def main():
    """Crawl pages 2..19 of the target site concurrently.

    The original created each task and then awaited it immediately inside the
    loop, which serialized every fetch and defeated the purpose of asyncio.
    Creating all tasks first and gathering them lets the requests overlap.
    """
    url = "xxx/?page={}"
    tasks = [asyncio.create_task(get_url(url.format(i))) for i in range(2, 20)]
    await asyncio.gather(*tasks)
# Script entry point: time the whole crawl and run the event loop in debug
# mode. The scrape had stripped the dunders — ``name``/``"main"`` must be
# ``__name__``/``"__main__"`` for the guard to ever fire.
if __name__ == "__main__":
    import time

    start_time = time.time()
    asyncio.run(main(), debug=True)
    print('last time:{}'.format(time.time() - start_time))
|
https://docs.python.org/zh-cn/3/library/asyncio.html
近期评论