from bs4 import BeautifulSoup
import urllib.parse
import requests
import csv
# Scrape the paginated 58.com (Beijing) branded-apartment listing filtered by
# the 2000-4000 price range and save one CSV row per listing:
# [title, listing URL, price].
url = "http://bj.58.com/pinpaigongyu/pn/{page}/?minprice=2000_4000"
page = 0
house_data = []

# Walk the result pages until one comes back with no listing entries.
while True:
    page += 1
    page_url = url.format(page=page)
    print("fetch:", page_url)
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(page_url, timeout=10)
    # Name the parser explicitly so the parse result does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's warning).
    html = BeautifulSoup(response.text, "html.parser")
    house_list = html.select(".list > li")
    if not house_list:
        break
    for house in house_list:
        title_tag = house.select_one("h2")
        link_tag = house.select_one("a")
        money_tag = house.select_one(".money b")
        # Skip entries that lack any of the expected elements (e.g. ads)
        # instead of crashing on an IndexError / AttributeError.
        if title_tag is None or link_tag is None or money_tag is None:
            continue
        href = link_tag.get("href")
        if not href:
            continue
        # Keep everything as str: the previous encode()/str()/decode() round
        # trip compared Chinese text against the repr of a bytes object,
        # which can never match, and bought nothing on Python 3.
        house_title = title_tag.get_text()
        house_money = money_tag.get_text()
        # Resolve relative links against the page actually fetched rather
        # than the URL template that still contains the "{page}" placeholder.
        house_url = urllib.parse.urljoin(page_url, href)
        # NOTE: a "location" value used to be derived from the title here but
        # was never written to the CSV; that dead code (which also raised
        # IndexError on single-word titles) has been dropped.
        house_data.append([house_title, house_url, house_money])

# Open the output exactly once, after scraping. newline="" is what the csv
# module requires to avoid blank rows on Windows; UTF-8 is set explicitly so
# the Chinese text is written the same way on every platform.
with open("rent.csv", "w", newline="", encoding="utf-8") as csv_file:
    csv_writer = csv.writer(
        csv_file, delimiter=",", quotechar="|", quoting=csv.QUOTE_MINIMAL
    )
    csv_writer.writerows(house_data)
# 近期评论 ("Recent comments") -- leftover web-page text, not part of the script.