crawler_pictures on http://www.officeplus.cn
To get my background pictures
- When I saw the pictures on the Microsoft site, I just wanted to get them!
Code:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
|
import os
import urllib.request

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
# Picture URLs collected from the listing page; filled by the page-scanning
# function and consumed by store().
srcs = []
# NOTE(review): `names` is not referenced anywhere in the visible code;
# kept so that any external user of this module-level name still works.
names = []
def browser(url):
    """Collect picture URLs from the listing page at *url* into ``srcs``.

    Opens the page in Firefox through Selenium, inspects every element with
    the CSS class ``dlink`` and appends at most one URL per element to the
    module-level ``srcs`` list:

    * elements without a ``data-href`` attribute are skipped;
    * when ``data-href`` equals the ``loading_1.jpg`` image (presumably the
      lazy-loading placeholder), the ``lazy-src`` attribute is used instead;
    * otherwise ``data-href`` itself is used.

    Args:
        url: Address of the listing page to scan.
    """
    placeholder = 'http://www.officeplus.cn/images/officeplus/loading_1.jpg'
    driver = webdriver.Firefox()
    try:
        driver.get(url)
        # find_elements(By..., ...) is available in both Selenium 3 and 4,
        # unlike the removed find_elements_by_class_name helper.
        for link in driver.find_elements(By.CLASS_NAME, 'dlink'):
            href = link.get_attribute('data-href')
            if href is None:
                continue
            picture_url = (
                link.get_attribute('lazy-src') if href == placeholder else href
            )
            # A missing lazy-src would otherwise put None into srcs and make
            # the later download step fail.
            if picture_url:
                srcs.append(picture_url)
    finally:
        # Always shut the browser down, even if the page fails to load.
        driver.quit()
def store():
    """Download every URL in ``srcs`` into a ``pictures`` directory.

    The directory is created inside the current working directory if it does
    not exist yet. Each picture is saved as ``<index>.jpg``, where the index
    is the position of its URL in ``srcs``, and a progress line is printed
    after each download.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises when a download fails;
        errors are not caught here.
    """
    target_dir = os.path.join(os.getcwd(), u'pictures')
    os.makedirs(target_dir, exist_ok=True)
    for i, src in enumerate(srcs):
        # os.path.join picks the right separator for the platform instead of
        # a hard-coded backslash.
        file_path = os.path.join(target_dir, str(i) + '.jpg')
        urllib.request.urlretrieve(src, file_path)
        print(i, 'sucess!', 'NEXT PAGE!')
if __name__ == "__main__":
    # Scan the listing page for picture URLs, download them, then report
    # what was collected and how many URLs there were.
    browser('http://www.officeplus.cn/List.shtml?cat=IMAGE&tag=19&order=1')
    store()
    print(srcs)
    print(len(srcs))
|
- Just recording my crawler life~
近期评论