以Selenium模擬使用者點選表單,完成後送出查詢(Query),並自動抓取履歷資料。
缺點: 1.有時點選表單項目會沒有反應,因此需要手動輔助。 2.各步驟之間以sleep等待Browser讀取,秒數需依實際讀取時間自行調整。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time, re
from bs4 import BeautifulSoup
# Drive Firefox through the resume-search form on recruit.1111.com.tw, submit
# the query, then print the text of every result box on every result page.

# Pauses (in seconds) that give the browser time to react after a click.
# They are plain sleeps, so raise them on a slow machine or connection.
_CLICK_PAUSE = 1      # after each form click
_SEARCH_PAUSE = 15    # after submitting the search
_PAGE_PAUSE = 20      # after moving to the next result page
_WAIT_TIMEOUT = 20    # max seconds to wait for an element to become clickable

# Ordered (strategy, value) locators clicked to fill in the search form.
# NOTE(review): the meaning of the individual element IDs is inferred from the
# click order only -- confirm against the live page before editing.
_FORM_CLICKS = [
    (By.ID, 'duty0Cht'),
    (By.LINK_TEXT, u'醫事人員/護理保健'),   # "Medical staff / nursing & health care"
    (By.ID, 'llllmenu_item_10_2_0'),
    (By.ID, 'llllmenu_item_10_2_2'),
    (By.ID, 'llllmenu_item_10_2_4'),
    (By.ID, 'llllmenu_item_10_2_1'),
    (By.ID, 'llllmenu_item_10_2_3'),
    (By.ID, 'llllmenu_subClose'),
    (By.ID, 'smSure'),
    (By.ID, 'city0Cht'),
    (By.LINK_TEXT, u'台北市'),              # "Taipei City"
    (By.ID, 'llllmenu_item_0_0'),
    (By.ID, 'llllmenu_subClose'),
    (By.ID, 'smSure'),
]
_SEARCH_BUTTON = (By.ID, 'ctl00_ctl00_CPH_CPH_btnSearch')


def _click_when_ready(driver, locator, pause):
    """Wait until *locator* is clickable, click it, then sleep *pause* seconds.

    Waiting for "clickable" (visible and enabled) instead of mere presence in
    the DOM avoids clicking elements that exist but cannot be interacted with
    yet. Raises selenium's TimeoutException if the element is not clickable
    within _WAIT_TIMEOUT seconds.
    """
    WebDriverWait(driver, _WAIT_TIMEOUT).until(
        EC.element_to_be_clickable(locator)).click()
    time.sleep(pause)


browser = webdriver.Firefox()
try:
    browser.get('https://recruit.1111.com.tw/pResumeSearchTrial.aspx')

    # Fill in the form one click at a time, then submit the query.
    for _locator in _FORM_CLICKS:
        _click_when_ready(browser, _locator, _CLICK_PAUSE)
    _click_when_ready(browser, _SEARCH_BUTTON, _SEARCH_PAUSE)

    nextPage = 0
    while True:
        # Name the parser explicitly so results do not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(browser.page_source, 'html.parser')
        nextPage += 1
        print('Page:' + str(nextPage))
        for ele in soup.select('.Areabox .closedBox'):
            print(ele.text)

        # No <a id="NextButton"> on the page means this was the last page.
        if soup.find('a', {'id': 'NextButton'}) is None:
            break
        browser.find_element(By.ID, 'NextButton').click()
        time.sleep(_PAGE_PAUSE)
finally:
    # quit() ends the whole driver session (browser and driver process) even
    # when a step above raised; close() would only close the current window.
    browser.quit()