• Home
  • About
    • Miles' Blog photo

      Miles' Blog

      Miles 用來記錄學習心得及筆記的地方。

    • Learn More
    • Email
    • Github
  • Posts
    • All Posts
    • All Tags
  • Projects

TLD LIST 爬蟲程式

04 Mar 2017

Reading time ~1 minute

用 Selenium 和 bs4 (BeautifulSoup) 爬取 tld-list 的報價 (register 和 renew),並準備做分析或 Visualization。

# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import time, re
from bs4 import BeautifulSoup

# Scrape every page of the tld-list.com pricing table with a real Firefox
# session, collecting TLD names plus their register and renew prices, then
# print one (name, register_price, renew_price) tuple per entry.


def _price_of(cell):
    """Return the price shown in a table cell as a float, or 0.0 if absent.

    The price is read from the cell's ``<span itemprop="price">`` element;
    thousands separators (",") are stripped before conversion.
    """
    price_span = cell.find("span", {"itemprop": "price"})
    if price_span is None:
        return 0.0
    return float(price_span.text.replace(',', ''))


domainNameList = []
regPriceList = []
renewPriceList = []

browser = webdriver.Firefox()
try:
    browser.get("https://tld-list.com/")
    while True:
        # Name the parser explicitly so the parse result does not depend on
        # which optional parsers happen to be installed (and to avoid the
        # bs4 "no parser was explicitly specified" warning).
        soup = BeautifulSoup(browser.page_source, "html.parser")

        # NOTE(review): the three lists are filled independently, so the
        # zip() below pairs entries correctly only if every row carries the
        # same number of .label-info, .rg-col and .rn-col elements --
        # confirm against the page markup.
        for row in soup.select('.dataTables_table_wrapper table tbody tr'):
            domainNameList.extend(tag.text for tag in row.select('.label-info'))
            regPriceList.extend(_price_of(cell) for cell in row.select('.rg-col'))
            renewPriceList.extend(_price_of(cell) for cell in row.select('.rn-col'))

        # Stop when no anchor has exactly the class string of the "next" button.
        if not soup.find_all("a", {"class": "btn btn-default next"}):
            break
        # find_element(By.LINK_TEXT, ...) replaces find_element_by_link_text,
        # which was removed in Selenium 4.
        browser.find_element(By.LINK_TEXT, ">").click()
finally:
    # Always end the WebDriver session, even if scraping fails part-way,
    # so no browser/driver process is left behind.
    browser.quit()

zippedDP = zip(domainNameList, regPriceList, renewPriceList)
for zdp in zippedDP:
    # Parenthesised form prints the same tuple on Python 2 and Python 3.
    print(zdp)


Python · selenium · BeautifulSoup · I.T. Share Tweet +1