TLD LIST 爬蟲程式

04 Mar 2017

Reading time ~1 minute

用 Selenium 和 BeautifulSoup (bs4) 爬取 tld-list.com 上各頂級網域的報價（註冊 register 與續約 renew 價格），以便後續進行分析或視覺化 (visualization)。

# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import time, re
from bs4 import BeautifulSoup

# Scrape every page of the tld-list.com price table and collect, per TLD,
# its name, registration price and renewal price.


def _price_of(cell):
    """Return the price shown in a table cell as a float.

    The price is read from the cell's <span itemprop="price">; thousands
    separators (",") are stripped before conversion. A missing cell or a
    cell without a price span yields 0.0.
    """
    if cell is None:
        return 0.0
    price_span = cell.find("span", {"itemprop": "price"})
    if price_span is None:
        return 0.0
    return float(price_span.text.replace(',', ''))


def _first(matches):
    """Return the first element of a select() result, or None if empty."""
    return matches[0] if matches else None


domainNameList = []
regPriceList = []
renewPriceList = []

browser = webdriver.Firefox()
try:
    browser.get("https://tld-list.com/")
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    soup = BeautifulSoup(browser.page_source, "html.parser")
    while True:
        for row in soup.select('.dataTables_table_wrapper table tbody tr'):
            # Read all three fields from the same row before appending, so
            # the three lists always stay index-aligned.
            name_tag = _first(row.select('.label-info'))
            if name_tag is None:
                # Row carries no domain label (presumably a placeholder
                # row); skipping it keeps names and prices in step.
                continue
            domainNameList.append(name_tag.text)
            regPriceList.append(_price_of(_first(row.select('.rg-col'))))
            renewPriceList.append(_price_of(_first(row.select('.rn-col'))))

        # The "next" link is only matched while it carries exactly these
        # classes; when it no longer does, the last page has been read.
        if not soup.find_all("a", {"class": "btn btn-default next"}):
            break
        browser.find_element(By.LINK_TEXT, ">").click()
        # NOTE(review): the page source is read immediately after the click;
        # if the table is refreshed asynchronously an explicit wait may be
        # needed here -- confirm against the live site.
        soup = BeautifulSoup(browser.page_source, "html.parser")
finally:
    # Always end the WebDriver session, even if scraping fails part-way.
    browser.quit()

zippedDP = list(zip(domainNameList, regPriceList, renewPriceList))
for zdp in zippedDP:
    print(zdp)