您的当前位置:首页正文

selenium实战Amazon

来源:花图问答

1.结果


image.png

2.思路

3.上源码

import time

import pymysql
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait


class Amazon(object):
    """Selenium-driven scraper that prints product blurbs from Amazon search pages.

    ``get_amazon_bag`` opens a Firefox window, loads ``self.amazon_url``,
    prints the leading text of every product tile on the page and follows
    the "next page" control until it can no longer be found.
    """

    # NOTE(review): the search URL literal was missing from the original
    # source (the assignment had no right-hand side).  This default is a
    # reconstruction for a "bag" search -- confirm it, or pass the real URL
    # to the constructor.  Edit the keyword field to search for other goods.
    DEFAULT_URL = 'https://www.amazon.com/s?field-keywords=bag'

    def __init__(self, amazon_url=None):
        """Remember which search-results URL to crawl.

        Args:
            amazon_url: Search-results page to start from; falls back to
                ``DEFAULT_URL`` when omitted or empty.
        """
        self.amazon_url = amazon_url or self.DEFAULT_URL

    ################# database #######
    def mysql(self):
        """Placeholder for database setup; currently does nothing.

        Returns:
            None.
        """
        # Disabled sketch, kept for reference:
        #   conn = pymysql.Connect(host=..., user=..., password=...,
        #                          database=..., port=..., charset=...)
        #   cursor = conn.cursor()
        #   table = input(<prompt for the table name>)
        #   cursor.execute(
        #       'CREATE TABLE IF NOT EXISTS <table>('
        #       'ID INT(10) NOT NULL PRIMARY KEY AUTO_INCREMENT,'
        #       'A TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,'
        #       'COMMODITY VARCHAR(255))ENGINE = INNODB DEFAULT CHARSET=utf8')
        return None

    @staticmethod
    def _clean_name(text, limit=100):
        """Trim *text*, drop blank-line gaps and keep the first *limit* characters."""
        return text.strip().replace('\n\n', '')[:limit]

    @staticmethod
    def _scroll_page(browser, step=1000, steps=10, pause=0.2):
        """Scroll the current page down *steps* times by *step* pixels."""
        for _ in range(steps):
            browser.execute_script('window.scrollBy(0,{})'.format(step))
            time.sleep(pause)

    def _extract_names(self, html):
        """Return the cleaned text of every product tile found in *html*.

        An empty list is returned when the results list is absent, instead
        of failing on a ``None`` lookup.
        """
        soup = BeautifulSoup(html, 'lxml')
        results = soup.find('ul', attrs={'id': 's-results-list-atf'})
        if results is None:
            return []
        tiles = results.find_all('div', class_='s-item-container')
        return [self._clean_name(tile.get_text()) for tile in tiles]

    def get_amazon_bag(self):
        """Crawl the search results page by page and print each product blurb.

        For every results page the method scrolls down, parses the page
        source, prints a running index followed by the first 100 characters
        of each product tile, then clicks the "next page" control.  The
        crawl stops when that control does not become clickable within the
        wait timeout.  Other errors are printed rather than raised, and the
        browser is always closed on exit.

        Returns:
            None.
        """
        browser = webdriver.Firefox()
        count = 0
        try:
            browser.set_window_size(900, 900)
            timeout = WebDriverWait(browser, 10)

            time.sleep(3)
            browser.get(self.amazon_url)
            timeout.until(EC.presence_of_element_located((By.ID, 'atfResults')))

            while True:
                # Scroll the whole page first, then parse it once.
                self._scroll_page(browser)

                # Running total of items printed before this page.
                print(count)
                for name in self._extract_names(browser.page_source):
                    count += 1
                    print('{}\n{}'.format(count, name))
                    # Disabled persistence step: insert `name` into the
                    # COMMODITY column of the table created in mysql().
                    # Use a parameterized query if this is re-enabled.

                time.sleep(5)
                # Go to the next page; a timeout means there is none left.
                try:
                    timeout.until(
                        EC.element_to_be_clickable((By.ID, 'pagnNextString'))
                    ).click()
                except TimeoutException:
                    break
                # Give the next results page time to load before parsing.
                time.sleep(4)
        except Exception as e:
            # Best-effort crawl: report the problem instead of crashing.
            print(e)
        finally:
            # Always release the Firefox process (and, once the database
            # code is enabled, close its cursor and connection here too).
            browser.quit()
# Script entry: create the scraper and start the crawl right away.
# These statements are at module level, so they also run on import.
a = Amazon()
a.get_amazon_bag()