0

I'm extracting demand/supply/competitiveness data by entering keywords (taken from 디지털가전.xlsx) into the website https://itemscout.io/keyword/. The problem is that when a keyword can't be analyzed by this website, no demand/supply/competitiveness data is shown, and the script stops running because of a TimeoutException. I want it to keep running even when no data is shown. How do I solve this problem?

raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message: 

This is the file where the keywords are stored: https://drive.google.com/file/d/1oKIXZWiw5WsazHBstKVT8UlgRSZfdx62/view. The last column of the Excel file contains the keywords.

import xlrd
import pandas as pd
import xlrd
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import openpyxl
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException


# Start a Chrome session through the chromedriver binary at this local path.
driver = webdriver.Chrome(executable_path=r"C:\Users\Kim woo jae\PycharmProjects\100개 키워드\chromedriver.exe")
# Accumulators filled by the keyword loop and later written back as DataFrame
# columns: list -> "키워드 확인", list1 -> "총검색량", list2 -> "공급량",
# list3 -> "경쟁률".
# NOTE(review): the name `list` shadows the built-in `list` type.
list = []
list1 = []
list2 = []
list3 = []

driver.get("https://itemscout.io/keyword/")
# Shared explicit wait, created with a 120-second limit; `wait.until(...)`
# raises TimeoutException when its condition is not met in time.
wait = WebDriverWait(driver, 120)
# One initial search with a fixed keyword: fill the input, click the span
# next to it, then read the text of the result table body.
b = wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="container"]/div/div[2]/div/input')))
b.clear()
b.send_keys('화장대')
c = wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="container"]/div/div[2]/div/span')))
c.click()
# NOTE(review): `d` (and `c` above) are reassigned inside the keyword loop
# before being read, so this first search looks like a warm-up only - confirm.
d = wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="container"]/div/div[3]/div[2]/div[1]/div[1]/div[2]/div[2]/div/table/tbody'))).text

# Load the keyword sheet; the "키워드" column supplies the search terms.
df = pd.read_excel(r'디지털가전.xlsx')
ky = df['키워드']

# For every keyword: search it on the page, read the result table, and keep
# the first three values together with the keyword itself.
for k in ky:
    b1 = wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="container"]/div/div[2]/div/input')))
    b1.clear()
    b1.send_keys(k)
    # c1 = wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="container"]/div/div[2]/div/span')))
    wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="container"]/div/div[2]/div/span'))).click()
    # c1.click()
    # Fixed pause between clicking and waiting for the result cell.
    time.sleep(2)
    # No exception handling here: if this cell never appears, the
    # TimeoutException propagates and ends the whole script.
    wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="container"]/div[1]/div[3]/div[2]/div[1]/div[1]/div[2]/div[2]/div/table/tbody/tr/td[2]/div')))

    d1 = wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="container"]/div/div[3]/div[2]/div[1]/div[1]/div[2]/div[2]/div/table/tbody'))).text
    # Tokenise the table text: commas inside values become dots first, then
    # line breaks and spaces are turned into commas used as the separator.
    d1 = (d1).replace(',', '.')
    d1 = ' '.join(d1.splitlines())
    d1 = d1.replace(' ', ',')
    result = [x.strip() for x in d1.split(',')]

    # First three tokens, each wrapped in a one-element list; they end up in
    # the "총검색량", "공급량" and "경쟁률" columns respectively.
    d = [result[0]]
    s = [result[1]]
    c = [result[2]]
    print([k],d,s,c)

    # Each loop below appends exactly one item to its accumulator.
    for ks in [k]:
        list.append(ks)
    for ds in d:
        list1.append(ds)
    for ss in s:
        list2.append(ss)
    for cs in c:
        list3.append(cs)

# Attach the collected values as new columns.
# NOTE(review): this presumably requires one entry per row of df - confirm
# before skipping any keyword inside the loop.
df["키워드 확인"] = list
df["총검색량"] = list1
df["공급량"] = list2
df["경쟁률"] = list3


# Write the enriched sheet to a new workbook.
df.to_excel("디지털가전 최종.xlsx")

1 Answer 1

1

You can wrap the body of the loop in a try/except, so that a keyword that times out is reported and the loop moves on to the next one:

# Look up every keyword on the page. A keyword whose result table never shows
# up is reported and recorded with empty values, so the run continues and the
# four result lists always hold exactly one entry per keyword.
for k in ky:
    # Placeholders that are kept when the lookup times out.
    demand = supply = competition = None
    try:
        b1 = wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="container"]/div/div[2]/div/input')))
        b1.clear()
        b1.send_keys(k)
        wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="container"]/div/div[2]/div/span'))).click()
        # Fixed pause before reading the table; presumably gives the page time
        # to replace the previous keyword's results - TODO confirm it suffices.
        time.sleep(10)
        wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="container"]/div[1]/div[3]/div[2]/div[1]/div[1]/div[2]/div[2]/div/table/tbody/tr/td[2]/div')))

        d1 = wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="container"]/div/div[3]/div[2]/div[1]/div[1]/div[2]/div[2]/div/table/tbody'))).text
        # Tokenise the table text: commas inside values become dots first,
        # then line breaks and spaces are turned into the comma separator.
        d1 = d1.replace(',', '.')
        d1 = ' '.join(d1.splitlines())
        d1 = d1.replace(' ', ',')
        result = [x.strip() for x in d1.split(',')]

        demand, supply, competition = result[0], result[1], result[2]
        print([k], [demand], [supply], [competition])

        driver.execute_script("window.history.go(-1)")  # back to search
        driver.refresh()  # extra safety
    except TimeoutException:
        print("Got Timeout for Key:", k)

    # Appended on success and on timeout alike: the lists are later assigned
    # as DataFrame columns, which fails if a keyword's row is missing.
    list.append(k)
    list1.append(demand)
    list2.append(supply)
    list3.append(competition)
Sign up to request clarification or add additional context in comments.

11 Comments

Where should I put the `except TimeoutException:`?
wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="container"]/div[1]/div[3]/div[2]/div[1]/div[1]/div[2]/div[2]/div/table/tbody/tr/td[2]/div'))) If this element doesn't appear, I want the loop to continue running.
Can I ask you one more question, sir? The demand/supply/competitiveness data of a keyword is sometimes printed exactly the same as that of the previous keyword. Is there any way I can make it correct? Should I increase the sleep time?
Are you saying the results are wrong? Or do you just want to remove/skip duplicate data?
For example, if the result for keyword A is 1000/1000/1000, the results for the following keywords B and C are also shown as 1000/1000/1000, when they should be different.
|

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.