I’m using Python + Selenium + ChromeDriver to check a list of titles (from a CSV file) against an online library catalog. My script searches each title and tries to determine if a specific library has it.
The issue is that even when I can see in the browser that the title is available, my script still reports it as “not found.”
After inspecting the site, I realized:
- The first results page only shows a summary like “1 library has this title”, without listing the libraries.
- You have to click the title link to open a details page that contains a holdings table (<table id="dpCentralHoldingsDetails">) showing which libraries own the item.
- My script doesn’t reliably navigate to this details page, or doesn’t wait long enough for the holdings table to load (it runs headless Chrome).
import csv
import time
import random
import urllib.parse
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
CSV_FILE = "/Users/hudaabbasi/Desktop/mel_catalog_checker/horizon_dvds.csv"
OUTPUT_FILE = "/Users/hudaabbasi/Desktop/mel_catalog_checker/not_found.csv"
CHROMEDRIVER_PATH = "/Users/hudaabbasi/Desktop/chromedriver"
options = Options()
options.add_argument("--headless=new")
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")
options.add_argument("--window-size=1920,1080")
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("--disable-dev-shm-usage")
service = Service(CHROMEDRIVER_PATH)
driver = webdriver.Chrome(service=service, options=options)
wait = WebDriverWait(driver, 20)
with open(CSV_FILE, "r", encoding="utf-8", errors="ignore") as f:
    reader = csv.DictReader(f)
    rows = list(reader)
not_found = []
def check_melcat(title):
"""Search the catalog for a title and return True if found under a specific library."""
try:
query = urllib.parse.quote_plus(title)
url = f"https://search.mel.org/iii/encore/search/C__S{query}__Orightresult__U?lang=eng&suite=gold"
driver.get(url)
# Wait for either results or no results
wait.until(lambda d: d.find_elements(By.CSS_SELECTOR, "a.institutionCount, .noResultsText"))
# Case 1: No results
if driver.find_elements(By.CSS_SELECTOR, ".noResultsText"):
return False
# Case 2: Results exist — click first record
record_link = driver.find_elements(By.CSS_SELECTOR, "a.institutionCount")
if not record_link:
return False
href = record_link[0].get_attribute("href")
driver.get(href)
# Wait for the holdings table to appear
wait.until(EC.presence_of_element_located((By.ID, "dpCentralHoldingsDetails")))
# Extract all table cells
tds = driver.find_elements(By.CSS_SELECTOR, "#dpCentralHoldingsDetails td")
for td in tds:
text = td.text.strip().lower()
if "dearborn public library" in text and "heights" not in text:
return True
return False
except Exception as e:
print(f"⚠️ Error searching '{title}': {e}")
with open("debug_lastpage.html", "w", encoding="utf-8") as dbg:
dbg.write(driver.page_source)
return False
for row in rows:
    title = row.get("Title") or row.get("title") or list(row.values())[0]
    title = title.strip() if title else ""
    barcode = row.get("Barcode", "")
    print(f"🔍 Checking '{title}' ...")
    found = False
    retries = 2
    for attempt in range(retries):
        found = check_melcat(title)
        if found or attempt == retries - 1:
            break
        print("⏳ Retrying...")
        time.sleep(3)
    if found:
        print(f"'{title}' is listed for the library.")
    else:
        print(f"'{title}' NOT found for the library.")
        not_found.append({"Title": title, "Barcode": barcode})
    # polite random delay
    time.sleep(random.uniform(2, 4))
if not_found:
    pd.DataFrame(not_found).to_csv(OUTPUT_FILE, index=False, encoding="utf-8")
    print(f"\n Done! Missing titles saved to '{OUTPUT_FILE}'.")
else:
    print("\n All items are listed!")
driver.quit()
What I tried and what I expected
I used WebDriverWait and time.sleep() after clicking the result link to wait for the holdings table to appear, but it still didn’t always load before Selenium tried to read it. I expected the script to find the table and detect the target library name, but instead it keeps returning “not found” for every title.
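For reference, this is roughly the kind of two-stage wait I have been experimenting with. The helper name, the 30-second timeout, and the populated-cell check are my own guesses, not anything the site documents:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def wait_for_holdings_table(driver, timeout=30):
    """Wait until the holdings table is visible AND has at least one populated cell."""
    wait = WebDriverWait(driver, timeout)
    # Stage 1: the table element itself is present and visible on the details page
    wait.until(EC.visibility_of_element_located((By.ID, "dpCentralHoldingsDetails")))
    # Stage 2: the table actually contains cell text, not just an empty shell
    wait.until(lambda d: any(
        td.text.strip()
        for td in d.find_elements(By.CSS_SELECTOR, "#dpCentralHoldingsDetails td")
    ))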
What I want to know
- How can I make Selenium wait reliably until the holdings table (dpCentralHoldingsDetails) is fully loaded?
- How can I ensure my script correctly checks whether a specific library name appears in that table? (A rough sketch of what I have in mind is below.)
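To make the second question concrete, this is how I imagine the check could work once the table has loaded. The row-by-row matching and whitespace normalization are my own idea; "dearborn public library" and the "heights" exclusion mirror the check in my script above:

from selenium.webdriver.common.by import By

def library_in_holdings(driver, library_name="dearborn public library", exclude="heights"):
    """Return True if any holdings row mentions the target library (case-insensitive)."""
    for row in driver.find_elements(By.CSS_SELECTOR, "#dpCentralHoldingsDetails tr"):
        # Collapse the whole row into one lowercase, whitespace-normalized string
        text = " ".join(row.text.lower().split())
        if library_name in text and exclude not in text:
            return True
    return False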
Did you check the page without --headless? Do you see the expected elements when you run without --headless? Maybe it doesn't have dpCentralHoldingsDetails at all. Share the CSV, because we can't run the script to test the problem. Use driver.page_source to see what you really get from the server when you run headless. Maybe it sends a CAPTCHA or warning page because it detected that you are using a script.
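For example, something along these lines (the filenames here are arbitrary) would show what the headless session actually received:

# Save exactly what the headless browser rendered so it can be inspected by hand
with open("debug_page.html", "w", encoding="utf-8") as dbg:
    dbg.write(driver.page_source)
driver.save_screenshot("debug_screenshot.png")  # screenshot of the headless window
print(driver.title)  # a CAPTCHA or block page usually has a different title than the catalog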