I'm currently scraping websites for multiple keywords in their source code using python 2.7. I would like to allocate and export these keywords to individual columns in an exported CSV file like this:
However, with my code I'm getting this:
My code:
import urllib2
import csv
fieldnames = ['Website', 'Sitemap', 'Viewport', '@media']
def csv_writerheader(path):
with open(path, 'w') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
def csv_writer(domainname,Sitemap, path):
with open(path, 'a') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
# writer.writeheader()
writer.writerow({'Website': domainname, 'Sitemap': Sitemap})
csv_output_file = 'exported_print_results.csv'
keyword1 = ['sitemap']
keyword2 = ['viewport']
keyword3 = ['@media']
csv_writerheader(csv_output_file)
f = open('top1m-edited.csv')
csv_f = csv.reader(f)
for line in f:
strdomain = line.strip()
if '.nl' in strdomain:
try:
req = urllib2.Request(strdomain.strip())
response = urllib2.urlopen(req)
html_content = response.read()
# keyword 1
for searchstring in keyword1:
if searchstring.lower() in str(html_content).lower():
print (strdomain, keyword1, 'found')
csv_writer(strdomain, 'found', csv_output_file)
else:
print (strdomain, keyword1, 'not found')
csv_writer(strdomain, 'not found', csv_output_file)
# keyword 2
for searchstring in keyword2:
if searchstring.lower() in str(html_content).lower():
print (strdomain, keyword2, 'found')
csv_writer(strdomain, 'found', csv_output_file)
else:
print (strdomain, keyword2, 'not found')
csv_writer(strdomain, 'not found', csv_output_file)
# keyword 3
for searchstring in keyword3:
if searchstring.lower() in str(html_content).lower():
print (strdomain, keyword3, 'found')
csv_writer(strdomain, 'found', csv_output_file)
else:
print (strdomain, keyword3, 'not found')
csv_writer(strdomain, 'not found', csv_output_file)
except urllib2.HTTPError:
print (strdomain, 'HTTP ERROR')
except urllib2.URLError:
print (strdomain, 'URL ERROR')
except urllib2.socket.error:
print (strdomain, 'SOCKET ERROR')
except urllib2.ssl.CertificateError:
print (strdomain, 'SSL Certificate ERROR')
f.close()
How should I edit my code to make this work?

