I'm currently scraping websites for multiple keywords in their source code using Python 2.7. I would like to write the result for each keyword to its own column in an exported CSV file, like this:

[screenshot of the desired CSV: one row per website, with the Sitemap, Viewport and @media results each in their own column]

However, with my code I'm getting this:

[screenshot of the actual CSV: one row per keyword check, with every result written to the Sitemap column]

My code:

import urllib2
import csv

fieldnames = ['Website', 'Sitemap', 'Viewport', '@media']

def csv_writerheader(path):
    with open(path, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

def csv_writer(domainname,Sitemap, path):
    with open(path, 'a') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        # writer.writeheader()
        writer.writerow({'Website': domainname, 'Sitemap': Sitemap})

csv_output_file = 'exported_print_results.csv'
keyword1 = ['sitemap']
keyword2 = ['viewport']
keyword3 = ['@media']

csv_writerheader(csv_output_file)

f = open('top1m-edited.csv')
csv_f = csv.reader(f)
for line in f:
    strdomain = line.strip()
    if '.nl' in strdomain:
        try:
            req = urllib2.Request(strdomain.strip())
            response = urllib2.urlopen(req)
            html_content = response.read()

            # keyword 1
            for searchstring in keyword1:
                if searchstring.lower() in str(html_content).lower():
                    print (strdomain, keyword1, 'found')
                    csv_writer(strdomain, 'found', csv_output_file)

                else:
                    print (strdomain, keyword1, 'not found')
                    csv_writer(strdomain, 'not found', csv_output_file)

            # keyword 2
            for searchstring in keyword2:
                if searchstring.lower() in str(html_content).lower():
                    print (strdomain, keyword2, 'found')
                    csv_writer(strdomain, 'found', csv_output_file)

                else:
                    print (strdomain, keyword2, 'not found')
                    csv_writer(strdomain, 'not found', csv_output_file)

            # keyword 3
            for searchstring in keyword3:
                if searchstring.lower() in str(html_content).lower():
                    print (strdomain, keyword3, 'found')
                    csv_writer(strdomain, 'found', csv_output_file)

                else:
                    print (strdomain, keyword3, 'not found')
                    csv_writer(strdomain, 'not found', csv_output_file)

        except urllib2.HTTPError:
            print (strdomain, 'HTTP ERROR')

        except urllib2.URLError:
            print (strdomain, 'URL ERROR')

        except urllib2.socket.error:
            print (strdomain, 'SOCKET ERROR')

        except urllib2.ssl.CertificateError:
            print (strdomain, 'SSL Certificate ERROR')
f.close()

How should I edit my code to make this work?

2 Answers

Consider using a dictionary to store the found / not found value for each keyword and passing that dictionary into your CSV write method. But before that, one of your issues is not specifying lineterminator in csv.DictWriter(), which tends to be needed for text files on Windows. Also, iterate across a single list of keywords in one loop instead of repeating the same routine for each keyword.

import urllib2
import csv
import socket   # socket.error is raised on low-level connection failures
import ssl      # ssl.CertificateError is raised on certificate mismatches

fieldnames = ['Website', 'Sitemap', 'Viewport', '@media']

def csv_writerheader(path):
    with open(path, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, lineterminator='\n')
        writer.writeheader()

def csv_writer(dictdata, path):
    with open(path, 'a') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, lineterminator='\n')
        writer.writerow(dictdata)

csv_output_file = 'exported_print_results.csv'
# LIST OF KEYWORDS (TITLE CASE TO MATCH FIELD NAMES)
keywords = ['Sitemap', 'Viewport', '@media']

csv_writerheader(csv_output_file)

with open('top1m-edited.csv', 'r') as f:
    # iterate over the raw lines directly; each line holds one domain
    for line in f:
        strdomain = line.strip()
        # INITIALIZE DICT
        data = {'Website': strdomain}

        if '.nl' in strdomain:        
            try:
                req = urllib2.Request(strdomain.strip())
                response = urllib2.urlopen(req)
                html_content = response.read()

                # ITERATE THROUGH EACH KEY AND UPDATE DICT
                for searchstring in keywords:
                    if searchstring.lower() in str(html_content).lower():
                        print (strdomain, searchstring, 'found')
                        data[searchstring] = 'found'    
                    else:
                        print (strdomain, searchstring, 'not found')
                        data[searchstring] = 'not found'

                # CALL METHOD PASSING DICT AND OUTPUT FILE
                csv_writer(data, csv_output_file)

            except urllib2.HTTPError:
                print (strdomain, 'HTTP ERROR')

            except urllib2.URLError:
                print (strdomain, 'URL ERROR')

            except socket.error:
                print (strdomain, 'SOCKET ERROR')

            except ssl.CertificateError:
                print (strdomain, 'SSL Certificate ERROR')

CSV Output

Website                 Sitemap     Viewport    @media
http://www.google.nl    not found   not found   found
http://www.youtube.nl   not found   found       not found
http://www.facebook.nl  not found   found       not found
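
One design note on the code above: csv_writer() reopens the output file in append mode for every domain, which works but is slow for long domain lists. A minimal sketch of an alternative, opening the file once and reusing a single writer (the sample rows here simply mirror the table above, as a stand-in for the dicts built in the scraping loop):

import csv

fieldnames = ['Website', 'Sitemap', 'Viewport', '@media']

# Sample rows standing in for the dicts the scraping loop builds.
rows = [
    {'Website': 'http://www.google.nl', 'Sitemap': 'not found',
     'Viewport': 'not found', '@media': 'found'},
]

# Open the output file once and reuse a single writer, instead of
# reopening the file in append mode for every domain.
with open('exported_print_results.csv', 'w') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames,
                            lineterminator='\n')
    writer.writeheader()
    for data in rows:
        writer.writerow(data)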

The default separator in your spreadsheet appears not to be a comma; most likely it is a TAB. You could either change the separator to comma when importing (spreadsheet programs typically show an import dialog that lets you select it) or write the file from Python with TAB as the field separator.
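
For the second option, a minimal sketch using csv.DictWriter with a TAB delimiter (the fieldnames, output file name and sample row are carried over from the question and the table above, as assumptions):

import csv

fieldnames = ['Website', 'Sitemap', 'Viewport', '@media']

# Write the output with TAB as the field separator so spreadsheet
# programs that expect tab-delimited text pick up the columns.
with open('exported_print_results.csv', 'w') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames,
                            delimiter='\t', lineterminator='\n')
    writer.writeheader()
    writer.writerow({'Website': 'http://www.google.nl',
                     'Sitemap': 'not found',
                     'Viewport': 'not found',
                     '@media': 'found'})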
