I have a CSV file with table names and the primary key columns for those tables, in the format below:
| Table Name | Primary Key | | Table 1 | Col1 | | Table 1 | Col2 | | Table 1 | Col3 | | Table 2 | Col11 | | Table 2 | Col12 |
I want to run a SQL query to validate the PK constraint for every table. The query to do it would look like this:
select Col1, Col2, Col3 from Table1
group by Col1, Col2, Col3
having count(*)>1
But I have thousands of tables in this file. How would I build and execute this query dynamically for each table, and write the results into a flat file? I want to do this using Python 3.
Attempt:
CSV:
My PKTest.py:
def getColumns(filename):
    """Parse the pipe-delimited CSV and map each table name to its PK columns.

    Expected row format: ``| Table Name | Primary Key |`` — field 1 is the
    table name, field 2 is one primary-key column. Rows repeating a table
    name accumulate additional columns in order of appearance.

    :param filename: path to the pipe-delimited CSV file
    :return: dict mapping table name -> list of primary-key column names
    """
    tables = {}
    with open(filename) as f:
        for line in f:
            line = line.strip()
            # Skip the header row.
            if 'Primary Key' in line:
                continue
            cols = line.split('|')
            # Guard: a blank or malformed line yields fewer than 3 fields
            # and would raise IndexError in the original version.
            if len(cols) < 3:
                continue
            table = cols[1].strip()
            col = cols[2].strip()
            # setdefault replaces the explicit "key present?" branch.
            tables.setdefault(table, []).append(col)
    return tables
def runSQL(table, columns):
    """Build the duplicate-PK validation query for one table.

    The generated statement groups by the full primary-key column set and
    keeps only groups that occur more than once, i.e. PK violations.

    :param table: table name (embedded spaces are stripped out)
    :param columns: primary-key column names for the table
    :return: the SQL statement as a string
    """
    col_list = ', '.join(columns)
    table_name = table.replace(' ', '')
    return (
        f'select {col_list} from {table_name} '
        f'group by {col_list} having count(*) > 1'
    )
if __name__ == '__main__':
    # NOTE: original code called getColumuns() — a typo that raises
    # NameError before any work is done.
    tables = getColumns('PKTest.csv')
    # Pre-declare so the finally block is safe even if connecting fails
    # (the original would raise NameError on cursor.close()).
    cursor = None
    ctx = None
    try:
        # TODO: open your database connection here, e.g.
        #   ctx = <driver>.connect(...)
        #   cursor = ctx.cursor()
        # Results are also written to a flat file, as the question requires.
        with open('pk_violations.txt', 'w') as out:
            for table in tables:
                sql = runSQL(table, tables[table])
                print(sql)
                cursor.execute(sql)
                for result in cursor:
                    print(result)
                    out.write('{0}: {1}\n'.format(table, result))
    finally:
        # Close only what was actually opened.
        if cursor is not None:
            cursor.close()
        if ctx is not None:
            ctx.close()
