I am trying to load a CSV file from GCS into a Cloud SQL for SQL Server database using SQLAlchemy and DataFrame.to_sql.
The problem I am running into is that the column names in the database contain spaces. If I replace the spaces with the "_" character the generated INSERT works, but with the original names it fails:
Works
(@From_Date NVARCHAR(MAX)) INSERT INTO dbo.calendar ([From_Date]) VALUES (@From_Date)
Fails
(@From Date NVARCHAR(MAX)) INSERT INTO dbo.calendar ([From Date]) VALUES (@From Date)
Is there any way to make it behave like this
(@FromDate NVARCHAR(MAX)) INSERT INTO dbo.calendar ([From Date]) VALUES (@FromDate)
or this?
(@From_Date NVARCHAR(MAX)) INSERT INTO dbo.calendar ([From Date]) VALUES (@From_Date)
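Expressed directly in SQLAlchemy, this is the shape I am after (just a sketch against the example table above; the date value is only illustrative): an underscored bind parameter that still targets the bracketed column name containing the space.

import sqlalchemy

# bind parameter From_Date (no space) writing to column [From Date] (with space)
stmt = sqlalchemy.text("INSERT INTO dbo.calendar ([From Date]) VALUES (:From_Date)")

# engine comes from init_connection_engine() in the script below
with engine.begin() as conn:
    conn.execute(stmt, [{"From_Date": "2021-01-01"}])  # illustrative value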
Python Script
import os
import pandas as pd
import pytds
import sqlalchemy
from fast_to_sql import fast_to_sql as fts
from google.cloud import storage
from google.cloud.sql.connector import connector
from io import StringIO
column_names = [..., "From Date",...]
def init_connection_engine() -> sqlalchemy.engine.Engine:
    def getconn() -> pytds.Connection:
        conn = connector.connect(
            "instance_name",
            "pytds",
            user="sqlserver",
            password="",
            db="my_db"
        )
        return conn

    engine = sqlalchemy.create_engine(
        "mssql+pytds://localhost",
        creator=getconn,
    )
    engine.dialect.description_encoding = None
    return engine
# GCS client
storage_client = storage.Client(project=project_name)
# storage bucket connection
bucket = storage_client.get_bucket(bucket_name)
blob_file = bucket.get_blob(file_name)
gcs_file_byte = blob_file.download_as_string()
gcs_file_string = gcs_file_byte.decode()
file_data_string = StringIO(gcs_file_string)
calendar_dataframe = pd.read_csv(file_data_string, sep=',', header=None,
                                 usecols=[*range(0, 15)], names=column_names)
pool = init_connection_engine()
with pool.connect() as db_conn:
    calendar_dataframe.to_sql(table_id, db_conn, schema='dbo', if_exists='append',
                              chunksize=None, index=False)
The same script with mssql+pyodbc:// works fine.
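One direction I have been looking at (only a sketch, not verified against pytds) is the method= hook that DataFrame.to_sql accepts, which receives (table, conn, keys, data_iter) and lets the caller build the INSERT, so the bind-parameter names can be sanitized while the bracketed column names keep their spaces; the helper name insert_with_safe_params is mine.

import sqlalchemy

def insert_with_safe_params(table, conn, keys, data_iter):
    # keys holds the original column names; underscore them only for the bind parameters
    params = [k.replace(" ", "_") for k in keys]
    columns_sql = ", ".join(f"[{k}]" for k in keys)
    values_sql = ", ".join(f":{p}" for p in params)
    target = f"{table.schema}.{table.name}" if table.schema else table.name
    stmt = sqlalchemy.text(f"INSERT INTO {target} ({columns_sql}) VALUES ({values_sql})")
    rows = [dict(zip(params, row)) for row in data_iter]
    conn.execute(stmt, rows)

which would then be passed as:

calendar_dataframe.to_sql(table_id, db_conn, schema='dbo', if_exists='append',
                          index=False, method=insert_with_safe_params)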