I am trying to parse API response from GA to a Pandas DataFrame.
The request (sample from Google page):
def initialize_analyticsreporting():
"""Initializes an Analytics Reporting API V4 service object.
Returns:
An authorized Analytics Reporting API V4 service object.
"""
credentials = ServiceAccountCredentials.from_json_keyfile_name(
KEY_FILE_LOCATION, SCOPES)
# Build the service object.
analytics = build('analyticsreporting', 'v4', credentials=credentials)
return analytics
def get_report(analytics):
"""Queries the Analytics Reporting API V4.
Args:
analytics: An authorized Analytics Reporting API V4 service object.
Returns:
The Analytics Reporting API V4 response.
"""
return analytics.reports().batchGet(
body={
'reportRequests': [
{
'viewId': VIEW_ID,
'dateRanges': [{'startDate': 'today', 'endDate': 'today'}],
'metrics': [{'expression': 'ga:sessions'}],
'dimensions': [{'name': 'ga:country'}, {'name': 'ga:hostname'}]
}]
}
).execute()
And the response:
def print_response(response):
"""Parses and prints the Analytics Reporting API V4 response.
Args:
response: An Analytics Reporting API V4 response.
"""
for report in response.get('reports', []):
columnHeader = report.get('columnHeader', {})
dimensionHeaders = columnHeader.get('dimensions', [])
metricHeaders = columnHeader.get(
'metricHeader', {}).get('metricHeaderEntries', [])
for row in report.get('data', {}).get('rows', []):
dimensions = row.get('dimensions', [])
dateRangeValues = row.get('metrics', [])
for header, dimension in zip(dimensionHeaders, dimensions):
print(header + ': ' + dimension)
for i, values in enumerate(dateRangeValues):
print('Date range: ' + str(i))
for metricHeader, value in zip(metricHeaders, values.get('values')):
print(metricHeader.get('name') + ': ' + value)
def main():
analytics = initialize_analyticsreporting()
response = get_report(analytics)
print_response(response)
Which outputs the following:
>> ga:country: United States
>> ga:hostname: nl.sitename.com
>> Date range: 0
>> ga:sessions: 1
>> ga:country: United States
>> ga:hostname: sitename.com
>> Date range: 0
>> ga:sessions: 2078
>> ga:country: Venezuela
>> ga:hostname: sitename.com
>> Date range: 0
>> ga:sessions: 1
>> ga:country: Vietnam
>> ga:hostname: de.sitename.com
>> Date range: 0
>> ga:sessions: 1
>> ga:country: Vietnam
>> ga:hostname: sitename.com
>> Date range: 0
>> ga:sessions: 32
Firstly I would like to place it in a dataframe rather than print it as in the Google example.
What I've tried:
def main():
analytics = initialize_analyticsreporting()
response = get_report(analytics)
df = pd.DataFrame(print_response(response))
return df
But this did not work since print_response function prints stuff.
I understand that probably I would need to add pandas dataframe and append information to it in the print_response function but I have no clue where I would do that to get something like this:
ga:country ga:hostname Date range ga:sessions
United States nl.sitename.com 0 1
Venezuela nl.sitename.com 0 1
Thank you for your suggestions.