You don't have to do things in that order. If you instead first read all the data you need, you'll see another pattern emerge:
for file in allfiles:
    print(file)
    xls = pd.ExcelFile(file)
    df_sheet1 = pd.read_excel(xls, 'sheet1')
    df_overall_data = pd.read_excel(xls, 'overall_data')

    # The first 'date' entry supplies both the year and the month.
    first_date = df_overall_data['date'].iloc[0]
    year = first_date.year
    month = first_date.month

    # First 'Precision' value listed for each region.
    precision_actual_a = df_sheet1.loc[df_sheet1['Region'] == 'AA', 'Precision'].iloc[0]
    precision_actual_b = df_sheet1.loc[df_sheet1['Region'] == 'BB', 'Precision'].iloc[0]
    precision_actual_c = df_sheet1.loc[df_sheet1['Region'] == 'CC', 'Precision'].iloc[0]

    list_a = []
    list_b = []
    list_c = []

    # Year goes first in every list.
    list_a.append(year)
    list_b.append(year)
    list_c.append(year)

    # Month goes second.
    list_a.append(month)
    list_b.append(month)
    list_c.append(month)

    # Then the precision value read for the region.
    list_a.append(precision_actual_a)
    list_b.append(precision_actual_b)
    list_c.append(precision_actual_c)

    # Finally the fixed Q2 value of the region.
    list_a.append(77)
    list_b.append(65)
    list_c.append(72)

    # Store each finished list in its region's precision list.
    df_precision_list_a.append(list_a)
    df_precision_list_b.append(list_b)
    df_precision_list_c.append(list_c)
You might notice that all the separate `append` calls are unnecessary. We can put all the values directly into the lists when we create them:
for file in allfiles:
    print(file)
    xls = pd.ExcelFile(file)
    df_sheet1 = pd.read_excel(xls, 'sheet1')
    df_overall_data = pd.read_excel(xls, 'overall_data')

    # The first 'date' entry supplies both the year and the month.
    first_date = df_overall_data['date'].iloc[0]
    year = first_date.year
    month = first_date.month

    # First 'Precision' value listed for each region.
    precision_actual_a = df_sheet1.loc[df_sheet1['Region'] == 'AA', 'Precision'].iloc[0]
    precision_actual_b = df_sheet1.loc[df_sheet1['Region'] == 'BB', 'Precision'].iloc[0]
    precision_actual_c = df_sheet1.loc[df_sheet1['Region'] == 'CC', 'Precision'].iloc[0]

    # One list per region: year, month, precision, fixed Q2 value.
    list_a = [year, month, precision_actual_a, 77]
    list_b = [year, month, precision_actual_b, 65]
    list_c = [year, month, precision_actual_c, 72]

    # Store each list in its region's precision list.
    df_precision_list_a.append(list_a)
    df_precision_list_b.append(list_b)
    df_precision_list_c.append(list_c)
You might notice now that the temporary lists are unnecessary too:
for file in allfiles:
    print(file)
    xls = pd.ExcelFile(file)
    df_sheet1 = pd.read_excel(xls, 'sheet1')
    df_overall_data = pd.read_excel(xls, 'overall_data')

    # The first 'date' entry supplies both the year and the month.
    first_date = df_overall_data['date'].iloc[0]
    year = first_date.year
    month = first_date.month

    # First 'Precision' value listed for each region.
    precision_actual_a = df_sheet1.loc[df_sheet1['Region'] == 'AA', 'Precision'].iloc[0]
    precision_actual_b = df_sheet1.loc[df_sheet1['Region'] == 'BB', 'Precision'].iloc[0]
    precision_actual_c = df_sheet1.loc[df_sheet1['Region'] == 'CC', 'Precision'].iloc[0]

    # Append year, month, precision and the fixed Q2 value for each region.
    df_precision_list_a.append([year, month, precision_actual_a, 77])
    df_precision_list_b.append([year, month, precision_actual_b, 65])
    df_precision_list_c.append([year, month, precision_actual_c, 72])
And now we have small enough code to be put in its own function. You could reduce the code a bit more and put it in a function like this:
def create_precision_lists(files):
    """Build one precision DataFrame per region from Excel workbooks.

    For every workbook in *files*, the year and month are taken from the
    first ``date`` value of the ``overall_data`` sheet, and the first
    ``Precision`` value of each region (``AA``, ``BB``, ``CC``) is taken
    from ``sheet1``. Each workbook contributes one row to each region.

    Args:
        files: Iterable of workbooks, given as anything ``pd.ExcelFile``
            accepts (for example file paths).

    Returns:
        A list of three DataFrames, for regions AA, BB and CC in that
        order, with the columns ``Year``, ``Month``, ``Q1`` and ``Q2``.
        ``Q1`` holds the precision value read from the workbook and ``Q2``
        the fixed value of the region. The DataFrames are empty when
        *files* is empty.

    Raises:
        KeyError: If an expected column (``date``, ``Region`` or
            ``Precision``) is missing from a sheet.
        IndexError: If ``overall_data`` has no rows, or ``sheet1`` has no
            row for one of the regions.
    """
    # (region, fixed Q2 value) pairs; the order defines the result order.
    region_q2 = (('AA', 77), ('BB', 65), ('CC', 72))
    df_precision_lists = [[] for _ in region_q2]

    for file in files:
        # The context manager closes the workbook once both sheets are read.
        with pd.ExcelFile(file) as xls:
            df_sheet1 = pd.read_excel(xls, 'sheet1')
            df_overall_data = pd.read_excel(xls, 'overall_data')

        first_date = df_overall_data['date'].iloc[0]
        year, month = first_date.year, first_date.month

        for precision_list, (region, q2) in zip(df_precision_lists, region_q2):
            precision_actual = df_sheet1.loc[
                df_sheet1['Region'] == region, 'Precision'
            ].iloc[0]
            # Append a row per file; assigning here would discard earlier files.
            precision_list.append([year, month, precision_actual, q2])

    return [
        pd.DataFrame(precision_list, columns=['Year', 'Month', 'Q1', 'Q2'])
        for precision_list in df_precision_lists
    ]
It's not a great modular function though. To make it modular, you have to remove all the hardcoded values, and you shouldn't rely on specific keys and attributes existing. However, it's hard to do anything about it without knowing the full context of what you're trying to do and what your hard requirements and constraints are.