In my flow, I query Hive, update the file names, and then want to combine the resulting CSVs into one Excel workbook with multiple worksheets. Using the code below, I was able to merge the two CSV files into one Excel workbook with multiple worksheets. How do I get the script to use the two files from the NiFi flow instead of reading them from a directory on my PC? I've seen that I can call "flowFile = session.get()", but does this line capture both flowfiles?
import glob
import csv
import xlwt
import os
import xlsxwriter
import datetime
from org.apache.commons.io import IOUtils
from java.nio.charset import StandardCharsets
from org.apache.nifi.processor.io import StreamCallback
# Output workbook; the file name carries the current wall-clock time (HH-MM-SS).
timestamp = datetime.datetime.now().strftime('%H-%M-%S')
wb = xlsxwriter.Workbook("combined_at%s.xlsx" % timestamp)

# Flowfile handle taken from the NiFi-provided `session` binding.
flowFile = session.get()

# Every character in this string is stripped from cell values before writing.
replacer = ",[]\"\"'\'"

# One worksheet per exported attribute, created in the order listed here.
(worksheet,
 worksheet2,
 worksheet3,
 worksheet4,
 worksheet5,
 worksheet6,
 worksheet7) = [
    wb.add_worksheet(title)
    for title in (
        "make",
        "ownership",
        "marital",
        "drivers",
        "vehicles",
        "age",
        "vyear",
    )
]
def printHashedEmail(split_row, worksheet, index):
    """Clean the first field of ``split_row`` and write it to column 0.

    Every character listed in the module-level ``replacer`` string is removed
    from ``split_row[0]``. The cleaned value is stored back into
    ``split_row[0]`` and written to ``worksheet`` at row ``index``, column 0.
    """
    cleaned = split_row[0]
    for unwanted in replacer:
        cleaned = cleaned.replace(unwanted, "")
    # The caller's list is updated in place, as later calls reuse the same row.
    split_row[0] = cleaned
    worksheet.write(index, 0, cleaned)
def printOtherOnes(split_row, worksheet,index,non_changing_index):
    """Clean one field of ``split_row`` and write it to column 1.

    Every character listed in the module-level ``replacer`` string is removed
    from ``split_row[non_changing_index]``. The cleaned value is stored back
    into that slot and written to ``worksheet`` at row ``index``, column 1.
    """
    cleaned = split_row[non_changing_index]
    for unwanted in replacer:
        cleaned = cleaned.replace(unwanted, "")
    # The caller's list is updated in place.
    split_row[non_changing_index] = cleaned
    worksheet.write(index, 1, cleaned)
def _fill_sheets(csv_path, targets):
    """Copy selected columns of a comma-separated file onto worksheets.

    ``targets`` is an ordered list of ``(column, sheet, strip_first)`` tuples.
    For each input line, and for each target in order, the field at ``column``
    is tested for emptiness (after ``strip()`` when ``strip_first`` is true).
    A non-empty field makes the line's first field go to column 0 and the
    tested field go to column 1 of ``sheet``, on that sheet's next free row.
    """
    # One independent output-row counter per target sheet.
    next_row = [0] * len(targets)
    with open(csv_path) as source:
        for line in source:
            fields = line.split(",")
            for slot, (column, sheet, strip_first) in enumerate(targets):
                probe = fields[column]
                if strip_first:
                    probe = probe.strip()
                if probe == "":
                    continue
                printHashedEmail(fields, sheet, next_row[slot])
                printOtherOnes(fields, sheet, next_row[slot], column)
                next_row[slot] += 1


# NOTE(review): input is still read from the local files 1.csv and 2.csv;
# the flowFile obtained via session.get() is not consulted here.
_fill_sheets("1.csv", [
    (2, worksheet, False),
    (3, worksheet2, True),
])
_fill_sheets("2.csv", [
    (2, worksheet3, False),
    (3, worksheet4, True),
    (5, worksheet5, False),
    (4, worksheet6, True),
    (6, worksheet7, True),
])

wb.close()
print("Done")
After manipulation, I would like the Excel file to exit the ExecuteScript processor so I can do more with it.