I have some code that takes in a set of files, stitches together what it can, and plots the result. I'm posting the bulk of my code to make this more readable; more can be added if needed.
# Walk the data tree; for every recording (identified by its "*-0.dat" file)
# stitch consecutive good files into runs, keep the longest run, and save a
# spectrogram, an FFT plot and a raw-signal plot for it.
# NOTE(review): `path` is always built from start_path, so directories nested
# more than one level below start_path would not resolve -- confirm the tree
# is flat.
for paths, dirs, files in os.walk(start_path):
    for d in dirs:
        path = start_path + changedir + d
        pathpart = d
        os.chdir(path)
        for file in glob.glob("*-0.dat"):
            tempname = file.split("-")
            fileName1 = str(tempname[0] + "-" + tempname[1] + "-")
            gc.collect()

            # Each entry of these lists is one uninterrupted run of good files.
            Data_Sets1 = []
            Data_Sets2 = []
            Headers = []
            run_chan1 = []
            run_chan2 = []
            run_headers = []
            for fileNumber in range(0, 45):
                fileName = fileName1 + str(fileNumber) + fileName3
                header, data1, data2 = u.unpackFile(path, fileName)
                if header is None:
                    # A bad file ends the current run; start a fresh one.
                    logging.warning("curropted file found at " + fileName)
                    Data_Sets1.append(run_chan1)
                    Data_Sets2.append(run_chan2)
                    Headers.append(run_headers)
                    run_chan1 = []
                    run_chan2 = []
                    run_headers = []
                else:
                    logging.info(fileName + " is good!")
                    run_headers.append(header)
                    run_chan1.extend(data1[:10000])
                    run_chan2.extend(data2[:10000])
            # Close the run that was still open when the loop finished.
            Data_Sets1.append(run_chan1)
            Data_Sets2.append(run_chan2)
            Headers.append(run_headers)

            # Keep only the longest run.
            lengths = [len(data_set) for data_set in Data_Sets1]
            index = lengths.index(max(lengths))
            Chan1 = Data_Sets1[index]
            Chan2 = Data_Sets2[index]
            Start_Header = Headers[index]
            # `or`, not `|`: the bitwise operator binds tighter than `==` and
            # turned the test into a chained comparison.
            if len(Chan1) == 0 or len(Chan2) == 0:
                continue
            try:
                # Start_Header holds one header per file of the chosen run, so
                # the run's starting header is element 0 (not `index`, which
                # is the run's position in Data_Sets1).
                Date = Start_Header[0][0]
                Time = Start_Header[0][1]
            except IndexError:
                logging.critical("file " + fileName + " is unusuable")
                continue

            # NOTE: truncating the channels to the closest power-of-two length
            # was previously sketched here and is intentionally disabled.
            logging.debug("Length of channels is " + str(len(Chan1)))
            window = np.hanning(Window_Width)
            t = s.Time_Array(len(Chan1), Sample_Rate)
            window2 = np.hanning(len(Chan1))
            Noise_Frequincies = [60.0 * float(i) for i in range(1, 125)]
            Noise_Frequincies.append(180.0)
            filter1 = s.Noise_Reduction(Sample_Rate, Noise_Frequincies, Chan1)
            filter2 = s.Noise_Reduction(Sample_Rate, Noise_Frequincies, Chan2)
            stamp = str(Date) + "-" + str(Time)

            # --- Spectrograms -------------------------------------------
            logging.info("Starting the plots")
            fig1, (ax1, ax2) = plt.subplots(nrows=2)
            spec1, freqs1, time1 = mlab.specgram(
                filter1, NFFT=Window_Width, Fs=Sample_Rate,
                window=window, noverlap=Over_Lap)
            im1 = ax1.imshow(
                spec1, cmap=cm.get_cmap("rainbow"), norm=colors.LogNorm(),
                origin='lower',
                extent=[t[0], t[len(t) - 1], freqs1.min(), 8000],
                aspect='auto', vmin=1e-5, vmax=1e5)
            ax1.set_title(stamp + " Channel 1")
            ax1.set_ylabel("Freqency Hz")
            spec2, freqs2, time2 = mlab.specgram(
                filter2, NFFT=Window_Width, Fs=Sample_Rate,
                window=window, noverlap=Over_Lap)
            im2 = ax2.imshow(
                spec2, cmap=cm.get_cmap("rainbow"), norm=colors.LogNorm(),
                origin='lower',
                extent=[t[0], t[len(t) - 1], freqs2.min(), 8000],
                aspect='auto', vmin=1e-5, vmax=1e5)
            cax1, kw1 = matplotlib.colorbar.make_axes(ax1)
            colorbar(im1, cax=cax1, **kw1)
            cax2, kw2 = matplotlib.colorbar.make_axes(ax2)
            colorbar(im2, cax=cax2, **kw2)
            ax2.set_title(stamp + " Channel 2")
            ax2.set_ylabel("Freqency Hz")
            save1 = save_path + pathpart + changedir + specgram_path
            if not os.path.exists(save1):
                os.makedirs(save1)
            # Save through the figure itself rather than the implicit
            # "current figure".
            fig1.savefig(os.path.join(save1, stamp + "-Power_Spec1.png"))
            logging.info("Spectrogram path is " + save1)

            # --- Full-length FFT magnitude ------------------------------
            fig2, (ax4, ax6) = plt.subplots(nrows=2)
            final_fft = np.absolute(s.Full_FFT(filter1, window2))
            bin_count = float(len(final_fft))
            freqs = [i * Sample_Rate / bin_count
                     for i in range(len(final_fft))]
            ax4.plot(freqs, final_fft)
            new_fft = np.absolute(s.Full_FFT(filter2, window2))
            ax6.plot(freqs, new_fft)
            # Label the axes BEFORE saving; previously the labels were set
            # after savefig and never reached the PNG.
            ax4.set_title(stamp + " Channel 1")
            ax4.set_xlabel("Bins")
            ax4.set_ylabel("Power")
            ax6.set_title(stamp + " Channnel 2")
            ax6.set_xlabel("Bins")
            ax6.set_ylabel("Power")
            save2 = save_path + pathpart + changedir + freq_path
            logging.info("Frequency path is " + save2)
            if not os.path.exists(save2):
                os.makedirs(save2)
            fig2.savefig(os.path.join(save2, stamp + "-Freq.png"))

            # --- Filtered time-domain signal ----------------------------
            fig3, (ax7, ax9) = plt.subplots(nrows=2)
            ax7.plot(t, filter1)
            ax9.plot(t, filter2)
            save3 = save_path + pathpart + changedir + signal_path
            if not os.path.exists(save3):
                os.makedirs(save3)
            fig3.savefig(os.path.join(save3, stamp + "-Signal.png"))
            logging.info("Signal path is " + save3)

            # Clear and close every figure so pyplot drops its references
            # before the next recording is processed.
            fig1.clf()
            fig2.clf()
            fig3.clf()
            close('all')
            gc.collect()
And here is the unpacking code:
def unpackFile(path, fileName):
    """Read one two-channel data file and return its header and samples.

    The file layout expected by this function is: a header terminated by
    ``}``, the 10-byte start key ``Data_Start``, 40000 bytes of big-endian
    signed 16-bit samples, and whatever follows (compared against the end key
    ``Data_Stop `` for logging only). Samples are interleaved: even positions
    go to channel 2, odd positions to channel 1.

    Args:
        path: Directory containing the file.
        fileName: Name of the file inside ``path``.

    Returns:
        ``(header_parts, chan1, chan2)`` where ``header_parts`` is the header
        (without its first character and the closing ``}``) split on commas,
        and each channel is a list of exactly 10000 ints. Returns
        ``(None, None, None)`` when the file is missing, unreadable, smaller
        than 1000 bytes, lacks the start key, or does not yield 10000 samples
        per channel.
    """
    fullPath = os.path.join(path, fileName)
    logging.info("Starting file " + fileName)
    if not os.path.isfile(fullPath):
        logging.warning("could not find "+fileName)
        return None, None, None
    try:
        contents = open(fullPath, 'rb')
    except IOError:
        logging.warning(fileName + " Not found")
        return None, None, None

    # The with-block guarantees the handle is released on every exit path.
    with contents:
        if os.path.getsize(fullPath) < 1000:
            logging.warning(fileName + " is below 1000 bytes")
            return None, None, None
        try:
            raw = contents.read()
        except IOError:
            logging.warning("IOE error trying to read the end key")
            return None, None, None

    # Locate the closing bracket of the header. A file without one is treated
    # as corrupted instead of being scanned forever past end-of-file.
    headerEnd = raw.find(b"}")
    keyStart = headerEnd + 1
    dataStart = keyStart + 10
    if headerEnd < 0 or raw[keyStart:dataStart] != b"Data_Start":
        logging.warning("No start key found " + fileName + " is corrupted")
        return None, None, None
    logging.info("Found start key for file "+fileName)

    logging.debug("Reading the data")
    dataStop = dataStart + 40000
    data = raw[dataStart:dataStop]
    endKey = raw[dataStop:]
    if endKey == b"Data_Stop ":
        logging.debug("Found end key " )
    else:
        logging.debug("No end key found in" +fileName)

    # Unpack the binary payload into signed ints in one call; a trailing odd
    # byte is ignored.
    sampleCount = len(data) // 2
    dataList = struct.unpack('>%dh' % sampleCount, data[:sampleCount * 2])
    logging.debug("total points found is " + str(len(dataList)))

    # De-interleave: even positions are channel 2, odd positions channel 1.
    chan2 = list(dataList[0::2])
    chan1 = list(dataList[1::2])

    # Both channels must contain 10000 data points, otherwise the file is
    # considered corrupted.
    if len(chan2)!=10000:
        logging.warning("Chanel 2 did not containg the right number of data points, " + fileName + " is corupted")
        return None, None, None
    if len(chan1)!=10000:
        logging.warning("Chanel 1 did not containg the right number of data points, " + fileName + " is corupted")
        return None, None, None

    # Drop the first character and the closing bracket of the header.
    header = raw[1:headerEnd]
    if not isinstance(header, str):
        # Python 3 reads bytes; hand callers a native str either way.
        header = header.decode("latin-1")
    header_parts = header.split(',')
    return header_parts,chan1,chan2
There is a memory leak somewhere, and I don't know where. I'm trying to get the code to walk through directories, pick out the data sets, and then plot them. After a few minutes this eats up several GB of RAM. Any tips to reduce the memory usage?