Python Memory Usage

Question

So I have some code that takes in a set of files, stitches together what it can, and plots the result. I'm posting the bulk of my code in an attempt to make this more readable; more can be added if needed.

# For every directory found under start_path: locate each "*-0.dat" file,
# stitch the numbered files of that series (0..44) into runs of consecutive
# good files, keep the longest run, filter it, and save three figures
# (spectrogram, FFT magnitude, time signal).
for paths, dirs, files in os.walk(start_path):
    for d in dirs:
        # NOTE(review): the path is built from start_path, not from `paths`,
        # so it presumably only resolves for directories directly below
        # start_path -- confirm against the real directory layout.
        path = start_path + changedir + d
        pathpart = d

        os.chdir(path)
        for first_file in glob.glob("*-0.dat"):
            tempname = first_file.split("-")
            fileName1 = str(tempname[0] + "-" + tempname[1] + "-")

            # A "run" is a stretch of consecutive good files. A corrupted
            # file closes the current run and starts a new one.
            Data_Sets1 = []
            Data_Sets2 = []
            Headers = []
            temp_1 = []
            temp_2 = []
            temp_3 = []

            for fileNumber in range(0, 45):
                fileName = fileName1 + str(fileNumber) + fileName3
                header, data1, data2 = u.unpackFile(path, fileName)

                if header is None:
                    logging.warning("curropted file found at " + fileName)
                    Data_Sets1.append(temp_1)
                    Data_Sets2.append(temp_2)
                    Headers.append(temp_3)
                    temp_1 = []
                    temp_2 = []
                    temp_3 = []
                else:
                    logging.info(fileName + " is good!")
                    temp_3.append(header)
                    # extend() copies the samples in one call instead of
                    # 10000 separate append() calls per channel.
                    temp_1.extend(data1[:10000])
                    temp_2.extend(data2[:10000])

            Data_Sets1.append(temp_1)
            Data_Sets2.append(temp_2)
            Headers.append(temp_3)

            # Keep only the longest run (the first one wins on ties).
            lengths = [len(run) for run in Data_Sets1]
            index = lengths.index(max(lengths))

            Chan1 = Data_Sets1[index]
            Chan2 = Data_Sets2[index]
            Start_Header = Headers[index]

            # Drop the references to the shorter runs so they can be freed
            # before the plotting arrays are allocated.
            del Data_Sets1, Data_Sets2, Headers, temp_1, temp_2, temp_3

            # The original test `len(Chan1) == 0 | len(Chan2) == 0` binds as
            # `len(Chan1) == (0 | len(Chan2)) == 0`, which is only true when
            # BOTH channels are empty. Skip when either one is empty.
            if not Chan1 or not Chan2:
                continue
            try:
                # Start_Header is the list of headers of the chosen run, so
                # the header of its first file is element 0 (indexing it with
                # the run number picked the wrong file or raised IndexError).
                Date = Start_Header[0][0]
                Time = Start_Header[0][1]
            except IndexError:
                logging.critical("file " + fileName + " is unusuable")
                continue

            # NOTE: truncating the channels to the nearest power-of-two length
            # was disabled in the original and is intentionally not done here.
            logging.debug("Length of channels is " + str(len(Chan1)))

            window = np.hanning(Window_Width)
            t = s.Time_Array(len(Chan1), Sample_Rate)
            window2 = np.hanning(len(Chan1))

            Noise_Frequincies = [60.0 * float(i) for i in range(1, 125)]
            Noise_Frequincies.append(180.0)

            filter1 = s.Noise_Reduction(Sample_Rate, Noise_Frequincies, Chan1)
            filter2 = s.Noise_Reduction(Sample_Rate, Noise_Frequincies, Chan2)

            logging.info("Starting the plots")
            stamp = str(Date) + "-" + str(Time)

            # ---- Figure 1: spectrograms of both channels -------------------
            fig1, (ax1, ax2) = plt.subplots(nrows=2)

            spec1, freqs1, time1 = mlab.specgram(filter1, NFFT=Window_Width, Fs=Sample_Rate, window=window, noverlap=Over_Lap)
            im1 = ax1.imshow(spec1, cmap=cm.get_cmap("rainbow"), norm=colors.LogNorm(), origin='lower',
                extent=[t[0], t[len(t)-1], freqs1.min(), 8000], aspect='auto', vmin=1e-5, vmax=1e5)
            ax1.set_title(stamp + " Channel 1")
            ax1.set_ylabel("Freqency Hz")

            spec2, freqs2, time2 = mlab.specgram(filter2, NFFT=Window_Width, Fs=Sample_Rate, window=window, noverlap=Over_Lap)
            im2 = ax2.imshow(spec2, cmap=cm.get_cmap("rainbow"), norm=colors.LogNorm(), origin='lower',
                extent=[t[0], t[len(t)-1], freqs2.min(), 8000], aspect='auto', vmin=1e-5, vmax=1e5)

            cax1, kw1 = matplotlib.colorbar.make_axes(ax1)
            colorbar(im1, cax=cax1, **kw1)
            cax2, kw2 = matplotlib.colorbar.make_axes(ax2)
            colorbar(im2, cax=cax2, **kw2)

            ax2.set_title(stamp + " Channel 2")
            ax2.set_ylabel("Freqency Hz")

            save1 = save_path + pathpart + changedir + specgram_path
            if not os.path.exists(save1):
                os.makedirs(save1)
            # Save and close through the figure object itself, so the result
            # does not depend on which figure pyplot considers "current" and
            # the figure's memory is released before the next one is built.
            fig1.savefig(os.path.join(save1, stamp + "-Power_Spec1.png"))
            plt.close(fig1)
            del spec1, spec2, im1, im2

            logging.info("Spectrogram path is " + save1)

            # ---- Figure 2: FFT magnitude of both channels ------------------
            fig2, (ax4, ax6) = plt.subplots(nrows=2)

            # np.absolute works element-wise on the whole sequence, replacing
            # the per-sample Python loops of the original.
            final_fft = np.absolute(s.Full_FFT(filter1, window2))
            bin_count = len(final_fft)
            freqs = np.arange(bin_count) * Sample_Rate / float(bin_count)
            ax4.plot(freqs, final_fft)

            new_fft = np.absolute(s.Full_FFT(filter2, window2))
            ax6.plot(freqs, new_fft)

            # Titles and labels must be set BEFORE saving, otherwise they do
            # not appear in the written image (the original set them after).
            ax4.set_title(stamp + " Channel 1")
            ax4.set_xlabel("Bins")
            ax4.set_ylabel("Power")

            ax6.set_title(stamp + " Channnel 2")
            ax6.set_xlabel("Bins")
            ax6.set_ylabel("Power")

            save2 = save_path + pathpart + changedir + freq_path
            logging.info("Frequency path is " + save2)
            if not os.path.exists(save2):
                os.makedirs(save2)
            fig2.savefig(os.path.join(save2, stamp + "-Freq.png"))
            plt.close(fig2)
            del final_fft, new_fft, freqs

            # ---- Figure 3: filtered time-domain signals --------------------
            fig3, (ax7, ax9) = plt.subplots(nrows=2)
            ax7.plot(t, filter1)
            ax9.plot(t, filter2)

            save3 = save_path + pathpart + changedir + signal_path
            if not os.path.exists(save3):
                os.makedirs(save3)
            fig3.savefig(os.path.join(save3, stamp + "-Signal.png"))
            plt.close(fig3)

            logging.info("Signal path is " + save3)

            # Safety net: make sure pyplot holds no figure from this
            # iteration, then collect any reference cycles left behind.
            plt.close('all')
            gc.collect()

And here is the unpacking code:

def unpackFile(path,fileName):
    """Read one data file and split its samples into two channels.

    The file is expected to contain, in order: a header that ends with "}",
    the 10-character start key "Data_Start", 40000 bytes of big-endian signed
    16-bit samples with the two channels interleaved, and an end key. A
    missing or different end key is only logged, not treated as an error.

    Parameters:
        path: directory that contains the file.
        fileName: name of the file inside ``path``.

    Returns:
        ``(header_parts, chan1, chan2)`` on success, where ``header_parts`` is
        the header text (without its first and last character) split on ","
        and each channel is a list of 10000 ints. Even-positioned samples go
        to ``chan2`` and odd-positioned samples to ``chan1``.
        ``(None, None, None)`` when the file is missing, unreadable, smaller
        than 1000 bytes, has no closing "}" or start key, or does not hold
        exactly 10000 samples per channel.
    """
    fullPath = os.path.join(path, fileName)
    failure = (None, None, None)

    logging.info("Starting file " + fileName)
    if not os.path.isfile(fullPath):
        logging.warning("could not find "+fileName)
        return failure
    try:
        contents = open(fullPath, 'rb')
    except IOError:
        logging.warning(fileName + " Not found")
        return failure

    # The with-block closes the file on every exit path, including the early
    # returns below.
    with contents:
        if os.path.getsize(fullPath) < 1000:
            logging.warning(fileName + " is below 1000 bytes")
            return failure

        # Look for the closing bracket that ends the header. read(1) returns
        # an empty string at end of file; the original loop never checked for
        # that and spun forever on a file without "}".
        headerChars = []
        while True:
            char = contents.read(1)
            if not char:
                logging.warning("No closing bracket found in header, " + fileName + " is corrupted")
                return failure
            headerChars.append(char)
            if char == b"}":
                break
        header = b"".join(headerChars)

        # Once the close bracket is found, the next 10 characters should be
        # the start key.
        startKey = contents.read(10)
        if startKey == b"Data_Start":
            logging.info("Found start key for file "+fileName)
        else:
            logging.warning("No start key found " + fileName + " is corrupted")
            return failure

        # Read the sample block, then whatever follows it as the end key.
        try:
            logging.debug("Reading the data")
            data = contents.read(40000)
            endKey = contents.read()
        except IOError:
            logging.warning("IOE error trying to read the end key")
            return failure

    if endKey == b"Data_Stop ":
        logging.debug("Found end key " )
    else:
        logging.debug("No end key found in" +fileName)

    # Unpack every complete 2-byte pair in a single call; a trailing odd byte
    # is ignored, as in the original per-sample loop.
    pointCount = len(data) // 2
    dataList = struct.unpack('>%dh' % pointCount, data[:2 * pointCount])
    logging.debug("total points found is " + str(len(dataList)))

    # Split the interleaved samples into the two channels.
    chan2 = list(dataList[0::2])
    chan1 = list(dataList[1::2])

    # Both channels must contain exactly 10000 data points, otherwise the
    # file is treated as corrupted.
    if len(chan2)!=10000:
        logging.warning("Chanel 2 did not containg the right number of data points, " + fileName + " is corupted")
        return failure
    if len(chan1)!=10000:
        logging.warning("Chanel 1 did not containg the right number of data points, " + fileName + " is corupted")
        return failure

    # Strip the first and last character of the header before splitting.
    header = header[1:-1]
    if not isinstance(header, str):
        # Python 3 reads bytes from a binary file; hand text back to callers.
        header = header.decode("latin-1")
    header_parts = header.split(',')
    return header_parts,chan1,chan2

There is a memory leak somewhere, and I don't know where. I'm trying to get the code to walk through directories, pick out the data sets, and then plot them. After a few minutes this eats up several GB of RAM. Any tips to reduce the memory usage?

Sometimes the exercise of boiling your code down to be Minimal, Complete, Tested and Readable can help you find the answer on your own. How small can you make this code while still demonstrating the problem? — mhlester
– mhlester, Commented Mar 5, 2014 at 4:56
if I get rid of the directory looping, things stay small enough. My issue isn't a catastrophic failure, just for some reason in each loop memory is not released. — Cate Daniel
– Cate Daniel, Commented Mar 5, 2014 at 4:59

Wojciech Walczak · Accepted Answer · 2014-03-27 09:29:59Z

1

In cases like yours I prefer to leave the cleaning to the operating system. Thus, I run the code that leaks memory as a separate process:

from multiprocessing import Process, Queue


def memory_leaking_code(arg1, q):
    """Your memory leaking code goes here.

    Runs in the child process. Anything the parent needs back is sent
    through the queue ``q``.
    """
    print(arg1)
    q.put('data from memory_leaking_code()')


def main():
    """Run memory_leaking_code() in a child process and print its result."""
    q = Queue()
    p = Process(target=memory_leaking_code, args=('data to memory_leaking_code()', q))
    p.start()
    # Fetch the result BEFORE joining: a child that has put data on a queue
    # does not exit until that data has been consumed.
    print(q.get())
    # Wait for the child to exit on its own instead of terminating it, so it
    # is reaped and its memory goes back to the operating system.
    p.join()


# The guard keeps child processes from re-running main() when the module is
# re-imported on platforms that start children by spawning a new interpreter.
if __name__ == '__main__':
    main()

edited Mar 27, 2014 at 9:29

answered Mar 5, 2014 at 7:20

Wojciech Walczak

3,619 reputation · 2 gold badges · 25 silver badges · 24 bronze badges

Sign up to request clarification or add additional context in comments.

4 Comments

Cate Daniel Over a year ago

I'm using many functions, all of which I can't pickle. I've tried to look into that in the past and I was not able to find a clear answer on how to pickle a function.

Wojciech Walczak Over a year ago

You don't have to pickle anything. Just run your unpackFile() function through the multiprocessing.Process() and let your operating system free the memory that this function may be leaking/locking.

Cate Daniel Over a year ago

Well, I'm getting "can't pickle function" errors when it's trying to unpack.

Wojciech Walczak Over a year ago

Could you present the code that replicates your current problem? Try to keep it as small as possible. Just get rid of anything that is not necessary (exceptions handling, logging etc.)

Collectives™ on Stack Overflow

Python Memory Usage

1 Answer 1

4 Comments

Your Answer

Linked

Hot Network Questions

Collectives™ on Stack Overflow

1 Answer 1

4 Comments

Your Answer

Sign up or log in

Post as a guest

Linked

Related