numpy.load() wrong magic string error

Question

I have two files. The first one creates a matrix in compressed sparse row format and saves it with numpy:

from sklearn.feature_extraction.text import TfidfTransformer
import pdb

def stem_document(document):
    """Return *document* lower-cased, stripped of punctuation and stemmed.

    The text is split on whitespace. Every word is lower-cased, has all
    characters found in ``string.punctuation`` removed, and is then passed
    to ``PorterStemmer.stem``. Words whose stemming raises are reported on
    stdout and left out of the result.

    Args:
        document: The raw text to process.

    Returns:
        The stemmed words joined by single spaces.
    """
    # NOTE(review): PorterStemmer is not imported in the visible snippet;
    # presumably it comes from nltk -- confirm.
    stemmer = PorterStemmer()
    # string.punctuation already is the string the original assembled one
    # character at a time.
    punctuation = string.punctuation
    doc_stemmed = []
    for word in document.split():
        lowerstrippedword = ''.join(
            c for c in word.lower() if c not in punctuation)
        try:
            stemmed_word = stemmer.stem(lowerstrippedword)
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit still propagate. The single-argument print(...)
            # form emits the same text on Python 2 and Python 3.
            print(lowerstrippedword + " could not be stemmed.")
        else:
            doc_stemmed.append(stemmed_word)
    return ' '.join(doc_stemmed)

def readFileandStem(filestring):
    """Read a CSV file and stem the text found in its third column.

    Only rows with exactly three fields and a non-empty third field are
    kept. For each kept row the third field is run through
    ``stem_document`` and the first two fields are recorded alongside it.

    Args:
        filestring: Path of the CSV file to read.

    Returns:
        A two-element list ``[vector_data, file_extras]``: ``vector_data``
        holds the stemmed documents and ``file_extras`` holds the matching
        ``[first_field, second_field]`` pairs, in file order.
    """
    vector_data = []
    file_extras = []
    with open(filestring, 'r') as csv_file:
        reader = csv.reader(csv_file)
        try:
            # Iterating the reader ends cleanly at end-of-file and works on
            # both Python 2 and 3, unlike reader.next() guarded by a bare
            # except, which treated every error as end-of-file.
            for row in reader:
                if len(row) == 3 and row[2] != "":
                    vector_data.append(stem_document(row[2]))
                    file_extras.append([row[0], row[1]])
        except csv.Error as err:
            # Stay best-effort for malformed CSV: keep the rows read so far,
            # but say why reading stopped instead of hiding it.
            print("Stopped reading " + filestring + ": " + str(err))
    return [vector_data, file_extras]

# Driver script: read and stem Data.csv, vectorize the stemmed documents,
# then write the resulting matrix and the per-row extras under matrix/.
filestring = 'Data.csv'
print "Reading File"
data = readFileandStem(filestring)
documents = data[0]    # stemmed document strings
file_extras = data[1]  # [first_field, second_field] of every kept CSV row
print "Vectorizing Data"
vectorizer = CountVectorizer()
matrix = vectorizer.fit_transform(documents)
# use_idf=False is passed, so IDF reweighting is presumably disabled and only
# term-frequency normalisation is applied -- confirm against scikit-learn docs.
tf_idf_transform = TfidfTransformer(use_idf=False).fit(matrix)
tf_idf_matrix = tf_idf_transform.transform(matrix)
# NOTE(review): 'w' opens the file in text mode, but np.save writes binary
# data; on Windows text mode translates newline bytes, so 'wb' looks
# required here -- confirm.
# NOTE(review): tf_idf_matrix is described as a compressed-sparse-row matrix,
# not an ndarray; np.save presumably has to pickle it. scipy.sparse.save_npz
# may be the intended API -- verify.
with open('matrix/matrix.npy', 'w') as matrix_file:
    np.save(matrix_file, tf_idf_matrix)
# The extras are stored separately as JSON under the single key 'extras'.
file_json_map = {}
file_json_map['extras'] = file_extras
with open('matrix/extras.json', 'w') as extras_file:
    extras_file.write(json.dumps(file_json_map))
print "finished"

The second file is supposed to load the matrix file that the first one saved:

import numpy as np
from scipy.cluster.hierarchy import dendrogram, linkage
import json
import pdb

with open('matrix/matrix.npy', 'r') as matrix_file:  # NOTE(review): 'r' is text mode; the .npy file is binary, so 'rb' looks required -- confirm
    matrix = np.load(matrix_file)  # the call that raises the "magic string is not correct" ValueError

hcluster = linkage(matrix, "complete")  # hierarchical clustering of the loaded matrix using the "complete" linkage method

However, I get the following error:

File "Cluster.py", line 7, in <module>
    matrix = np.load(matrix_file)
  File "C:\Users\jarek\Anaconda2\lib\site-packages\numpy\lib\npyio.py", line 406, in load
    pickle_kwargs=pickle_kwargs)
  File "C:\Users\jarek\Anaconda2\lib\site-packages\numpy\lib\format.py", line 620, in read_array
    version = read_magic(fp)
  File "C:\Users\jarek\Anaconda2\lib\site-packages\numpy\lib\format.py", line 216, in read_magic
    raise ValueError(msg % (MAGIC_PREFIX, magic_str[:-2]))
ValueError: the magic string is not correct; expected '\x93NUMPY', got '\x00\x00I\x1c\x00\x00'

I don't know why the magic string would be incorrect: from what I have read, every .npy file is supposed to start with the same magic string, "\x93NUMPY".

Ideas?

Don't use `with open(blahblah) as matrix_file`; just try `np.load(blahblah)` directly. — Jeon
– Jeon, Commented Oct 15, 2016 at 4:52
No luck with that solution. Tried: "matrix = np.load('matrix/matrix.npy')" — Jarek Bird
– Jarek Bird, Commented Oct 15, 2016 at 14:35

Di Kai · Accepted Answer · 2016-10-17 06:37:11Z

1

I encountered a similar issue before.

Changing

open('matrix/matrix.npy', 'w')
...
open('matrix/matrix.npy', 'r')

to

open('matrix/matrix.npy', 'wb')
...
open('matrix/matrix.npy', 'rb')

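solved my problem. The .npy format is binary, and on Windows a file opened in text mode has its newline bytes translated, so the bytes NumPy reads back no longer match what it wrote; opening the file in binary mode for both writing and reading avoids this.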

answered Oct 17, 2016 at 6:37

Di Kai

111 bronze badge

Sign up to request clarification or add additional context in comments.

Collectives™ on Stack Overflow

numpy.load() wrong magic string error

1 Answer 1

Comments

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

1 Answer 1

Comments

Your Answer

Sign up or log in

Post as a guest

Related