4

I am trying to fit a Randomized Logistic Regression to my data and cannot get past an error. Here is the code:

import numpy as np
from sklearn.linear_model import RandomizedLogisticRegression

X = np.load("X.npy")
y = np.load("y.npy")

randomized_LR = RandomizedLogisticRegression(C=0.1, verbose=True, n_jobs=3)
randomized_LR.fit(X, y)

This gives an error:

    344     if issparse(X):
    345         size = len(weights)
    346         weight_dia = sparse.dia_matrix((1 - weights, 0), (size, size))
    347         X = X * weight_dia
    348     else:
--> 349         X *= (1 - weights)
    350
    351     C = np.atleast_1d(np.asarray(C, dtype=np.float))
    352     scores = np.zeros((X.shape[1], len(C)), dtype=np.bool)
    353

ValueError: output array is read-only

Could someone point out what I should do to proceed?

Thank you very much,

Hendra

Complete Traceback as requested:

Traceback (most recent call last):
  File "temp.py", line 88, in <module>
  train_randomized_logistic_regression()
  File "temp.py", line 82, in train_randomized_logistic_regression
randomized_LR.fit(X, y)
  File "/home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/linear_model/randomized_l1.py", line 110, in fit
sample_fraction=self.sample_fraction, **params)
File "/home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/memory.py", line 281, in __call__
return self.func(*args, **kwargs)
File "/home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/linear_model/randomized_l1.py", line 52, in _resample_model
for _ in range(n_resampling)):
File "/home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 660, in __call__
self.retrieve()
File "/home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 543, in retrieve
raise exception_type(report)
sklearn.externals.joblib.my_exceptions.JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/zfs/ilps-plexest/homedirs/hbunyam1/social_graph/temp.py in <module>()
     83
     84
     85
     86 if __name__ == '__main__':
     87
---> 88     train_randomized_logistic_regression()
     89
     90
     91
     92

...........................................................................
/zfs/ilps-plexest/homedirs/hbunyam1/social_graph/temp.py in train_randomized_logistic_regression()
     77     X = np.load( 'data/issuemakers/features/new_X.npy')
     78     y = np.load( 'data/issuemakers/features/new_y.npy')
     79
     80     randomized_LR = RandomizedLogisticRegression(C=0.1, n_jobs=32)
     81
---> 82     randomized_LR.fit(X, y)
    randomized_LR.fit = <bound method RandomizedLogisticRegression.fit o...d=0.25,
           tol=0.001, verbose=False)>
    X = array([[  1.01014900e+06,   7.29970000e+04,   2....460000e+04,   3.11428571e+01,   1.88100000e+03]])
    y = array([1, 1, 1, ..., 0, 1, 1])
     83
     84
     85
     86 if __name__ == '__main__':

...........................................................................
/home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/linear_model/randomized_l1.py in  fit(self=RandomizedLogisticRegression(C=0.1, fit_intercep...ld=0.25,
           tol=0.001, verbose=False), X=array([[  6.93135506e-04,   8.93676615e-04,    -1....234095e-04,  -1.19037488e-04,   4.20921021e-04]]), y=array([1, 1, 1, ..., 0, 1, 1]))
    105         )(
    106             estimator_func, X, y,
    107             scaling=self.scaling, n_resampling=self.n_resampling,
    108             n_jobs=self.n_jobs, verbose=self.verbose,
    109             pre_dispatch=self.pre_dispatch, random_state=self.random_state,
--> 110             sample_fraction=self.sample_fraction, **params)
    self.sample_fraction = 0.75
    params = {'C': 0.1, 'fit_intercept': True, 'tol': 0.001}
    111
    112         if scores_.ndim == 1:
    113             scores_ = scores_[:, np.newaxis]
    114         self.all_scores_ = scores_

 ...........................................................................
 /home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/memory.py in __call__(self=NotMemorizedFunc(func=<function _resample_model at 0x7fb5d7d12b18>), *args=(<function _randomized_logistic>, array([[  6.93135506e-04,   8.93676615e-04,  -1....234095e-04,  -1.19037488e-04,   4.20921021e-04]]), array([1, 1, 1, ..., 0, 1, 1])), **kwargs={'C': 0.1, 'fit_intercept': True, 'n_jobs': 32, 'n_resampling': 200, 'pre_dispatch': '3*n_jobs', 'random_state': None, 'sample_fraction': 0.75, 'scaling': 0.5, 'tol': 0.001, 'verbose': False})
    276     # Should be a light as possible (for speed)
    277     def __init__(self, func):
    278         self.func = func
    279
    280     def __call__(self, *args, **kwargs):
--> 281         return self.func(*args, **kwargs)
    self.func = <function _resample_model>
    args = (<function _randomized_logistic>, array([[  6.93135506e-04,   8.93676615e-04,  -1....234095e-04,  -1.19037488e-04,   4.20921021e-04]]), array([1, 1, 1, ..., 0, 1, 1]))
    kwargs = {'C': 0.1, 'fit_intercept': True, 'n_jobs': 32, 'n_resampling': 200, 'pre_dispatch': '3*n_jobs', 'random_state': None, 'sample_fraction': 0.75, 'scaling': 0.5, 'tol': 0.001, 'verbose': False}
    282
    283     def call_and_shelve(self, *args, **kwargs):
    284         return NotMemorizedResult(self.func(*args, **kwargs))
    285

...........................................................................
/home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/linear_model/randomized_l1.py in _resample_model(estimator_func=<function _randomized_logistic>, X=array([[  6.93135506e-04,   8.93676615e-04,  -1....234095e-04,  -1.19037488e-04,   4.20921021e-04]]), y=array([1, 1, 1, ..., 0, 1, 1]), scaling=0.5, n_resampling=200, n_jobs=32, verbose=False, pre_dispatch='3*n_jobs', random_state=<mtrand.RandomState object>, sample_fraction=0.75, **params={'C': 0.1, 'fit_intercept': True, 'tol': 0.001})
     47                 X, y, weights=scaling * random_state.random_integers(
     48                     0, 1, size=(n_features,)),
     49                 mask=(random_state.rand(n_samples) < sample_fraction),
     50                 verbose=max(0, verbose - 1),
     51                 **params)
---> 52             for _ in range(n_resampling)):
    n_resampling = 200
     53         scores_ += active_set
     54
     55     scores_ /= n_resampling
     56     return scores_

 ...........................................................................
 /home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=32), iterable=<itertools.islice object>)
    655             if pre_dispatch == "all" or n_jobs == 1:
    656                 # The iterable was consumed all at once by the above for loop.
    657                 # No need to wait for async callbacks to trigger to
    658                 # consumption.
    659                 self._iterating = False
--> 660             self.retrieve()
    self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=32)>
    661             # Make sure that we get a last message telling us we are done
    662             elapsed_time = time.time() - self._start_time
    663             self._print('Done %3i out of %3i | elapsed: %s finished',
    664                         (len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Fri Jan  2 12:13:54 2015
PID: 126664                Python 2.7.8: /home/hbunyam1/anaconda/bin/python
...........................................................................
/home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/linear_model/randomized_l1.pyc in _randomized_logistic(X=memmap([[  6.93135506e-04,   8.93676615e-04,  -1...234095e-04,  -1.19037488e-04,   4.20921021e-04]]), y=array([1, 1, 1, ..., 0, 1, 1]), weights=array([ 0.5,  0. ,  0. ,  0.5,  0. ,  0.5,  0. ,...  0. ,  0. ,  0.5,  0. ,  0. ,  0. ,  0. ,  0.5]), mask=array([ True,  True,  True, ...,  True,  True,  True], dtype=bool), C=0.1, verbose=0, fit_intercept=True, tol=0.001)
    344     if issparse(X):
    345         size = len(weights)
    346         weight_dia = sparse.dia_matrix((1 - weights, 0), (size, size))
    347         X = X * weight_dia
    348     else:
--> 349         X *= (1 - weights)
    350
    351     C = np.atleast_1d(np.asarray(C, dtype=np.float))
    352     scores = np.zeros((X.shape[1], len(C)), dtype=np.bool)
    353

ValueError: output array is read-only
___________________________________________________________________________







Complete traceback after loading the arrays with mmap_mode='r+' (the error is still the same):

[hbunyam1@zookst20 social_graph]$ python temp.py
Traceback (most recent call last):
  File "temp.py", line 88, in <module>
    train_randomized_logistic_regression()
  File "temp.py", line 82, in train_randomized_logistic_regression
    randomized_LR.fit(X, y)
  File "/home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/linear_model/randomized_l1.py", line 110, in fit
    sample_fraction=self.sample_fraction, **params)
  File "/home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/memory.py", line 281, in __call__
    return self.func(*args, **kwargs)
  File "/home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/linear_model/randomized_l1.py", line 52, in _resample_model
    for _ in range(n_resampling)):
  File "/home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 660, in __call__
    self.retrieve()
  File "/home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 543, in retrieve
    raise exception_type(report)
sklearn.externals.joblib.my_exceptions.JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
    ...........................................................................
/zfs/ilps-plexest/homedirs/hbunyam1/social_graph/temp.py in <module>()
     83
     84
     85
     86 if __name__ == '__main__':
     87
---> 88     train_randomized_logistic_regression()
     89
     90
     91
     92

...........................................................................
/zfs/ilps-plexest/homedirs/hbunyam1/social_graph/temp.py in train_randomized_logistic_regression()
     77     X = np.load( 'data/issuemakers/features/new_X.npy', mmap_mode='r+')
     78     y = np.load( 'data/issuemakers/features/new_y.npy', mmap_mode='r+')
     79
     80     randomized_LR = RandomizedLogisticRegression(C=0.1, n_jobs=32)
     81
---> 82     randomized_LR.fit(X, y)
        randomized_LR.fit = <bound method RandomizedLogisticRegression.fit o...d=0.25,
               tol=0.001, verbose=False)>
        X = memmap([[  1.01014900e+06,   7.29970000e+04,   2...460000e+04,   3.11428571e+01,   1.88100000e+03]])
        y = memmap([1, 1, 1, ..., 0, 1, 1])
     83
     84
     85
     86 if __name__ == '__main__':

...........................................................................
/home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/linear_model/randomized_l1.py in fit(self=RandomizedLogisticRegression(C=0.1, fit_intercep...ld=0.25,
               tol=0.001, verbose=False), X=array([[  6.93135506e-04,   8.93676615e-04,  -1....234095e-04,  -1.19037488e-04,   4.20921021e-04]]), y=array([1, 1, 1, ..., 0, 1, 1]))
    105         )(
    106             estimator_func, X, y,
    107             scaling=self.scaling, n_resampling=self.n_resampling,
    108             n_jobs=self.n_jobs, verbose=self.verbose,
    109             pre_dispatch=self.pre_dispatch, random_state=self.random_state,
--> 110             sample_fraction=self.sample_fraction, **params)
        self.sample_fraction = 0.75
        params = {'C': 0.1, 'fit_intercept': True, 'tol': 0.001}
    111
    112         if scores_.ndim == 1:
    113             scores_ = scores_[:, np.newaxis]
    114         self.all_scores_ = scores_

...........................................................................
/home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/memory.py in __call__(self=NotMemorizedFunc(func=<function _resample_model at 0x7f192c829b18>), *args=(<function _randomized_logistic>, array([[  6.93135506e-04,   8.93676615e-04,  -1....234095e-04,  -1.19037488e-04,   4.20921021e-04]]), array([1, 1, 1, ..., 0, 1, 1])), **kwargs={'C': 0.1, 'fit_intercept': True, 'n_jobs': 32, 'n_resampling': 200, 'pre_dispatch': '3*n_jobs', 'random_state': None, 'sample_fraction': 0.75, 'scaling': 0.5, 'tol': 0.001, 'verbose': False})
    276     # Should be a light as possible (for speed)
    277     def __init__(self, func):
    278         self.func = func
    279
    280     def __call__(self, *args, **kwargs):
--> 281         return self.func(*args, **kwargs)
        self.func = <function _resample_model>
        args = (<function _randomized_logistic>, array([[  6.93135506e-04,   8.93676615e-04,  -1....234095e-04,  -1.19037488e-04,   4.20921021e-04]]), array([1, 1, 1, ..., 0, 1, 1]))
        kwargs = {'C': 0.1, 'fit_intercept': True, 'n_jobs': 32, 'n_resampling': 200, 'pre_dispatch': '3*n_jobs', 'random_state': None, 'sample_fraction': 0.75, 'scaling': 0.5, 'tol': 0.001, 'verbose': False}
    282
    283     def call_and_shelve(self, *args, **kwargs):
    284         return NotMemorizedResult(self.func(*args, **kwargs))
    285

...........................................................................
/home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/linear_model/randomized_l1.py in _resample_model(estimator_func=<function _randomized_logistic>, X=array([[  6.93135506e-04,   8.93676615e-04,  -1....234095e-04,  -1.19037488e-04,   4.20921021e-04]]), y=array([1, 1, 1, ..., 0, 1, 1]), scaling=0.5, n_resampling=200, n_jobs=32, verbose=False, pre_dispatch='3*n_jobs', random_state=<mtrand.RandomState object>, sample_fraction=0.75, **params={'C': 0.1, 'fit_intercept': True, 'tol': 0.001})
     47                 X, y, weights=scaling * random_state.random_integers(
     48                     0, 1, size=(n_features,)),
     49                 mask=(random_state.rand(n_samples) < sample_fraction),
     50                 verbose=max(0, verbose - 1),
     51                 **params)
---> 52             for _ in range(n_resampling)):
        n_resampling = 200
     53         scores_ += active_set
     54
     55     scores_ /= n_resampling
     56     return scores_

...........................................................................
/home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=32), iterable=<itertools.islice object>)
    655             if pre_dispatch == "all" or n_jobs == 1:
    656                 # The iterable was consumed all at once by the above for loop.
    657                 # No need to wait for async callbacks to trigger to
    658                 # consumption.
    659                 self._iterating = False
--> 660             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=32)>
    661             # Make sure that we get a last message telling us we are done
    662             elapsed_time = time.time() - self._start_time
    663             self._print('Done %3i out of %3i | elapsed: %s finished',
    664                         (len(self._output),

    ---------------------------------------------------------------------------
    Sub-process traceback:
    ---------------------------------------------------------------------------
    ValueError                                         Fri Jan  2 12:57:25 2015
PID: 127177                Python 2.7.8: /home/hbunyam1/anaconda/bin/python
...........................................................................
/home/hbunyam1/anaconda/lib/python2.7/site-packages/sklearn/linear_model/randomized_l1.pyc in _randomized_logistic(X=memmap([[  6.93135506e-04,   8.93676615e-04,  -1...234095e-04,  -1.19037488e-04,   4.20921021e-04]]), y=memmap([1, 1, 1, ..., 0, 0, 1]), weights=array([ 0.5,  0.5,  0. ,  0.5,  0.5,  0.5,  0.5,...  0. ,  0.5,  0. ,  0. ,  0.5,  0.5,  0.5,  0.5]), mask=array([ True,  True,  True, ..., False, False,  True], dtype=bool), C=0.1, verbose=0, fit_intercept=True, tol=0.001)
    344     if issparse(X):
    345         size = len(weights)
    346         weight_dia = sparse.dia_matrix((1 - weights, 0), (size, size))
    347         X = X * weight_dia
    348     else:
--> 349         X *= (1 - weights)
    350
    351     C = np.atleast_1d(np.asarray(C, dtype=np.float))
    352     scores = np.zeros((X.shape[1], len(C)), dtype=np.bool)
    353

ValueError: output array is read-only
___________________________________________________________________________

4 Answers

3

I received the same error when running this on a 32-processor Ubuntu server. The problem persisted for any n_jobs value above 1, but it went away when n_jobs was set back to the default of 1 (as benbo described).

This is a bug in RandomizedLogisticRegression: with n_jobs > 1, the worker processes are handed the input array as a shared, read-only block of memory, and the in-place modification they then attempt on it fails.

Please refer to the scikit-learn GitHub issue, which discusses this problem and possible fixes in depth: https://github.com/scikit-learn/scikit-learn/issues/4597
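
To make the failure mode concrete, here is a minimal sketch of my own (not from the answer or the issue): the in-place multiplication X *= (1 - weights) from the traceback fails as soon as the array it receives is marked read-only, which is how joblib shares large arrays with its workers.

import numpy as np

X = np.ones((4, 3))
X *= 0.5                  # fine: X is an ordinary, writable array

X.setflags(write=False)   # simulate the read-only memmap the workers receive
try:
    X *= 0.5              # same kind of in-place operation as in randomized_l1.py
except ValueError as e:
    print(e)              # e.g. "output array is read-only"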



3

The reason is the max_nbytes parameter of the Parallel invocation in the joblib library, which scikit-learn uses internally when you set n_jobs > 1; it defaults to 1M. The definition of this parameter is:

Threshold on the size of arrays passed to the workers that triggers automated memory mapping in temp_folder.

More details can be found here: https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html#
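
To see what this automated memory mapping means for the worker processes, here is a small self-contained sketch of my own (not part of the original answer), calling joblib's Parallel directly; it uses the standalone joblib package, whereas the traceback above uses the copy bundled in sklearn.externals.joblib. The array above the threshold arrives in the worker as a read-only memmap, so modifying it in place fails:

import numpy as np
from joblib import Parallel, delayed

def modify_in_place(a):
    # The same kind of in-place update that _randomized_logistic performs.
    try:
        a *= 2
        return "writable"
    except ValueError as e:
        return "read-only (%s)" % e

small = np.zeros((100, 100))    # ~80 kB: pickled normally, stays writable
big = np.zeros((2000, 100))     # ~1.6 MB: above the default 1M max_nbytes

print(Parallel(n_jobs=2)(delayed(modify_in_place)(a) for a in (small, big)))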

So, once an array exceeds 1M in size, joblib memory-maps it read-only for the worker processes, and the in-place update then fails with ValueError: output array is read-only (some versions report ValueError: assignment destination is read-only instead). The error is easy to reproduce. Let's look at the following code:

import numpy as np
from sklearn.linear_model import RandomizedLogisticRegression
# Create some random data
samples = 2621
X = np.random.randint(1,100, size=(samples, 50))
y = np.random.randint(100,200, size=(samples))

randomized_LR = RandomizedLogisticRegression(C=0.1, verbose=True, n_jobs=3)
randomized_LR.fit(X, y)

This runs without any problems. Checking the size of X with print(X.nbytes/1024**2) shows that the array is 0.9998321533203125 MB, i.e. just under the threshold.

If we run the same code again, but change the number of samples to 2622:

import numpy as np
from sklearn.linear_model import RandomizedLogisticRegression

samples = 2622
X = np.random.randint(1,100, size=(samples, 50))
print(X.nbytes/1024**2)
y = np.random.randint(100,200, size=(samples))

randomized_LR = RandomizedLogisticRegression(C=0.1, verbose=True, n_jobs=3)
randomized_LR.fit(X, y)

This time Python crashes with ValueError: output array is read-only; checking the size of X shows that it is 1.000213623046875 MB and thus too big.
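
For reference, the threshold arithmetic behind those two numbers (assuming 8-byte integer elements, which is what np.random.randint produces on a 64-bit Linux build):

print(2621 * 50 * 8 / 1024.0**2)   # 0.99983... MB -> below the 1M default, stays writable
print(2622 * 50 * 8 / 1024.0**2)   # 1.00021... MB -> above it, memory-mapped read-only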


0

You may have to use np.load('X.npy', mmap_mode='r+') as per the documentation of numpy.load.
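
Applied to the code from the question, the suggestion looks like this (a sketch only; as the comments below show, it did not make the error go away for the asker):

import numpy as np
from sklearn.linear_model import RandomizedLogisticRegression

# Load the arrays as writable memory maps instead of plain in-memory arrays.
X = np.load("X.npy", mmap_mode="r+")
y = np.load("y.npy", mmap_mode="r+")

randomized_LR = RandomizedLogisticRegression(C=0.1, verbose=True, n_jobs=3)
randomized_LR.fit(X, y)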

4 Comments

Thanks; however, the error still occurs and is the same.
@HendraBunyamin can you post the full traceback?
I'm confused; did you try X = np.load('data/issuemakers/features/new_X.npy', mmap_mode='r+')?
Sorry, I hadn't used mmap_mode='r+'; I have now posted again with mmap_mode. Thank you.
0

Try changing the number of jobs, maybe to 1 for a start. I ran into the same error when running RandomizedLogisticRegression with n_jobs=20 (on a powerful machine), but the code ran without any problems when n_jobs was set to the default of 1.
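
A minimal sketch of that workaround, assuming the same data and scikit-learn setup as in the question:

import numpy as np
from sklearn.linear_model import RandomizedLogisticRegression

X = np.load("X.npy")
y = np.load("y.npy")

# n_jobs=1 keeps the resampling in a single process, so joblib never hands a
# worker a read-only memory map and the in-place scaling of X succeeds.
randomized_LR = RandomizedLogisticRegression(C=0.1, verbose=True, n_jobs=1)
randomized_LR.fit(X, y)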

