I am implementing a proximal spectral gradient method for research purposes. The code is nearly finalized, but its runtime is slower than I would like, so I am asking for a review focused on efficiency.
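For reference, the smooth part of the objective that project_f below implements is (in LaTeX notation)

f(v) = \frac{\beta_1}{2} v^\top \Sigma v - \theta\, \mu^\top v + \frac{\gamma}{2}(\mathbf{1}^\top v - 1)^2 + \lambda_k (\mathbf{1}^\top v - 1) + \frac{\beta_2}{2} \lVert v \rVert_2^2

where \Sigma is the sample covariance and \mu the sample mean; proximal then applies a hard-thresholding step that zeroes every entry of v whose magnitude falls below \sqrt{2\,\mathrm{spars}/(L + 1/L)}.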
Below is the code for my algorithm:
from numpy import ones_like, empty_like, array, diag, trace, identity, negative, random, cov, mean
from math import sqrt
from numpy.linalg import norm, inv
from numba import njit, jit
@njit(fastmath={'nsz'})
def project_f(v, mu, covar, lam_k, beta_1, theta, beta_2, gam):
    # smooth part of the augmented Lagrangian: quadratic risk, linear return,
    # budget penalty/multiplier terms, and an l2 regulariser
    func = (0.5*beta_1*v.T@covar@v - theta*mu.T@v + gam/2*(v.sum()-1.)**2
            + lam_k*(v.sum()-1.) + beta_2/2*v@v)
    return func

@njit(fastmath={'nsz'})
def project_df(v, mu, covar, lam_k, beta_1, theta, beta_2, gam):
    # gradient of project_f with respect to v
    par_func = (beta_1*covar@v - theta*mu + gam*(v.sum()-1.)*ones_like(v)
                + lam_k*ones_like(v) + beta_2*v)
    return par_func
@njit(fastmath={'nsz'})
def aug_lag(lam, gam, v):
    # multiplier update for the budget constraint v.sum() == 1
    lam += gam*(v.sum()-1.)
    return lam

@njit(fastmath={'nsz'})
def lipschitz(beta_1, beta_2, gam, v, covar):
    # upper bound (via Frobenius norms) on the Hessian norm of project_f;
    # returns the corresponding step size, capped at 1
    lips = beta_1*sqrt(trace(covar@covar)) + beta_2*sqrt(len(v)) + gam*len(v)
    return min(1., 1./lips)
@njit(fastmath={'nsz'})
def spectral_grad(v, v_1, g, g_1, prev_B):
    # diagonal spectral (quasi-Newton) approximation of the Hessian;
    # keeps the previous approximation if the curvature test fails
    s = v_1 - v
    y = g_1 - g
    if s@s > s@y:
        w_k = (s@s - s@y) / (s**4).sum()
        B_k = 1./(1. + w_k*s**2)
        return diag(B_k)
    return prev_B
    # alternative: return identity(len(s))  # rescaling
@njit(fastmath={'nsz'})
def proximal(vec, sparsity, b1, b2, gam, covar, grad):
    # hard-thresholding proximal step: entries below the threshold are
    # zeroed, the rest take a scaled gradient step
    L = lipschitz(b1, b2, gam, vec, covar)
    threshold = (2*sparsity/(L + 1/L))**0.5
    v = empty_like(vec)
    for k in range(vec.shape[0]):
        if abs(vec[k]) < threshold:
            v[k] = 0.
        else:
            v[k] = vec[k] - grad[k]/(L + 1/L)
    return v
@njit(fastmath={'nsz'})
def return_and_risk(vec, mu, covar):
    # expected return and risk (note: vec@covar@vec is a variance,
    # not a standard deviation)
    return vec@mu, vec@covar@vec
@njit(fastmath=True)
def negative_count(vec):
result = 0
for i in vec:
if i < 0:
result += 1
return result
@njit(fastmath=True)
def zero_count(vec):
result = 0
for i in vec:
if i == 0:
result += 1
return result
@jit(forceobj=True)
def spectral_gradient1(df, v, covar, mu, b1, thet, b2, gam, tol, lam, spars, MAX_ITER=2000):
    vec = v
    B = identity(len(vec))
    aug_lam = lam
    vec_sum, grad_norms = [], []
    alpha = lipschitz(b1, b2, gam, vec, covar)
    for i in range(MAX_ITER):
        gradient = df(vec, mu, covar, aug_lam, b1, thet, b2, gam)
        # quasi-Newton direction; B is the diagonal spectral approximation
        direction = negative(inv(B) @ gradient)
        profit, risk = return_and_risk(vec, mu, covar)
        vec_1 = vec + alpha * direction
        gradient_1 = df(vec_1, mu, covar, aug_lam, b1, thet, b2, gam)
        B = spectral_grad(vec, vec_1, gradient, gradient_1, B)
        aug_lam = aug_lag(aug_lam, gam, vec_1)
        vec = proximal(vec_1, spars, b1, b2, gam, covar, gradient_1)
        vec_sum.append(vec.sum())
        grad_norms.append(norm(gradient, 2))
        # stopping criterion: gradient norm below tolerance; diagnostics
        # such as negative_count(vec), zero_count(vec) and vec.sum() can
        # be printed here or after the loop on non-convergence
        if norm(gradient, 2) <= tol:
            break
    return vec, array(vec_sum), array(grad_norms), i+1, profit, risk

# example run on simulated data
tol, spars, ini_lam = 1.e-4, 1.e-3, 1000.
beta_1, theta, beta_2, gam = 1., .5, 1., 1.
sim_data = -1 + 2*random.rand(1000, 5)   # uniform samples on [-1, 1)
sim_mean = mean(sim_data, axis=0)
sim_covar = cov(sim_data, rowvar=False)
v = ones_like(sim_mean) / len(sim_mean)  # equal-weight starting point
vec2000, vsum, grad, _, _, _ = spectral_gradient1(project_df, v, sim_covar, sim_mean, beta_1, theta, beta_2, gam, tol, ini_lam, spars)
I have timed the code once to check its speed, and it clearly has room for further optimization:
%timeit spectral_gradient1(project_df,v,sim_covar,sim_mean,beta_1,theta,beta_2,gam,tol,ini_lam,spars)
3.15 ms ± 1.09 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
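One candidate hot spot I have spotted but not yet tackled: spectral_grad returns a dense diagonal matrix built with diag, and every iteration then computes direction = negative(inv(B) @ gradient), i.e. a full matrix inverse of something that is diagonal by construction. Below is an untested sketch of a variant I am considering (my own rewrite, not part of the algorithm above) that keeps only the diagonal as a 1-D array, so the direction becomes an elementwise division:

@njit(fastmath={'nsz'})
def spectral_diag(v, v_1, g, g_1, prev_d):
    # same update rule as spectral_grad, but returns the diagonal of B
    # as a 1-D array instead of a dense matrix
    s = v_1 - v
    y = g_1 - g
    if s@s > s@y:
        w_k = (s@s - s@y) / (s**4).sum()
        return 1./(1. + w_k*s**2)
    return prev_d

# in spectral_gradient1, B = identity(len(vec)) would become d = ones_like(vec),
# and the search direction would become an elementwise division:
# direction = -gradient / d

I also wonder whether @jit(forceobj=True) on the driver buys much, since object mode largely falls back to the interpreter; pointers on that would be welcome as well.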