If you need this to be really fast for large arrays, you can even use Numba's `prange` to do the counting in parallel (for small arrays it will be slower because of the parallel-processing overhead).
import numpy as np
from numba import njit, prange
@njit(parallel=True)
def parallel_nonzero_count(arr):
    """Count the entries of ``arr`` that are not equal to zero.

    The array is flattened to 1-D and walked with ``prange`` so the
    iterations can be distributed across threads.

    Parameters
    ----------
    arr : numpy.ndarray
        Array whose nonzero entries are counted.

    Returns
    -------
    int
        Number of entries for which ``entry != 0`` holds.
    """
    flat = arr.ravel()
    nonzero_total = 0
    for idx in prange(flat.size):
        # Each comparison contributes 1 (True) or 0 (False) to the running
        # total; the in-place ``+=`` form is kept on purpose.
        nonzero_total += flat[idx] != 0
    return nonzero_total
Note that with Numba you normally want to write out your loops explicitly, because optimizing loops is what Numba is really good at.
I actually timed it against the other solutions mentioned here (using my Python module simple_benchmark):

Code to reproduce:
import numpy as np
from numba import njit, prange
@njit
def n_nonzero(a):
    """Return how many entries of ``a`` differ from zero.

    A boolean mask of the nonzero positions is built first; the entries it
    selects are then gathered and the size of that selection is returned.
    """
    nonzero_mask = a != 0
    selected = a[nonzero_mask]
    return selected.size
@njit
def count_non_zero(np_arr):
    """Return the number of nonzero entries in ``np_arr``.

    ``np.nonzero`` yields a tuple of index arrays; the length of the first
    index array is used as the count.
    """
    index_arrays = np.nonzero(np_arr)
    first_axis_indices = index_arrays[0]
    return len(first_axis_indices)
@njit()
def methodB(a):
    """Return the number of entries of ``a`` that are not equal to zero.

    The comparison produces a boolean array; summing it counts the ``True``
    entries.
    """
    is_nonzero = a != 0
    return is_nonzero.sum()
@njit(parallel=True)
def parallel_nonzero_count(arr):
    """Count the nonzero entries of ``arr`` using a ``prange`` loop.

    The input is flattened to 1-D, and every position adds the result of
    its ``!= 0`` comparison to a running total.

    Parameters
    ----------
    arr : numpy.ndarray
        Array to inspect.

    Returns
    -------
    int
        How many entries compare unequal to zero.
    """
    values = arr.ravel()
    n_values = values.size
    hits = 0
    for pos in prange(n_values):
        # True/False from the comparison is accumulated as 1/0.
        hits += values[pos] != 0
    return hits
@njit()
def count_loop(a):
    """Count the nonzero items of ``a`` with an explicit loop.

    Iterates directly over ``a`` and increments a counter for every item
    that is not equal to zero.
    """
    nonzero_seen = 0
    for item in a:
        if item == 0:
            # Zeros are skipped; everything else is counted.
            continue
        nonzero_seen += 1
    return nonzero_seen
from simple_benchmark import benchmark
# Benchmark inputs keyed by array size: one array for each power of two
# from 2**2 up to 2**19 elements.
args = {}
for exp in range(2, 20):
    size = 2**exp
    arr = np.random.random(size)
    # Zero out every sample below 0.3 so each input mixes zero and
    # nonzero entries.
    arr[arr < 0.3] = 0.0
    args[size] = arr

# The same set of functions is both timed and warmed up.
# NOTE(review): the warm-up presumably keeps Numba's JIT compilation out of
# the measured timings - confirm against the simple_benchmark docs.
funcs_under_test = (
    n_nonzero,
    count_non_zero,
    methodB,
    np.count_nonzero,
    parallel_nonzero_count,
    count_loop,
)

b = benchmark(
    funcs=funcs_under_test,
    arguments=args,
    argument_name='array size',
    warmups=funcs_under_test,
)
Import statements, decorators and sample test harness.