EDIT: You can also make things a lot faster (like 1000x) with less memory using Numba:
import numpy as np
import numba as nb
def is_sub_arr_np(a1, a2):
    """Return whether 1-D array ``a2`` occurs as a contiguous run inside ``a1``.

    Every length-``len(a2)`` window of ``a1`` is compared against ``a2`` in
    one vectorized operation.

    Parameters
    ----------
    a1 : numpy.ndarray
        One-dimensional array to search in.
    a2 : numpy.ndarray
        One-dimensional array to search for.

    Returns
    -------
    numpy.bool_
        True if some window of ``a1`` equals ``a2`` element-wise, else False.
        An empty ``a2`` is reported as found.

    Raises
    ------
    ValueError
        If either array is not one-dimensional (the 1-tuple unpacking of
        ``shape``/``strides`` fails).
    """
    l1, = a1.shape
    s1, = a1.strides
    l2, = a2.shape
    if l2 > l1:
        # The pattern cannot fit. Without this guard the window count below
        # goes negative and as_strided raises ValueError.
        return np.False_
    # Zero-copy view of a1 as (l1 - l2 + 1) overlapping windows of length l2:
    # stepping one row and stepping one column both advance by one element.
    # Read-only, because the windows alias the same memory.
    a1_win = np.lib.stride_tricks.as_strided(
        a1, (l1 - l2 + 1, l2), (s1, s1), writeable=False
    )
    # The comparison materializes a boolean array with one entry per
    # (window, position) pair before reducing it.
    return np.any(np.all(a1_win == a2, axis=1))
@nb.jit(parallel=True)
def is_sub_arr_nb(a1, a2):
    """Return whether sequence ``a2`` occurs as a contiguous run inside ``a1``.

    Numba-compiled scan: for each candidate start offset ``i`` the elements
    ``a1[i:i + len(a2)]`` are compared with ``a2`` one by one, stopping at the
    first mismatch, so no window array is allocated.

    Parameters
    ----------
    a1 : array
        Indexable sequence to search in.
    a2 : array
        Indexable sequence to search for.

    Returns
    -------
    bool
        True as soon as one start offset matches every element of ``a2``;
        False if no offset matches. When ``a2`` is longer than ``a1`` the
        outer range is empty and the result is False.
    """
    # NOTE(review): this loop returns early from inside an nb.prange loop under
    # parallel=True -- confirm against the Numba docs how that is handled
    # (whether the loop is actually parallelized) for the installed version.
    for i in nb.prange(len(a1) - len(a2) + 1):
        for j in range(len(a2)):
            if a1[i + j] != a2[j]:
                # Mismatch: this start offset is ruled out.
                break
        else:
            # Inner loop finished without a break: every element matched.
            return True
    return False
# Test: both implementations are run on the same seeded random data.
np.random.seed(0)
arr1 = np.random.randint(100, size=100_000)
arr2 = np.random.randint(100, size=1_000)
for check in (is_sub_arr_np, is_sub_arr_nb):
    print(check(arr1, arr2))
# False
# False
# Now enforce a match at the end by overwriting the tail of arr1 with arr2
arr1[-arr2.size:] = arr2
for check in (is_sub_arr_np, is_sub_arr_nb):
    print(check(arr1, arr2))
# True
# True
# Timing
# NOTE: `%timeit` is an IPython/Jupyter magic command, so the two timing lines
# below only run inside an IPython session, not as a plain Python script.
# The comment after each command is the output recorded by the author.
%timeit is_sub_arr_np(arr1, arr2)
# 99.4 ms ± 567 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
%timeit is_sub_arr_nb(arr1, arr2)
# 124 µs ± 863 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
Not sure this is the most efficient answer but it is one possible solution:
import numpy as np
def is_sub_arr(a1, a2):
    """Return whether 1-D array ``a2`` occurs as a contiguous run inside ``a1``.

    Every length-``len(a2)`` window of ``a1`` is compared against ``a2`` in
    one vectorized operation.

    Parameters
    ----------
    a1 : numpy.ndarray
        One-dimensional array to search in.
    a2 : numpy.ndarray
        One-dimensional array to search for.

    Returns
    -------
    numpy.bool_
        True if some window of ``a1`` equals ``a2`` element-wise, else False.
        An empty ``a2`` is reported as found.

    Raises
    ------
    ValueError
        If either array is not one-dimensional (the 1-tuple unpacking of
        ``shape``/``strides`` fails).
    """
    l1, = a1.shape
    s1, = a1.strides
    l2, = a2.shape
    if l2 > l1:
        # The pattern cannot fit. Without this guard the window count below
        # goes negative and as_strided raises ValueError.
        return np.False_
    # Zero-copy view of a1 as (l1 - l2 + 1) overlapping windows of length l2:
    # stepping one row and stepping one column both advance by one element.
    # Read-only, because the windows alias the same memory.
    a1_win = np.lib.stride_tricks.as_strided(
        a1, (l1 - l2 + 1, l2), (s1, s1), writeable=False
    )
    # Row k is True when window k equals a2 at every position.
    return np.any(np.all(a1_win == a2, axis=1))
# Small demo: arr2 appears inside arr1 as a contiguous run, arr3 does not.
arr1 = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
arr2 = np.array([4, 5, 6])
arr3 = np.array([4, 5, 7])
for needle in (arr2, arr3):
    print(is_sub_arr(arr1, needle))
# True
# False
`arr2 in arr1` will of course not work, because it's True only if the 1-d array `arr2` is an element in a 2-d array. Note that a set-based test returns `True` on `set([4, 5, 7]).issubset([7, 6, 4, 5, 9, 8])`. Does `arr1` need to be sorted?