I was thinking that maybe something other than the brute-force method would work and discovered the Boyer-Moore search algorithm. Shamelessly translating the C and Java code found at Boyer–Moore string search algorithm into VB.NET, I arrived at
Public Class BoyerMooreSearch
' from C and Java code at http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
Private Shared Function SuffixLength(needle As Byte(), p As Integer) As Integer
Dim len As Integer = 0
Dim j = needle.Length - 1
Dim i = 0
While i >= 0 AndAlso needle(i) = needle(j)
i -= 1
j -= 1
len += 1
End While
Return len
End Function
Private Shared Function GetOffsetTable(needle As Byte()) As Integer()
Dim table(needle.Length - 1) As Integer
Dim lastPrefixPosition = needle.Length
For i = needle.Length - 1 To 0 Step -1
If Isprefix(needle, i + 1) Then
lastPrefixPosition = i + 1
End If
table(needle.Length - 1 - i) = lastPrefixPosition - i + needle.Length - 1
Next
For i = 0 To needle.Length - 2
Dim slen = SuffixLength(needle, i)
table(slen) = needle.Length - 1 - i + slen
Next
Return table
End Function
Private Shared Function Isprefix(needle As Byte(), p As Integer) As Boolean
Dim j = 0
For i = p To needle.Length - 1
If needle(i) <> needle(j) Then
Return False
End If
j += 1
Next
Return True
End Function
Private Shared Function GetCharTable(needle As Byte()) As Integer()
Const ALPHABET_SIZE As Integer = 256
Dim table(ALPHABET_SIZE - 1) As Integer
For i = 0 To table.Length - 1
table(i) = needle.Length
Next
For i = 0 To needle.Length - 2
table(needle(i)) = needle.Length - 1 - i
Next
Return table
End Function
Shared Function IndexOf(haystack As Byte(), needle As Byte()) As Integer
If needle.Length = 0 Then
Return 0
End If
Dim charTable = GetCharTable(needle)
Dim offsetTable = GetOffsetTable(needle)
Dim i = needle.Length - 1
While i < haystack.Length
Dim j = needle.Length - 1
While j >= 0 AndAlso haystack(i) = needle(j)
i -= 1
j -= 1
End While
If j < 0 Then
Return i + 1
End If
i += Math.Max(offsetTable(needle.Length - 1 - j), charTable(haystack(i)))
End While
Return -1
End Function
End Class
And to test it (suspecting that the LINQ code presented by @OneFineDay would demolish it for performance):
Imports System.IO
Imports System.Text
Module Module1
Dim bytesToCheck As List(Of Byte())
Dim rand As New Random
Function GetTestByteArrays() As List(Of Byte())
Dim testBytes As New List(Of Byte())
' N.B. adjust the numbers used in CreateTestFile according to the quantity (e.g. 10) of testData used
For i = 1 To 10
testBytes.Add(Encoding.ASCII.GetBytes("ABCDEFgfdhgf" & i.ToString() & "sdfgjdfjFGH"))
Next
Return testBytes
End Function
Sub CreateTestFile(f As String)
' Make a 4MB file of test data
' write a load of bytes which are not going to be in the
' judiciously chosen data to search for...
Using fs As New FileStream(f, FileMode.Create, FileAccess.Write)
For i = 0 To 2 ^ 22 - 1
fs.WriteByte(CByte(rand.Next(128, 256)))
Next
End Using
' ... and put the known data into the test data
Using fs As New FileStream(f, FileMode.Open)
For i = 0 To bytesToCheck.Count - 1
fs.Position = CLng(i * 2 ^ 18)
fs.Write(bytesToCheck(i), 0, bytesToCheck(i).Length)
Next
End Using
End Sub
Sub Main()
' the byte sequences to be searched for
bytesToCheckFor = GetTestByteArrays()
' Make a test file so that the data can be inspected
Dim testFileName As String = "C:\temp\testbytes.dat"
CreateTestFile(testFileName)
Dim fileData = File.ReadAllBytes(testFileName)
Dim sw As New Stopwatch
Dim containsP As Boolean = True
sw.Reset()
sw.Start()
For i = 0 To bytesToCheckFor.Count - 1
If BoyerMooreSearch.IndexOf(fileData, bytesToCheckFor(i)) = -1 Then
containsP = False
Exit For
End If
Next
sw.Stop()
Console.WriteLine("Boyer-Moore: {0} in {1}", containsP, sw.ElapsedTicks)
sw.Reset()
sw.Start()
Dim temp As New List(Of Byte)
Array.ForEach(bytesToCheckFor.ToArray, Sub(byteArray) Array.ForEach(byteArray, Sub(_byte) temp.Add(_byte)))
Dim result = fileData.All(Function(_byte) temp.Contains(_byte))
sw.Stop()
Console.WriteLine("LINQ: {0} in {1}", result, sw.ElapsedTicks)
Console.ReadLine()
End Sub
End Module
Now, I know that the byte sequences to match are in the test file (I confirmed that by using a hex editor to search for them) and, assuming (oh dear!) I am using the other method correctly, the latter doesn't work whereas mine does:
Boyer-Moore: True in 23913
LINQ: False in 3224
I did also test the first code example by OneFineDay for searching for small vs large patterns to match, and for less than seven or eight bytes that code was faster than Boyer-Moore. So, if you would care to test it for the size of data you're searching in and the size of the patterns you're looking for, Boyer-Moore might be a better fit to your "If possible, I'd like to do this as fast as possible."
EDIT
Further to the OP's uncertainty as to whether or not my suggested method works, here is a test with very small sample data:
Sub Test()
bytesToCheckFor = New List(Of Byte())
bytesToCheckFor.Add({0, 1}) ' the data to search for
bytesToCheckFor.Add({1, 2})
bytesToCheckFor.Add({0, 2})
Dim fileData As Byte() = {0, 1, 2} ' the file data
' METHOD 1: Boyer-Moore
Dim containsP As Boolean = True
For i = 0 To bytesToCheckFor.Count - 1
If BoyerMooreSearch.IndexOf(fileData, bytesToCheckFor(i)) = -1 Then
containsP = False
Exit For
End If
Next
Console.WriteLine("Boyer-Moore: {0}", containsP)
' METHOD 2: LINQ
Dim temp As New List(Of Byte)
Array.ForEach(bytesToCheckFor.ToArray, Sub(byteArray) Array.ForEach(byteArray, Sub(_byte) temp.Add(_byte)))
Dim result = fileData.All(Function(_byte) temp.Contains(_byte))
Console.WriteLine("LINQ: {0}", result)
Console.ReadLine()
End Sub
Outputs:
Boyer-Moore: False
LINQ: True
Also, I renamed the variables in my original Main() method to hopefully make them more meaningful.