Boyer-Moore string search algorithm in C# — fast pattern matching explained
The Boyer-Moore algorithm is one of the most efficient string searching algorithms, particularly for large text and longer patterns. It searches from right to left but shifts the pattern intelligently to skip unnecessary comparisons, often running in sublinear time.
When to Use Boyer-Moore
Boyer-Moore excels when:
- Searching for patterns in large text (millions of characters)
- The pattern is relatively long (10+ characters)
- You need to find the same pattern repeatedly in different texts
- Performance is critical and you can't use native compiled solutions
For everyday string searching in C#, string.IndexOf() or string.Contains() is simpler and often faster for short patterns thanks to hardware optimizations. Use Boyer-Moore when you need the theoretical efficiency advantage or are implementing custom search logic.
How Boyer-Moore Works
Boyer-Moore uses two heuristics to skip sections of text:
- Bad Character Rule: When a mismatch occurs, shift the pattern to align the mismatched text character with its rightmost occurrence in the pattern
- Good Suffix Rule: When a mismatch occurs after matching a suffix, shift the pattern based on the matched suffix
Most implementations use just the bad character rule for simplicity — it provides most of the performance benefit.
Basic Boyer-Moore Implementation
public class BoyerMooreSearch
{
private readonly string _pattern;
private readonly Dictionary<char, int> _badCharShift;
public BoyerMooreSearch(string pattern)
{
if (string.IsNullOrEmpty(pattern))
throw new ArgumentException("Pattern cannot be null or empty", nameof(pattern));
_pattern = pattern;
_badCharShift = BuildBadCharTable(pattern);
}
private Dictionary<char, int> BuildBadCharTable(string pattern)
{
var table = new Dictionary<char, int>();
int patternLength = pattern.Length;
// For each character in the pattern, store how far we can shift
for (int i = 0; i < patternLength - 1; i++)
{
table[pattern[i]] = patternLength - 1 - i;
}
return table;
}
public int Search(string text)
{
if (string.IsNullOrEmpty(text))
return -1;
if (text.Length < _pattern.Length)
return -1;
int textLength = text.Length;
int patternLength = _pattern.Length;
int skip;
for (int i = 0; i <= textLength - patternLength; i += skip)
{
skip = 0;
// Check pattern from right to left
for (int j = patternLength - 1; j >= 0; j--)
{
if (_pattern[j] != text[i + j])
{
// Mismatch found - calculate shift
char mismatchChar = text[i + j];
skip = _badCharShift.TryGetValue(mismatchChar, out int shift)
? shift
: patternLength;
break;
}
}
// If skip is still 0, we found a match
if (skip == 0)
return i;
}
return -1; // Pattern not found
}
public List<int> SearchAll(string text)
{
var results = new List<int>();
if (string.IsNullOrEmpty(text) || text.Length < _pattern.Length)
return results;
int textLength = text.Length;
int patternLength = _pattern.Length;
int skip;
for (int i = 0; i <= textLength - patternLength; i += skip)
{
skip = 0;
for (int j = patternLength - 1; j >= 0; j--)
{
if (_pattern[j] != text[i + j])
{
char mismatchChar = text[i + j];
skip = _badCharShift.TryGetValue(mismatchChar, out int shift)
? shift
: patternLength;
break;
}
}
if (skip == 0)
{
results.Add(i);
skip = 1; // Move one position forward to find overlapping matches
}
}
return results;
}
}Usage Examples
// Find first occurrence
var searcher = new BoyerMooreSearch("algorithm");
string text = "The Boyer-Moore algorithm is a fast string search algorithm.";
int position = searcher.Search(text);
if (position >= 0)
Console.WriteLine($"Pattern found at index {position}");
// Pattern found at index 17
// Find all occurrences
var multiSearcher = new BoyerMooreSearch("test");
string multiText = "This is a test. Another test here. Final test!";
List<int> positions = multiSearcher.SearchAll(multiText);
foreach (int pos in positions)
Console.WriteLine($"Found at index {pos}");
// Found at index 10
// Found at index 24
// Found at index 42Case-Insensitive Search
public class CaseInsensitiveBoyerMoore
{
private readonly string _pattern;
private readonly Dictionary<char, int> _badCharShift;
public CaseInsensitiveBoyerMoore(string pattern)
{
if (string.IsNullOrEmpty(pattern))
throw new ArgumentException("Pattern cannot be null or empty", nameof(pattern));
// Normalize pattern to lowercase
_pattern = pattern.ToLowerInvariant();
_badCharShift = BuildBadCharTable(_pattern);
}
private Dictionary<char, int> BuildBadCharTable(string pattern)
{
var table = new Dictionary<char, int>();
int patternLength = pattern.Length;
for (int i = 0; i < patternLength - 1; i++)
{
table[pattern[i]] = patternLength - 1 - i;
}
return table;
}
public int Search(string text)
{
if (string.IsNullOrEmpty(text))
return -1;
// Normalize text to lowercase
string normalizedText = text.ToLowerInvariant();
int textLength = normalizedText.Length;
int patternLength = _pattern.Length;
if (textLength < patternLength)
return -1;
int skip;
for (int i = 0; i <= textLength - patternLength; i += skip)
{
skip = 0;
for (int j = patternLength - 1; j >= 0; j--)
{
if (_pattern[j] != normalizedText[i + j])
{
char mismatchChar = normalizedText[i + j];
skip = _badCharShift.TryGetValue(mismatchChar, out int shift)
? shift
: patternLength;
break;
}
}
if (skip == 0)
return i;
}
return -1;
}
}
// Usage
var caseInsensitive = new CaseInsensitiveBoyerMoore("ALGORITHM");
string text = "The Boyer-Moore algorithm is fast.";
int pos = caseInsensitive.Search(text);
Console.WriteLine($"Found at index {pos}"); // Found at index 17Performance Comparison
using System.Diagnostics;
public class SearchBenchmark
{
public static void CompareMethods()
{
string pattern = "sophisticated";
string text = new string('a', 1_000_000) + "sophisticated" + new string('b', 1_000_000);
// Built-in IndexOf
var sw1 = Stopwatch.StartNew();
int result1 = text.IndexOf(pattern);
sw1.Stop();
Console.WriteLine($"IndexOf: {sw1.ElapsedMilliseconds}ms, found at {result1}");
// Boyer-Moore
var sw2 = Stopwatch.StartNew();
var bm = new BoyerMooreSearch(pattern);
int result2 = bm.Search(text);
sw2.Stop();
Console.WriteLine($"Boyer-Moore: {sw2.ElapsedMilliseconds}ms, found at {result2}");
}
}
// Typical output (results vary by hardware):
// IndexOf: 2ms, found at 1000000
// Boyer-Moore: 3ms, found at 1000000Note: For most real-world C# applications, string.IndexOf() is optimized by the runtime and often faster than a managed Boyer-Moore implementation. Boyer-Moore shines in:
- Academic/educational contexts
- Custom scenarios where you need to modify the algorithm
- Very large texts with long patterns
- Platforms without native string search optimizations
Optimized Version with Span<char>
For better performance in modern C#, use Span<char> to avoid string allocations:
public class SpanBoyerMoore
{
private readonly string _pattern;
private readonly Dictionary<char, int> _badCharShift;
public SpanBoyerMoore(string pattern)
{
_pattern = pattern ?? throw new ArgumentNullException(nameof(pattern));
if (pattern.Length == 0)
throw new ArgumentException("Pattern cannot be empty", nameof(pattern));
_badCharShift = BuildBadCharTable(pattern.AsSpan());
}
private Dictionary<char, int> BuildBadCharTable(ReadOnlySpan<char> pattern)
{
var table = new Dictionary<char, int>();
int patternLength = pattern.Length;
for (int i = 0; i < patternLength - 1; i++)
{
table[pattern[i]] = patternLength - 1 - i;
}
return table;
}
public int Search(ReadOnlySpan<char> text)
{
ReadOnlySpan<char> pattern = _pattern.AsSpan();
int textLength = text.Length;
int patternLength = pattern.Length;
if (textLength < patternLength)
return -1;
int skip;
for (int i = 0; i <= textLength - patternLength; i += skip)
{
skip = 0;
for (int j = patternLength - 1; j >= 0; j--)
{
if (pattern[j] != text[i + j])
{
char mismatchChar = text[i + j];
skip = _badCharShift.TryGetValue(mismatchChar, out int shift)
? shift
: patternLength;
break;
}
}
if (skip == 0)
return i;
}
return -1;
}
}
// Usage
var spanSearcher = new SpanBoyerMoore("pattern");
string text = "This is a pattern matching example";
int index = spanSearcher.Search(text.AsSpan());
Console.WriteLine($"Found at {index}"); // Found at 10Real-World Use Cases
// 1. Log file analysis - find error patterns
public class LogAnalyzer
{
private readonly BoyerMooreSearch _errorSearcher;
public LogAnalyzer()
{
_errorSearcher = new BoyerMooreSearch("ERROR:");
}
public List<string> FindErrorLines(string logContent)
{
var errorLines = new List<string>();
var positions = _errorSearcher.SearchAll(logContent);
foreach (int pos in positions)
{
int lineStart = logContent.LastIndexOf('\n', pos) + 1;
int lineEnd = logContent.IndexOf('\n', pos);
if (lineEnd == -1) lineEnd = logContent.Length;
errorLines.Add(logContent.Substring(lineStart, lineEnd - lineStart));
}
return errorLines;
}
}
// 2. DNA sequence matching
public class DnaSequenceFinder
{
public List<int> FindSequence(string dna, string pattern)
{
var searcher = new BoyerMooreSearch(pattern);
return searcher.SearchAll(dna);
}
}
var dnaFinder = new DnaSequenceFinder();
string genome = "ATCGATCGATCGATCGAAATCGATCG";
string sequence = "ATCG";
var matches = dnaFinder.FindSequence(genome, sequence);
Console.WriteLine($"Found {matches.Count} occurrences");
// 3. Text document search
public class DocumentSearcher
{
public Dictionary<string, List<int>> SearchInDocuments(
Dictionary<string, string> documents,
string searchTerm)
{
var results = new Dictionary<string, List<int>>();
var searcher = new BoyerMooreSearch(searchTerm);
foreach (var doc in documents)
{
var positions = searcher.SearchAll(doc.Value);
if (positions.Count > 0)
results[doc.Key] = positions;
}
return results;
}
}Time Complexity
-
Best case: O(n/m) where n = text length, m = pattern length
Sublinear! The algorithm can skip sections of text -
Worst case: O(n × m)
Occurs with highly repetitive patterns and text (rare in practice) -
Average case: O(n)
Much better than naive O(n × m) string matching -
Preprocessing: O(m + σ) where σ = alphabet size
Building the bad character table
Key Takeaways
✅ Use Boyer-Moore when:
- Working with very large texts
- Searching for long patterns repeatedly
- Implementing custom search algorithms
- Learning about algorithm optimization
❌ Use string.IndexOf() when:
- Doing everyday string searches
- Working with short patterns
- Performance is good enough (usually the case)
The Boyer-Moore algorithm demonstrates clever optimization — by searching right-to-left and using character occurrence information, it achieves sublinear search time in many cases, making it one of the fastest string search algorithms in practice.
Take It Further

The community's favorite C# guide — teaches every concept you just read about with clarity.
The C# Player's Guide — 5th Edition · RB Whitaker
Get it on Amazon →As an Amazon Associate I earn from qualifying purchases.