fm_index

class FMIndex:

An FM-index for efficient full-text search on a single string.

The FM-index is a compressed text index based on the Burrows–Wheeler Transform (BWT).
It supports fast substring queries whose runtime depends only on the pattern length,
not on the size of the indexed text.

Construction

Time / Space Complexity

  • Time: O(N log σ)
  • Space: O(N log σ)

where:

  • N = length of the indexed string
  • σ = size of the alphabet (2⁸ for UCS-1, 2¹⁶ for UCS-2, etc.)
from fm_index import FMIndex

fm = FMIndex("mississippi")
FMIndex(data: str)
def item(self) -> str:

Convert the FMIndex back into the original string.

Complexity

  • Time: O(N log σ)
  • Space: O(N)

Examples

fm.item()
# 'mississippi'
def contains(self, pattern: str) -> bool:

Check whether the indexed string contains the given pattern.

Complexity

  • Time: O(|pattern| log σ)
  • Space: O(|pattern|)

Examples

fm.contains("issi")
# True
def count(self, pattern: str) -> int:

Count how many times a pattern appears in the indexed string.

Complexity

  • Time: O(|pattern| log σ)
  • Space: O(|pattern|)

Examples

fm.count("issi")
# 2
def locate(self, pattern: str) -> list[int]:

Locate all starting positions of the pattern in the indexed string.
⚠️ Order of returned positions is not guaranteed.

Complexity

  • Time: O((|pattern| + |count|) log σ)
  • Space: O(|pattern| + |count|)

Examples

fm.locate("issi")
# [4, 1]
def startswith(self, prefix: str) -> bool:

Check if the indexed string starts with the given prefix.

Complexity

  • Time: O(|prefix| log σ)
  • Space: O(|prefix|)

Examples

fm.startswith("mi")
# True
def endswith(self, suffix: str) -> bool:

Check if the indexed string ends with the given suffix.

Complexity

  • Time: O(|suffix| log σ)
  • Space: O(|suffix|)

where:

  • |suffix| = length of the suffix
  • σ = size of the alphabet (2⁸ for UCS-1, 2¹⁶ for UCS-2, etc.)

Examples

fm.endswith("pi")
# True
class MultiFMIndex:

A multi-document FM-index for fast substring search across multiple strings.

Internally, all strings are concatenated with separators and indexed as a single FM-index,
while preserving the ability to map matches back to their original documents.

Construction

Time / Space Complexity

  • Time: O(S log σ)
  • Space: O(S log σ)

where:

  • S = total length of all indexed strings
  • σ = size of the alphabet (2⁸ for UCS-1, 2¹⁶ for UCS-2, etc.)
from fm_index import MultiFMIndex

mfm = MultiFMIndex(["abcabcabcabc", "xxabcabcxxabc", "abcababcabc"])
MultiFMIndex(data: Iterable[str])
def item(self) -> list[str]:

Convert the index back into the original list of strings.

Complexity

  • Time: O(S log σ)
  • Space: O(S)

Examples

mfm.item()
# ['abcabcabcabc', 'xxabcabcxxabc', 'abcababcabc']
def contains(self, pattern: str) -> bool:

Check if the pattern exists as a full document in the index.

Complexity

  • Time: O(|pattern| log σ)
  • Space: O(|pattern|)

Examples

mfm.contains("abcabcabcabc")
# True
def count_all(self, pattern: str) -> int:

Count total occurrences of a pattern across all documents.

Complexity

  • Time: O(|pattern| log σ)
  • Space: O(|pattern|)

Examples

mfm.count_all("abc")
# 10
def count(self, pattern: str) -> dict[int, int]:

Count occurrences per document.
Returns {doc_index: count}.

Complexity

  • Time: O((|pattern| + |total_count|) log σ)
  • Space: O(|pattern| + |output|)

Examples

mfm.count("abc")
# {0: 4, 1: 3, 2: 3}
def locate(self, pattern: str) -> dict[int, list[int]]:

Locate occurrences per document.
⚠️ Order is not guaranteed.

Complexity

  • Time: O((|pattern| + |total_count|) log σ)
  • Space: O(|pattern| + |total_count|)

Examples

mfm.locate("abc")
# {0: [9, 6, 3, 0], 1: [10, 2, 5], 2: [8, 0, 5]}
def startswith(self, prefix: str) -> list[int]:

List document indices whose content starts with the prefix.

Complexity

  • Time: O(|prefix| log σ)
  • Space: O(|prefix|)

Examples

mfm.startswith("abc")
# [2, 0]
def endswith(self, suffix: str) -> list[int]:

List document indices whose content ends with the suffix.

Complexity

  • Time: O(|suffix| log σ)
  • Space: O(|suffix|)

Examples

mfm.endswith("abc")
# [2, 1, 0]