Module 1 3 Finite State Automata - iffatAGheyas/NLP-handbook GitHub Wiki
Module 1.3: Finite-State Automata – Concept & Implementation
Finite-State Automata (FSA) are simple state-machines that accept or reject sequences of symbols. They’re the backbone of tokenizers, spell-checkers, lexical analyzers, and more.
1. DIY DFA in Python
Here’s a minimal DFA implementation that recognizes binary strings ending in “01”:
# dfa_demo.ipynb
# 1. Define the DFA components
# The machine remembers how much of the suffix "01" it has just read:
#   q0 - nothing useful yet (start state)
#   q1 - the last symbol was '0'
#   q2 - the last two symbols were "01" (accepting)
start_state = 'q0'
accepting = {'q2'}
alphabet = {'0', '1'}
states = {'q0', 'q1', 'q2'}

# (current state, input symbol) -> next state
transitions = {
    ('q0', '0'): 'q1',  # first half of "01" seen
    ('q0', '1'): 'q0',  # still nothing useful
    ('q1', '0'): 'q1',  # latest '0' may still start "01"
    ('q1', '1'): 'q2',  # suffix "01" completed
    ('q2', '0'): 'q1',  # a new '0' restarts the suffix
    ('q2', '1'): 'q0',  # "011" no longer ends in "01"
}
# 2. DFA class definition
class DFA:
    """Deterministic finite automaton that reads a string one character at a time.

    The transition function may be partial: a missing (state, symbol) entry
    makes the automaton reject the input.
    """

    # Sentinel for "no transition defined"; distinct from every possible
    # state value, so even a falsy or None state name is handled correctly.
    _NO_TRANSITION = object()

    def __init__(self, states, alphabet, transitions, start, accepting):
        """
        :param states: set of states
        :param alphabet: set of symbols
        :param transitions: dict mapping (state, symbol) -> next_state
        :param start: start state
        :param accepting: set of accepting states
        """
        self.states = states
        self.alphabet = alphabet
        self.trans = transitions
        self.start = start
        self.accepting = accepting

    def accepts(self, s: str) -> bool:
        """
        Return True if the DFA accepts the input string s.

        :param s: input string; each character is treated as one symbol
        :return: True if reading all of s from the start state ends in an
                 accepting state, False if it ends elsewhere or a
                 (state, symbol) pair has no transition
        """
        state = self.start
        for ch in s:
            # Single lookup; .get() also avoids inserting keys if the caller
            # passed a defaultdict as the transition table.
            state = self.trans.get((state, ch), self._NO_TRANSITION)
            if state is self._NO_TRANSITION:
                # If there's no valid transition, reject
                return False
        # Accept if ending state is in the set of accepting states
        return state in self.accepting
# 3. Demo / Tests
if __name__ == "__main__":
    # Build the "ends in 01" recognizer from the components defined above.
    dfa = DFA(states, alphabet, transitions, start_state, accepting)
    tests = ['01', '101', '1101', '100', '010', '']  # include empty string test

    # Two-line table header, then one row per test string.
    print("String → Accept?", "-" * 20, sep="\n")
    for candidate in tests:
        verdict = dfa.accepts(candidate)
        print(f"{repr(candidate):>5} → {verdict}")
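Output:
String → Accept?
--------------------
 '01' → True
'101' → True
'1101' → True
'100' → False
'010' → False
   '' → False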
2. Extending to NFAs
from collections import defaultdict
class NFA:
    """Nondeterministic finite automaton with ε-moves.

    Transitions map (state, symbol) to a collection of successor states.
    The symbol 'ε' marks a move that consumes no input.
    """

    # Label reserved for moves that consume no input.
    EPSILON = 'ε'

    def __init__(self, states, alphabet, transitions, start, accepting):
        """
        :param states: set of states
        :param alphabet: set of input symbols (without ε)
        :param transitions: dict mapping (state, symbol) -> set of next states;
                            use 'ε' as the symbol for ε-moves
        :param start: start state
        :param accepting: set of accepting states
        """
        self.states = states
        self.alphabet = alphabet | {'ε'}
        self.trans = defaultdict(set, transitions)  # (state, symbol) → set of states
        self.start = start
        self.accepting = accepting

    def _epsilon_closure(self, states):
        """Return every state reachable from `states` using only ε-moves."""
        closure = set(states)
        stack = list(closure)
        while stack:
            q = stack.pop()
            # .get() rather than [] so the defaultdict does not grow an
            # empty entry for every state that is merely looked up.
            for r in self.trans.get((q, self.EPSILON), ()):
                if r not in closure:
                    closure.add(r)
                    stack.append(r)
        return closure

    def accepts(self, s: str) -> bool:
        """Simulate NFA with ε-moves.

        :param s: input string; each character is treated as one symbol
        :return: True if some run over s ends in an accepting state
        """
        current = self._epsilon_closure({self.start})
        # consume input
        for ch in s:
            if ch == self.EPSILON:
                # ε labels moves that consume nothing; a literal 'ε' in the
                # input is not a symbol and must not follow ε-edges as if it
                # had been read.
                return False
            moved = set()
            for q in current:
                moved.update(self.trans.get((q, ch), ()))
            current = self._epsilon_closure(moved)
            if not current:
                # No live states remain, so no suffix can be accepted.
                return False
        return bool(current & self.accepting)
# Example NFA for language: (ab|a)*
# From q0, reading 'a' either stays in q0 (the "a" branch) or moves to q1,
# which must then read 'b' to return to q0 (the "ab" branch).
nfa_trans = {}
nfa_trans[('q0', 'a')] = {'q0', 'q1'}
nfa_trans[('q1', 'b')] = {'q0'}

nfa = NFA(
    states={'q0', 'q1'},
    alphabet={'a', 'b'},
    transitions=nfa_trans,
    start='q0',
    accepting={'q0'},
)

# Expected results, in order: True, True, False
for word in ('abab', 'aba', 'abb'):
    print(nfa.accepts(word))
Output:
True
True
False
Continue to 1.4 Basic Syntax: Phrase Structure Trees