Module 1 3 Finite State Automata - iffatAGheyas/NLP-handbook GitHub Wiki

Module 1.3: Finite-State Automata – Concept & Implementation

Finite-State Automata (FSAs) are simple state machines that accept or reject sequences of symbols. They’re the backbone of tokenizers, spell-checkers, lexical analyzers, and more.

1. DIY DFA in Python

Here’s a minimal deterministic finite automaton (DFA) implementation that recognizes binary strings ending in “01”:

# dfa_demo.ipynb

# 1. Define the DFA components
#
# State meanings for the "ends in 01" language:
#   q0 - start state; the input read so far does not end in "0" or "01"
#   q1 - the input read so far ends in "0"
#   q2 - the input read so far ends in "01" (the only accepting state)
alphabet    = {'0', '1'}
states      = {'q0', 'q1', 'q2'}
accepting   = {'q2'}
start_state = 'q0'

# Transition table: (current state, input symbol) -> next state.
transitions = {
    # from q0
    ('q0', '0'): 'q1',
    ('q0', '1'): 'q0',
    # from q1
    ('q1', '0'): 'q1',
    ('q1', '1'): 'q2',
    # from q2
    ('q2', '0'): 'q1',
    ('q2', '1'): 'q0',
}

# 2. DFA class definition
class DFA:
    """Deterministic finite automaton driven by a (state, symbol) lookup table."""

    def __init__(self, states, alphabet, transitions, start, accepting):
        """
        Store the five components that define the automaton.

        :param states: set of states
        :param alphabet: set of symbols
        :param transitions: dict mapping (state, symbol) -> next_state
        :param start: start state
        :param accepting: set of accepting states
        """
        self.start = start
        self.accepting = accepting
        self.states = states
        self.alphabet = alphabet
        self.trans = transitions

    def accepts(self, s: str) -> bool:
        """
        Run the automaton over s and report whether it halts in an
        accepting state. A missing transition rejects the input at once.
        """
        # Unique marker so that a missing table entry can be told apart
        # from any value actually stored in the table.
        no_move = object()
        current = self.start
        for symbol in s:
            current = self.trans.get((current, symbol), no_move)
            if current is no_move:
                return False
        return current in self.accepting

# 3. Demo / Tests
if __name__ == "__main__":
    # Assemble the automaton from the component definitions above.
    dfa = DFA(
        states=states,
        alphabet=alphabet,
        transitions=transitions,
        start=start_state,
        accepting=accepting,
    )
    # Sample inputs; the empty string is included as an edge case.
    tests = ['01', '101', '1101', '100', '010', '']
    print("String → Accept?")
    print("-" * 20)
    for t in tests:
        # Right-align the quoted input in a 5-character column.
        print(f"{t!r:>5} → {dfa.accepts(t)}")

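Output:

String → Accept?
--------------------
 '01' → True
'101' → True
'1101' → True
'100' → False
'010' → False
   '' → False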

2. Extending to NFAs

from collections import defaultdict

class NFA:
    """Nondeterministic finite automaton with ε-moves.

    Transitions map ``(state, symbol)`` to a set of successor states. The
    symbol ``'ε'`` marks a move that is taken without consuming input.
    """

    # Reserved label for moves that consume no input.
    EPSILON = 'ε'

    def __init__(self, states, alphabet, transitions, start, accepting):
        """
        :param states: set of states
        :param alphabet: set of input symbols ('ε' is added automatically)
        :param transitions: dict mapping (state, symbol) -> set of next states
        :param start: start state
        :param accepting: set of accepting states
        """
        self.states    = states
        self.alphabet  = alphabet | {self.EPSILON}
        self.trans     = defaultdict(set, transitions)  # (state, symbol) → set of states
        self.start     = start
        self.accepting = accepting

    def _epsilon_closure(self, states):
        """Return a new set: `states` plus everything reachable via ε-moves only."""
        closure = set(states)
        stack = list(closure)
        while stack:
            q = stack.pop()
            # Use .get() rather than indexing: indexing a defaultdict inserts
            # an empty entry for every missing key, growing the table on
            # each lookup.
            for r in self.trans.get((q, self.EPSILON), ()):
                if r not in closure:
                    closure.add(r)
                    stack.append(r)
        return closure

    def accepts(self, s: str) -> bool:
        """
        Simulate the NFA (with ε-moves) on s.

        :param s: input string; each character is one input symbol
        :return: True if at least one run ends in an accepting state
        """
        current = self._epsilon_closure({self.start})
        for ch in s:
            # 'ε' is the reserved "no input" label, not an input symbol; a
            # literal 'ε' in the input must not be matched against ε-moves.
            if ch == self.EPSILON:
                return False
            moved = set()
            for q in current:
                moved.update(self.trans.get((q, ch), ()))
            current = self._epsilon_closure(moved)
            # No live states left: no suffix can lead to acceptance.
            if not current:
                return False
        return bool(current & self.accepting)

# Example NFA for language: (ab|a)*
# From q0, reading 'a' either stays in q0 (the "a" branch) or moves to q1
# to wait for the 'b' that completes the "ab" branch.
nfa_trans = {
    ('q0', 'a'): {'q0', 'q1'},
    ('q1', 'b'): {'q0'},
}
nfa = NFA(
    {'q0', 'q1'},   # states
    {'a', 'b'},     # alphabet
    nfa_trans,      # transitions
    'q0',           # start state
    {'q0'},         # accepting states
)
# Print one verdict per line, in the order listed.
print(
    *(nfa.accepts(word) for word in (
        'abab',  # True
        'aba',   # True
        'abb',   # False
    )),
    sep="\n",
)

Output:

True
True
False

Continue to 1.4 Basic Syntax: Phrase Structure Trees