Module 1.4 Phrase Structure Trees - iffatAGheyas/NLP-handbook GitHub Wiki
Module 1.4: Basic Syntax – Phrase Structure Trees
Syntax studies how words combine into phrases and sentences. Phrase structure trees visually represent hierarchical structure defined by a Context-Free Grammar (CFG).
Key Concepts
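- Context-Free Grammar (CFG)
A set of production rules of the form A -> α, where A is a single non-terminal symbol and α is a sequence of terminals and/or non-terminals.
- Phrase Structure Rules
The individual CFG productions, such as S -> NP VP, each stating how one constituent is built from smaller ones.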
- Parse Tree
A tree showing expansion from the start symbol to terminals via CFG rules.
1. Defining a Grammar and Parsing
import nltk
from nltk import CFG
from nltk.parse import RecursiveDescentParser
# Define a simple CFG: S is the start symbol, quoted strings are terminals.
grammar = CFG.fromstring("""
S -> NP VP
NP -> Det N
VP -> V NP
Det -> 'the' | 'a'
N -> 'dog' | 'cat'
V -> 'sees' | 'pets'
""")

# Initialize parser and parse a tokenized sentence.
parser = RecursiveDescentParser(grammar)
sentence = "the dog sees a cat".split()
# parse() returns an iterator; materialize it so `trees` can be reused below.
trees = list(parser.parse(sentence))

# Display the parse tree(s)
if not trees:
    print("No parse found for:", " ".join(sentence))
for tree in trees:
    tree.pretty_print()
2. Exporting a Graphical Parse Tree
# tree_matplotlib_demo.ipynb
import os
import matplotlib.pyplot as plt
from nltk import Tree
def save_tree_png_no_graphviz(tree: Tree, filename: str):
    """
    Render an nltk.Tree to PNG using matplotlib only.

    Leaves are placed left to right at x = 0, 1, 2, ...; every interior
    node is centred over its children, and a node at depth d is drawn at
    y = -d so the root ends up on top.

    Args:
        tree: Parse tree to draw. Anything that is not a Tree (normally a
            token string) is treated as a leaf.
        filename: Path of the PNG to write. A missing parent directory is
            created first.
    """
    # 1. Compute (x, y) positions for each node.
    # Positions are keyed by the node's path (tuple of child indices from
    # the root) rather than id(node): equal leaf tokens may be the very same
    # Python object, in which case id-based keys collide and repeated words
    # would all be drawn at one position.
    coords = {}
    next_leaf_x = 0

    def _layout(node, path=()):
        """Recursively assign coords; returns the x-position of node."""
        nonlocal next_leaf_x
        if isinstance(node, Tree) and len(node) > 0:
            # Interior node: lay out children first, then centre over them.
            child_xs = [
                _layout(child, path + (i,)) for i, child in enumerate(node)
            ]
            x = sum(child_xs) / len(child_xs)
        else:
            # Leaf (or childless Tree): take the next free column.
            x = next_leaf_x
            next_leaf_x += 1
        coords[path] = (x, -len(path))  # depth == length of the path
        return x

    def _draw(node, path=()):
        """Draw node's label box, then edges to and subtrees of its children."""
        x, y = coords[path]
        label = node.label() if isinstance(node, Tree) else node
        ax.text(x, y, label,
                ha='center', va='center',
                bbox=dict(boxstyle='round,pad=0.3', fc='white', ec='black', lw=1))
        if isinstance(node, Tree):
            for i, child in enumerate(node):
                child_path = path + (i,)
                cx, cy = coords[child_path]
                ax.plot([x, cx], [y, cy], '-', color='black')
                _draw(child, child_path)

    _layout(tree)

    # 2. Draw with matplotlib
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        _draw(tree)
        ax.axis('off')
        fig.tight_layout()

        # 3. Ensure output dir exists (exist_ok avoids a check-then-create race)
        out_dir = os.path.dirname(filename)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        # 4. Save
        fig.savefig(filename, dpi=300)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)
    print(f"Saved tree PNG to {filename!r}")
# --- Usage (assuming you have `trees` from your parser) ---
# Guard against an empty result: trees[0] would raise IndexError when the
# parser found no tree for the sentence.
if trees:
    save_tree_png_no_graphviz(trees[0], "images/module1_4_tree.png")
else:
    print("No parse tree available to render.")
Continue to Module 2: Probability and Statistics for NLP