
Module 2.4: Language Model Evaluation – Cross-Entropy & Perplexity

Language model evaluation quantifies how well a model predicts unseen text. Two common metrics:

  • Cross-Entropy

    $$H(W) = -\frac{1}{N}\sum_{i=1}^{N}\log P(w_i \mid w_1,\dots,w_{i-1})$$

  • Perplexity

    $$PP(W) = e^{H(W)}$$

    Natural logarithms are used here to match the code below; with base-2 logarithms the perplexity is instead $2^{H(W)}$.
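
As a quick numerical check on these definitions, a model that assigns every word the same probability 1/V has cross-entropy log V and perplexity exactly V, i.e. it is as "confused" as a uniform choice among V words. The short sketch below (illustrative, not part of the original notebook) verifies this for V = 8, the vocabulary size of the toy corpus used later.

import math

V = 8               # vocabulary size of the toy corpus below
p_uniform = 1 / V   # uniform model: every word gets probability 1/V
N = 10              # sentence length (any value gives the same result)

# Cross-entropy: average negative log-probability per word
H = -sum(math.log(p_uniform) for _ in range(N)) / N
print(H, math.log(V))   # both print log(8) ≈ 2.079

# Perplexity: exp of the cross-entropy, which recovers the vocabulary size
print(math.exp(H))      # 8.0 (up to floating-point error)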

1. Build a Bigram Model with Laplace Smoothing
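
The smoothed conditional probability built below adds one pseudo-count to every possible bigram:

$$P_{\text{Laplace}}(w_i \mid w_{i-1}) = \frac{C(w_{i-1}, w_i) + 1}{C(w_{i-1}) + V}$$

where $C(\cdot)$ is a count from the training corpus and $V$ is the vocabulary size.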

import math
from collections import Counter, defaultdict

# Toy training corpus
corpus = [
    "the cat sat on the mat",
    "the dog sat on the log",
    "the cat saw the dog"
]

# 1. Count unigrams & bigrams
unigrams = Counter()
bigrams  = Counter()
for sent in corpus:
    tokens = sent.split()
    unigrams.update(tokens)
    bigrams.update(zip(tokens, tokens[1:]))

# Vocabulary size
V = len(unigrams)

# 2. Build Laplace-smoothed bigram probabilities
bi_probs = defaultdict(float)
for prev in unigrams:
    for curr in unigrams:
        count_bg = bigrams.get((prev, curr), 0)
        bi_probs[(prev, curr)] = (count_bg + 1) / (unigrams[prev] + V)

# 3. Print some example bigram probabilities
print(f"Vocabulary size: {V}\n")
examples = [('the','cat'), ('the','dog'), ('cat','sat'), ('dog','sat')]
for prev, curr in examples:
    p = bi_probs[(prev, curr)]
    print(f"P({curr!r} | {prev!r}) = {p:.3f}")
    
# 4. Show the top-5 highest-probability bigrams
print("\nTop 5 bigrams by smoothed probability:")
# Sort by probability, descending
top5 = sorted(bi_probs.items(), key=lambda x: x[1], reverse=True)[:5]
for (prev, curr), p in top5:
    print(f"P({curr!r} | {prev!r}) = {p:.3f}")

Output:

The script prints the vocabulary size (V = 8), the four example probabilities and the top-5 smoothed bigrams.
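
A handy way to poke at the finished table is to rank candidate next words for a given history. The helper below is illustrative (not in the original notebook) and reuses the unigrams and bi_probs built above; under this toy corpus, 'cat' and 'dog' come out as the most likely continuations of 'the'.

# Hypothetical helper: rank the k most likely next words after a given history
def next_word_candidates(prev, k=3):
    cands = [(curr, bi_probs[(prev, curr)]) for curr in unigrams]
    return sorted(cands, key=lambda x: x[1], reverse=True)[:k]

print(next_word_candidates('the'))   # 'cat' and 'dog' rank highest (about 0.214 each)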

2. Cross-Entropy & Perplexity Functions
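
The functions below score a sentence with the bigram chain-rule approximation, using the unigram probability for the first word and the smoothed bigram table for every later word:

$$P(w_1,\dots,w_N) \approx P(w_1)\prod_{i=2}^{N} P(w_i \mid w_{i-1})$$

and then report $H(W) = -\frac{1}{N}\log P(w_1,\dots,w_N)$ and $PP(W) = e^{H(W)}$.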

def cross_entropy(sentence, bi_probs, unigrams, V):
    """Average negative log-probability per word (natural log)."""
    tokens = sentence.split()
    N = len(tokens)
    # P(w1) approximated by the unigram MLE
    # (assumes the first word occurs in the training corpus, otherwise log(0) fails)
    p1 = unigrams[tokens[0]] / sum(unigrams.values())
    log_prob = math.log(p1)
    # Chain rule for the remaining words
    for prev, curr in zip(tokens, tokens[1:]):
        # Fall back to 1/V when the pair involves an out-of-vocabulary word;
        # in-vocabulary pairs always have a smoothed entry in bi_probs
        p = bi_probs.get((prev, curr), 1 / V)
        log_prob += math.log(p)
    return -log_prob / N

def perplexity(sentence, bi_probs, unigrams, V):
    H = cross_entropy(sentence, bi_probs, unigrams, V)
    return math.exp(H)  # exp() because cross_entropy uses natural logarithms

# Demo on test sentences
test_sentences = [
    "the cat sat on the log",
    "the dog sees the cat"
]

for sent in test_sentences:
    H   = cross_entropy(sent, bi_probs, unigrams, V)
    PPL = perplexity(sent, bi_probs, unigrams, V)
    print(f"Sentence: '{sent}'")
    print(f"Cross-Entropy = {H:.3f}")
    print(f"Perplexity    = {PPL:.3f}\n")

Output:

The script prints the cross-entropy and perplexity of each test sentence.
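
In practice a language model is evaluated on an entire held-out corpus rather than sentence by sentence, by pooling log-probabilities over all test tokens. Below is a minimal sketch of that convention (not part of the original notebook), reusing bi_probs, unigrams, V and test_sentences from above.

def corpus_perplexity(sentences, bi_probs, unigrams, V):
    total_log_prob = 0.0
    total_tokens   = 0
    total_unigrams = sum(unigrams.values())
    for sent in sentences:
        tokens = sent.split()
        # First word via the unigram MLE, as in cross_entropy above
        # (assumes it occurs in the training corpus)
        total_log_prob += math.log(unigrams[tokens[0]] / total_unigrams)
        # Remaining words via the smoothed bigram table, with the same 1/V fallback
        for prev, curr in zip(tokens, tokens[1:]):
            total_log_prob += math.log(bi_probs.get((prev, curr), 1 / V))
        total_tokens += len(tokens)
    # Perplexity = exp of the average negative log-probability per token
    return math.exp(-total_log_prob / total_tokens)

print(f"Corpus perplexity: {corpus_perplexity(test_sentences, bi_probs, unigrams, V):.3f}")

Pooling over tokens weights longer sentences proportionally more than averaging the per-sentence perplexities above, which is the usual convention for reporting a single corpus-level number.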

Continue to Module 2.5: Naïve Bayes Text Classifier