# Module 2.1: Probability Theory and Bayes' Rule

Probability theory provides the mathematical foundation for modeling uncertainty in language. This section covers basic definitions, conditional probability, and Bayes’ theorem.


## Key Concepts

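The key identities used on this page are the definition of conditional probability, the law of total probability (used to compute the marginal $P(B)$), and Bayes' theorem:

$$P(A \mid B) = \frac{P(A \cap B)}{P(B)}, \qquad P(B) > 0$$

$$P(B) = \sum_i P(B \mid A_i)\,P(A_i)$$

$$P(A \mid B) = \frac{P(B \mid A)\,P(A)}{P(B)}$$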

### 1. Example: Conditional Probability (conditional_probability_demo.ipynb)

def conditional_prob(p_a, p_b_given_a, p_b):
    """
    Compute the conditional probability P(A|B) using Bayes' theorem:
        P(A|B) = P(B|A) * P(A) / P(B)

    :param p_a:          P(A)
    :param p_b_given_a:  P(B|A)
    :param p_b:          P(B)
    :return:             P(A|B)
    """
    if p_b == 0:
        raise ValueError("P(B) must be non-zero")
    return (p_b_given_a * p_a) / p_b

# --- Example usage ---
if __name__ == "__main__":
    # 1. Define prior probabilities
    p_spam      = 0.4    # P(Spam)
    p_not_spam  = 1 - p_spam

    # 2. Likelihoods: P("offer" | Spam) and P("offer" | Ham)
    p_word_given_spam = 0.8    # e.g. the word "offer" appears in 80% of spam messages
    p_word_given_ham  = 0.1    # e.g. the word "offer" appears in 10% of ham messages

    # 3. Compute marginal P("offer")
    p_word = (p_word_given_spam * p_spam) + (p_word_given_ham * p_not_spam)

    # 4. Compute posterior P(Spam | "offer")
    p_spam_given_word = conditional_prob(p_spam, p_word_given_spam, p_word)

    # 5. Display the result
    print(f"P(Spam)                    = {p_spam:.3f}")
    print(f"P('offer' | Spam)          = {p_word_given_spam:.3f}")
    print(f"P('offer' | Ham)           = {p_word_given_ham:.3f}")
    print(f"P('offer')                 = {p_word:.3f}")
    print(f"P(Spam | 'offer')          = {p_spam_given_word:.3f}")

Output:

P(Spam)                    = 0.400
P('offer' | Spam)          = 0.800
P('offer' | Ham)           = 0.100
P('offer')                 = 0.380
P(Spam | 'offer')          = 0.842
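These values can be verified by hand: the marginal follows from the law of total probability, and the posterior from Bayes' theorem.

$$P(\text{offer}) = 0.8 \times 0.4 + 0.1 \times 0.6 = 0.38$$

$$P(\text{Spam} \mid \text{offer}) = \frac{0.8 \times 0.4}{0.38} \approx 0.842$$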

### 2. Example: Naïve Bayes Text Classification

A toy Naïve Bayes classifier for two classes ("spam" vs. "ham") using word frequencies.
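Under the bag-of-words assumption, the classifier scores each class $c$ in log space and picks the maximum:

$$\hat{c} = \arg\max_{c} \; \log P(c) + \sum_{i} \log P(w_i \mid c), \qquad P(w \mid c) = \frac{\text{count}(w, c) + 1}{N_c + |V|}$$

where $N_c$ is the number of word tokens seen in class $c$ and $|V|$ is the vocabulary size; the add-one term is Laplace smoothing, which prevents unseen words from zeroing out a class. The code below implements this rule: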

import math
from collections import Counter

# Toy training data
docs = [
    ("spam", "limited time offer win money now"),
    ("ham",  "meet me for lunch tomorrow"),
    ("spam", "win a free ticket"),
    ("ham",  "project meeting at 10 am"),
]

# Estimate priors and likelihoods
labels, texts = zip(*docs)
prior = {lab: count/len(labels) for lab, count in Counter(labels).items()}
word_counts = {lab: Counter() for lab in prior}
total_words = {lab: 0 for lab in prior}

for lab, text in docs:
    tokens = text.split()
    word_counts[lab].update(tokens)
    total_words[lab] += len(tokens)

vocab_size = len({w for cnt in word_counts.values() for w in cnt})

def predict(text):
    tokens = text.split()
    scores = {}
    for lab in prior:
        # log P(label)
        score = math.log(prior[lab])
        # add log P(word|label) with Laplace smoothing
        for t in tokens:
            word_prob = (word_counts[lab][t] + 1) / (total_words[lab] + vocab_size)
            score += math.log(word_prob)
        scores[lab] = score
    return max(scores, key=scores.get)

# Demo
for text in ["free money now", "lunch tomorrow"]:
    print(f"'{text}' → {predict(text)}")

Output:

'free money now' → spam
'lunch tomorrow' → ham
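The raw scores are unnormalised log probabilities. A minimal sketch of a probabilistic variant, reusing prior, word_counts, total_words and vocab_size from the snippet above (the helper name predict_proba is just for illustration), normalises them in log space to avoid underflow:

import math

def predict_proba(text):
    """Return P(label | text) for every label, normalised in log space."""
    tokens = text.split()
    log_scores = {}
    for lab in prior:
        score = math.log(prior[lab])
        for t in tokens:
            score += math.log((word_counts[lab][t] + 1) / (total_words[lab] + vocab_size))
        log_scores[lab] = score
    # Subtract the maximum log score before exponentiating (log-sum-exp trick)
    max_log = max(log_scores.values())
    total = sum(math.exp(s - max_log) for s in log_scores.values())
    return {lab: math.exp(s - max_log) / total for lab, s in log_scores.items()}

print(predict_proba("free money now"))   # posterior mass should favour 'spam'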

Continue to Module 2: Probability and Statistics for NLP