# Module 2.1 Probability Theory and Bayes' Rule
Probability theory provides the mathematical foundation for modelling uncertainty in language. This section covers basic definitions, conditional probability, and Bayes' theorem.
## Key Concepts
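For events $A$ and $B$ with $P(B) > 0$, conditional probability and Bayes' theorem are

$$
P(A \mid B) = \frac{P(A \cap B)}{P(B)},
\qquad
P(A \mid B) = \frac{P(B \mid A)\,P(A)}{P(B)}.
$$

Bayes' theorem inverts a conditional: from the likelihood of the evidence $B$ under hypothesis $A$ and the prior probability of $A$, it gives the posterior $P(A \mid B)$.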
### conditional_probability_demo.ipynb
```python
def conditional_prob(p_a, p_b_given_a, p_b):
    """
    Compute the conditional probability P(A|B) using Bayes' theorem:
        P(A|B) = P(B|A) * P(A) / P(B)
    :param p_a:         P(A)
    :param p_b_given_a: P(B|A)
    :param p_b:         P(B)
    :return:            P(A|B)
    """
    if p_b == 0:
        raise ValueError("P(B) must be non-zero")
    return (p_b_given_a * p_a) / p_b


# --- Example usage ---
if __name__ == "__main__":
    # 1. Define prior probabilities
    p_spam = 0.4  # P(Spam)
    p_not_spam = 1 - p_spam

    # 2. Likelihoods: P("offer" | Spam) and P("offer" | Ham)
    p_word_given_spam = 0.8  # e.g. the word "offer" appears in 80% of spam
    p_word_given_ham = 0.1   # e.g. the word "offer" appears in 10% of ham

    # 3. Compute the marginal P("offer") by the law of total probability
    p_word = (p_word_given_spam * p_spam) + (p_word_given_ham * p_not_spam)

    # 4. Compute the posterior P(Spam | "offer")
    p_spam_given_word = conditional_prob(p_spam, p_word_given_spam, p_word)

    # 5. Display the result
    print(f"P(Spam) = {p_spam:.3f}")
    print(f"P('offer' | Spam) = {p_word_given_spam:.3f}")
    print(f"P('offer' | Ham) = {p_word_given_ham:.3f}")
    print(f"P('offer') = {p_word:.3f}")
    print(f"P(Spam | 'offer') = {p_spam_given_word:.3f}")
```
Output:
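```
P(Spam) = 0.400
P('offer' | Spam) = 0.800
P('offer' | Ham) = 0.100
P('offer') = 0.380
P(Spam | 'offer') = 0.842
```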
## 2. Example: Naïve Bayes Text Classification
A toy Naïve Bayes classifier for two classes (“spam” vs. “ham”) can be built from word frequencies; its decision rule is sketched below.
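With class priors $P(c)$ and word likelihoods $P(w \mid c)$ estimated from training counts with Laplace (add-one) smoothing, the classifier picks

$$
\hat{c} = \arg\max_{c}\Big(\log P(c) + \sum_{i} \log P(w_i \mid c)\Big),
\qquad
P(w \mid c) = \frac{\operatorname{count}(w, c) + 1}{\operatorname{count}(c) + |V|},
$$

where $\operatorname{count}(w, c)$ is the number of times word $w$ occurs in class-$c$ training text, $\operatorname{count}(c)$ is the total number of tokens in class $c$, and $|V|$ is the vocabulary size. The code below estimates these quantities from four toy documents: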
```python
import math
from collections import Counter

# Toy training data
docs = [
    ("spam", "limited time offer win money now"),
    ("ham",  "meet me for lunch tomorrow"),
    ("spam", "win a free ticket"),
    ("ham",  "project meeting at 10 am"),
]

# Estimate priors and likelihoods
labels, texts = zip(*docs)
prior = {lab: count / len(labels) for lab, count in Counter(labels).items()}

word_counts = {lab: Counter() for lab in prior}
total_words = {lab: 0 for lab in prior}
for lab, text in docs:
    tokens = text.split()
    word_counts[lab].update(tokens)
    total_words[lab] += len(tokens)

vocab_size = len({w for cnt in word_counts.values() for w in cnt})

def predict(text):
    tokens = text.split()
    scores = {}
    for lab in prior:
        # log P(label)
        score = math.log(prior[lab])
        # add log P(word|label) with Laplace smoothing
        for t in tokens:
            word_prob = (word_counts[lab][t] + 1) / (total_words[lab] + vocab_size)
            score += math.log(word_prob)
        scores[lab] = score
    return max(scores, key=scores.get)

# Demo
for text in ["free money now", "lunch tomorrow"]:
    print(f"'{text}' → {predict(text)}")
```
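Output:

```
'free money now' → spam
'lunch tomorrow' → ham
```

Note that the scores are log probabilities: summing logs avoids the numerical underflow that multiplying many small probabilities would cause.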
Continue to Module 2: Probability and Statistics for NLP