graph TB
    %% Diagram of how attention scores are formed from the query and key projections.
    %% The outer subgraph keeps its original id, Casual_Attention, because the class
    %% assignment at the bottom of the diagram refers to that id; only the displayed
    %% title is corrected to "Causal Attention".
    subgraph Casual_Attention["Causal Attention"]
        direction LR
        top2[top] --> bottom2[bottom]

        %% Explicit id plus quoted title, so the title with a space is never parsed as an id.
        subgraph Attention_Scores["Attention Scores"]
            direction LR
            %% Label is quoted because it contains the characters =, @ and . as plain text.
            top3["Attention Scores = queries @ keys.T"] --> bottom3[bottom]

            subgraph Keys
                direction TB
                %% NOTE(review): the description below talks about the 'value' concept although
                %% it sits inside the Keys subgraph - confirm whether a key description was intended.
                top4["nn.Linear(d_in, d_out, bias=qkv_bias)"] --> bottom4["The 'value' in<br/> this context<br/> is similar to the value<br/> in a key-value <br/>pair in a database.<br/> It represents the <br/>actual content <br/>or representation of <br/>the input items.<br/> Once the model <br/>determines which keys<br/> (and thus which parts<br/> of the input)<br/> are most relevant to<br/> the query (the<br/> current focus item),<br/> it retrieves the<br/> corresponding values."]
            end

            subgraph Queries
                direction TB
                top5["nn.Linear(d_in, d_out, bias=qkv_bias)"] --> bottom5["A 'query' is analogous<br/> to a search query in<br/> a database. It<br/> represents the current<br/> item (e.g., a word<br/> or<br/> token in a sentence)<br/> the model focuses on or<br/> tries to understand.<br/> The query is used<br/> to probe the other<br/> parts of the input<br/> sequence to determine<br/> how much<br/> attention to pay<br/> to them.<br/>"]
            end

            %% Edge label mirrors the @ operator used in the score formula above;
            %% it is quoted so the symbol is always read as label text.
            Queries -- "@" --> Keys
        end
    end
%% Style classes: each classDef names a reusable set of fill / stroke / text-colour properties.
classDef green fill:#9f6,stroke:#333,stroke-width:2px;
classDef orange fill:#f96,stroke:#333,stroke-width:4px;
classDef att_score fill:salmon,stroke:#333,stroke-width:3px,color:white;
classDef keys_theme fill:rosybrown,stroke:#333,stroke-width:3px,color:white;
classDef blue_b fill:powderblue;
%% Class assignments: attach the classes above to node / subgraph ids.
%% NOTE(review): attn_scores and saweights are not defined in the visible part of the
%% diagram - confirm they exist elsewhere, otherwise this line styles nothing.
class attn_scores,saweights green
%% NOTE(review): the visible subgraph id is Keys (capital K); lowercase keys matches
%% no visible node - confirm whether this line is still needed.
class keys orange
%% NOTE(review): top5 and bottom5 are declared inside the Queries subgraph but receive
%% keys_theme here, while top4 and bottom4 (declared inside Keys) receive att_score below.
%% This may be deliberate contrast against the subgraph background - confirm.
class Keys,top5,bottom5 keys_theme
class Queries,top4,bottom4 att_score
%% Casual_Attention is the id of the outermost subgraph.
class Casual_Attention blue_b