Ingredients_Methodologies_Mermaid - TheEvergreenStateCollege/upper-division-cs-23-24 GitHub Wiki

graph TB
    %% Data flow: queries and keys feed attn_scores, which feed the self-attention weights.
    %% Each node label is declared once; later edges refer to the node by id only.
    queries[queries] --> attn_scores[attn_scores]
    keys[keys] --> attn_scores
    attn_scores --> saweights[self_attention_weight]

    %% Open questions collected while studying the material; answers are still placeholders.
    softmaxq(What is softmax?)-->softmaxa(Softmax is...)
    q("Is there a reason it is queries @ key.T, instead of keys @ queries.T? or even queries.T @ keys?")-->n(ok)

    %% Causal (masked) attention leads to the masking step; the edge label carries the definition.
    %% The method name in the label follows the PyTorch tensor API, which is masked_fill.
    maskq("<b>Masking</b> attn_scores.masked_fill()")--Masking is the process<br/>of hiding future input tokens<br/>in the matrix -->maska(ok)
    casat(Causal Attention or <b>Masked</b> Attention)-->maskq

Loading
graph TB
    %% Nested view of causal attention: the outer subgraph contains the attention-score
    %% computation, which in turn contains the Keys and Queries projections.
    subgraph Causal_Attention
        direction LR
        top2[top] --> bottom2[bottom]

        subgraph Attention Scores
        direction LR
        %% Label is quoted so the characters @, = and . are read as plain text.
        top3["Attention Scores = queries @ keys.T"] --> bottom3[bottom]
        subgraph Keys
        direction TB
        %% NOTE(review): this description talks about the 'value', not the key - confirm intended text.
        top4["nn.Linear(d_in, d_out, bias=qkv_bias)"] --> bottom4["The 'value' in<br/> this context<br/> is similar to the value<br/> in a key-value <br/>pair in a database.<br/> It represents the <br/>actual content <br/>or representation of <br/>the input items.<br/> Once the model <br/>determines which keys<br/> (and thus which parts<br/> of the input)<br/> are most relevant to<br/> the query (the<br/> current focus item),<br/> it retrieves the<br/> corresponding values."]
        end
        subgraph Queries
        direction TB
        top5["nn.Linear(d_in, d_out, bias=qkv_bias)"] --> bottom5["A 'query' is analogous<br/> to a search query in<br/> a database. It<br/> represents the current<br/> item (e.g., a word<br/> or<br/> token in a sentence)<br/> the model focuses on or<br/> tries to understand.<br/> The query is used<br/> to probe the other<br/> parts of the input<br/> sequence to determine<br/> how much<br/> attention to pay<br/> to them.<br/>"]
        end
        %% The edge label is the matrix-multiplication operator.
        Queries -- @ --> Keys
        end
    end

    %% Style definitions.
    classDef green fill:#9f6,stroke:#333,stroke-width:2px;
    classDef orange fill:#f96,stroke:#333,stroke-width:4px;
    classDef att_score fill:salmon,stroke:#333,stroke-width:3px,color:white;
    classDef keys_theme fill:rosybrown,stroke:#333,stroke-width:3px,color:white;
    classDef blue_b fill:powderblue;

    %% NOTE(review): attn_scores, saweights and keys are not defined as nodes in this diagram,
    %% so the next two assignments appear to have no target here - confirm whether they
    %% were meant for a different diagram.
    class attn_scores,saweights green
    class keys orange

    %% NOTE(review): top5/bottom5 live inside the Queries subgraph and top4/bottom4 inside Keys,
    %% so each subgraph gets one theme and its child nodes the other - confirm this is intentional.
    class Keys,top5,bottom5 keys_theme
    class Queries,top4,bottom4 att_score
    class Causal_Attention blue_b


Loading
⚠️ **GitHub.com Fallback** ⚠️