CROSS ENTROPY - beyondnlp/nlp GitHub Wiki
- torch.exp()를 사용하는 이유
- focal_loss를 보면 torch.exp()를 사용하는 경우가 있다. 지수함수는 아래와 같이 모든 y값이 0 이상이다. 따라서 양수와 음수가 있는 값을 모두 양수로 치환할 때 torch.exp()를 적용해 준다.
- https://velog.io/@heaseo/Focalloss-%EC%84%A4%EB%AA%85
import random
import torch
import torch.nn as nn
import numpy as np
# Toy inputs for the comparison below: two samples with ten class scores each.
# `answer` holds the target class index for each sample.
answer = [1, 5]
# Each score is drawn uniformly from [0, 1); rows are filled in order.
prob = [[random.random() for _ in range(10)] for _ in range(2)]
def cross_entropy1(prob, answer):
    """Return the mean cross-entropy loss computed by PyTorch.

    Args:
        prob: Nested sequence of floats, one row of class scores per sample.
            The scores are handed to PyTorch's cross-entropy as-is, which
            applies log-softmax internally.
        answer: Sequence of integer class indices, one per row of ``prob``.

    Returns:
        A zero-dimensional ``torch.Tensor`` holding the loss averaged over
        the samples.
    """
    # torch.tensor with an explicit dtype replaces the legacy
    # torch.Tensor / torch.LongTensor constructors.
    output = torch.tensor(prob, dtype=torch.float32)
    target = torch.tensor(answer, dtype=torch.long)
    # Functional form; same default "mean" reduction as nn.CrossEntropyLoss().
    return nn.functional.cross_entropy(output, target)
def cross_entropy2(prob, answer):
    """Return the mean cross-entropy loss computed by hand with NumPy.

    For each sample the loss is ``log(sum(exp(scores))) - scores[target]``,
    i.e. the negative log-softmax of the target class.

    Args:
        prob: Sequence of rows, each a sequence of class scores.
        answer: Sequence of integer class indices, one per row of ``prob``.

    Returns:
        The per-sample losses averaged over ``len(answer)``.

    Raises:
        ZeroDivisionError: If ``answer`` is empty.
        IndexError: If ``prob`` has fewer rows than ``answer``.
    """
    total = 0.0
    for row, target in enumerate(answer):
        scores = np.asarray(prob[row], dtype=float)
        # Subtract the row maximum before exponentiating so that large
        # scores do not overflow exp() and turn the loss into inf/nan.
        peak = scores.max()
        log_sum_exp = peak + np.log(np.sum(np.exp(scores - peak)))
        total += log_sum_exp - scores[target]
    return total / len(answer)
def cross_entropy3(y, t):
    """Return the mean negative log of the values in ``y`` selected by ``t``.

    Args:
        y: One row of values or a batch of rows. The log is taken of these
            values directly; no softmax is applied here.
        t: Target labels, either class indices or one-hot rows with the same
            number of elements as ``y``.

    Returns:
        ``-sum(log(y[i, t[i]] + 1e-7)) / batch_size``.
    """
    values = np.array(y)
    labels = np.array(t)

    # A single sample is promoted to a batch of one.
    if values.ndim == 1:
        values = values.reshape(1, values.size)
        labels = labels.reshape(1, labels.size)

    # If the labels are one-hot vectors, convert them to the index of the
    # correct class.
    if labels.size == values.size:
        labels = labels.argmax(axis=1)

    batch_size = values.shape[0]
    # Pick, for every row, the value at that row's target index.
    picked = values[np.arange(batch_size), labels]
    # The 1e-7 offset keeps log() away from log(0).
    return -np.sum(np.log(picked + 1e-7)) / batch_size
# Run the three implementations on the same inputs and print each result.
# cross_entropy3 takes the log of its inputs directly (no softmax), so for
# these unnormalised scores its value is not expected to match the other two.
loss1 = cross_entropy1(prob, answer)
loss2 = cross_entropy2(prob, answer)
loss3 = cross_entropy3(prob, answer)
print(f"loss1 : {loss1}")
print(f"loss2 : {loss2}")
# Previously this line printed loss2 under the loss3 label.
print(f"loss3 : {loss3}")
-
hf compute_loss :
-
numpy array indexing (integer-array indexing): idx1=[0, 1], idx2=[3, 5]
prob[idx1, idx2] => [prob[0,3], prob[1,5]] -
골빈해커의 3분 딥러닝 ( 63 page 참고 )
정답(Y)이 아래와 같고
[[ 1 0 0 ], [ 0 0 1 ]]
>>> Y=[[1,0,0],[0,0,1]]
모델(Model)에서 나온 값이
[[ 0.7 0.2 0.1 ],[0.2 0.3 0.5 ]]처럼 나오면
>>> model=[[0.7,0.2,0.1],[0.2,0.3,0.5]]
이 두 값을 이용하여 CROSS ENTROPY를 계산할 수 있다.
정답에, 모델의 결과에 tf.log(model)를 취한 값을 곱한다.
>>> log_model=tf.log(model)
>>> sess=tf.Session()
>>> sess.run(log_model)
array([[-0.35667497, -1.609438 , -2.3025851 ],
[-1.609438 , -1.2039728 , -0.6931472 ]], dtype=float32)
Y * tf.log(Model)
이제는 각 행별로 더하면 된다( reduce_sum(axis=1) )
>>> output=Y*log_model
>>> sess.run(output)
array([[-0.35667497, -0. , -0. ],
[-0. , -0. , -0.6931472 ]], dtype=float32)
>>> a=tf.reduce_sum(output,1)
>>> sess.run(a)
array([-0.35667497, -0.6931472 ], dtype=float32)