[MCP&A2A] 15. 비용 추적 및 관리 - k82022603/k82022603.github.io GitHub Wiki
[MCP&A2A] 15. 비용 추적 및 관리
개요
AI 시스템의 운영 비용은 예측하기 어렵고 빠르게 증가할 수 있습니다. 이 장에서는 LLM API 호출, 임베딩 생성, 토큰 사용량을 정밀하게 추적하고 제어하는 방법을 다룹니다.
왜 비용 추적이 중요한가?
일반적인 AI 프로젝트 비용 증가 패턴:
월 1일: $100
월 7일: $500 (5배 증가!)
월 14일: $2,000 (20배 증가!!)
월 21일: $5,000 (50배 증가!!!)
월 30일: $12,000 (120배 증가!!!!)
원인:
- 예상치 못한 트래픽 급증
- 중복 API 호출
- 비효율적인 프롬프트
- 캐싱 미적용
- 무한 루프
1. 비용 구조 분석
1.1 AI 시스템 비용 구성
총 비용 = LLM 비용 + 임베딩 비용 + 인프라 비용
LLM 비용 (50-70%)
├─ Input Tokens × $0.01/1K
├─ Output Tokens × $0.03/1K
└─ 모델별 차등 (GPT-4 > GPT-3.5)
임베딩 비용 (20-30%)
├─ text-embedding-ada-002: $0.0001/1K tokens
├─ nomic-embed-text: 무료 (self-hosted)
└─ 벡터 DB 저장 비용
인프라 비용 (10-20%)
├─ PostgreSQL: $50-200/월
├─ Redis: $20-100/월
├─ Kubernetes: $100-500/월
└─ 네트워크: $10-50/월
1.2 비용 최적화 우선순위
| 순위 | 항목 | 잠재 절감 | 난이도 |
|---|---|---|---|
| 1 | LLM 캐싱 | 60-80% | 낮음 |
| 2 | 프롬프트 최적화 | 30-50% | 중간 |
| 3 | 임베딩 캐싱 | 80-90% | 낮음 |
| 4 | 모델 선택 | 40-60% | 낮음 |
| 5 | 배치 처리 | 20-30% | 중간 |
| 6 | 토큰 제한 | 10-20% | 낮음 |
2. 데이터베이스 스키마
2.1 토큰 사용량 테이블
-- Token usage ledger: one row per billed request.
CREATE TABLE token_usage (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Tenant information
    tenant_id VARCHAR(100) NOT NULL,
    user_id VARCHAR(100),
    -- Service information
    service_type VARCHAR(50) NOT NULL, -- 'llm', 'embedding', 'rag'
    model_name VARCHAR(100) NOT NULL, -- 'gpt-4', 'gpt-3.5-turbo', 'ada-002'
    -- Token counts
    input_tokens INTEGER DEFAULT 0,
    output_tokens INTEGER DEFAULT 0,
    total_tokens INTEGER NOT NULL,
    -- Cost information
    cost_per_1k_input DECIMAL(10, 6),
    cost_per_1k_output DECIMAL(10, 6),
    -- Eight fractional digits so that small requests are not rounded away:
    -- 100 tokens at $0.00002 per 1K cost $0.000002, which a scale of 4
    -- would store as 0.0000.
    total_cost DECIMAL(16, 8) NOT NULL,
    -- Request information
    request_id VARCHAR(100),
    endpoint VARCHAR(255),
    -- Metadata
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    metadata JSONB
);
-- Indexes
-- Each index pairs a filter column with created_at in descending order.
CREATE INDEX idx_token_usage_tenant_created
ON token_usage(tenant_id, created_at DESC);
CREATE INDEX idx_token_usage_service
ON token_usage(service_type, created_at DESC);
CREATE INDEX idx_token_usage_user
ON token_usage(user_id, created_at DESC);
-- Partitioning (for large data volumes)
-- NOTE(review): token_usage is declared without a PARTITION BY clause, so the
-- example below presumably needs the parent table to be created as
-- PARTITION BY RANGE (created_at) first — confirm before enabling it.
-- CREATE TABLE token_usage_2024_01 PARTITION OF token_usage
-- FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
2.2 비용 예산 테이블
-- Per-tenant cost budgets and running spend counters.
CREATE TABLE cost_budgets (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id VARCHAR(100) NOT NULL UNIQUE,
    -- Budget settings (NULL means no budget is configured for that period)
    daily_budget DECIMAL(10, 2),
    monthly_budget DECIMAL(10, 2),
    -- Current spend. These counters are incremented by per-request costs that
    -- are often far below one cent, so they keep eight fractional digits; with
    -- a scale of 2 each such increment would round away and the counter would
    -- never move. NOT NULL keeps "spent + cost" from evaluating to NULL.
    daily_spent DECIMAL(16, 8) NOT NULL DEFAULT 0,
    monthly_spent DECIMAL(16, 8) NOT NULL DEFAULT 0,
    -- Alert thresholds (%)
    warning_threshold INTEGER DEFAULT 80, -- warn at 80%
    critical_threshold INTEGER DEFAULT 95, -- block at 95%
    -- Limits
    is_enabled BOOLEAN DEFAULT true,
    max_requests_per_minute INTEGER DEFAULT 100,
    -- Timestamps
    daily_reset_at TIMESTAMP,
    monthly_reset_at TIMESTAMP,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Index
-- NOTE(review): tenant_id is declared UNIQUE on cost_budgets, and PostgreSQL
-- backs a UNIQUE constraint with its own index, so this index looks
-- redundant — confirm before keeping it.
CREATE INDEX idx_cost_budgets_tenant ON cost_budgets(tenant_id);
2.3 비용 알림 로그
-- Cost alert history: one row per alert raised against a tenant's budget.
CREATE TABLE cost_alerts (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id VARCHAR(100) NOT NULL,
-- Alert information
alert_type VARCHAR(50) NOT NULL, -- 'warning', 'critical', 'exceeded'
threshold_type VARCHAR(50) NOT NULL, -- 'daily', 'monthly'
-- Cost information
current_spent DECIMAL(10, 2),
budget_limit DECIMAL(10, 2),
usage_percentage DECIMAL(5, 2),
-- Notification status
is_notified BOOLEAN DEFAULT false,
notified_at TIMESTAMP,
-- Message
message TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Index
CREATE INDEX idx_cost_alerts_tenant_created
ON cost_alerts(tenant_id, created_at DESC);
3. 비용 추적 구현
3.1 토큰 카운터
// internal/billing/token_counter.go
package billing
import (
"context"
"time"
"github.com/google/uuid"
"github.com/jackc/pgx/v5/pgxpool"
)
// TokenUsage is one token usage record; its fields mirror the columns that
// RecordUsage writes to the token_usage table.
type TokenUsage struct {
	// ID is not read by RecordUsage, which generates a fresh UUID for the row.
	ID string `json:"id"`
	TenantID string `json:"tenant_id"`
	UserID string `json:"user_id"`
	ServiceType string `json:"service_type"`
	ModelName string `json:"model_name"`
	InputTokens int `json:"input_tokens"`
	OutputTokens int `json:"output_tokens"`
	TotalTokens int `json:"total_tokens"`
	// Per-1K-token rates used by calculateCost.
	CostPer1KInput float64 `json:"cost_per_1k_input"`
	CostPer1KOutput float64 `json:"cost_per_1k_output"`
	// TotalCost is overwritten by RecordUsage with the computed cost.
	TotalCost float64 `json:"total_cost"`
	RequestID string `json:"request_id"`
	Endpoint string `json:"endpoint"`
	// CreatedAt is not read by RecordUsage, which stamps the row with the
	// current time.
	CreatedAt time.Time `json:"created_at"`
}
// TokenCounter records token usage rows and aggregates their cost through a
// pgx connection pool.
type TokenCounter struct {
	db *pgxpool.Pool
}

// NewTokenCounter returns a TokenCounter that runs its queries on db.
func NewTokenCounter(db *pgxpool.Pool) *TokenCounter {
	counter := new(TokenCounter)
	counter.db = db
	return counter
}
// RecordUsage computes the cost of usage and inserts it into token_usage.
//
// usage is updated in place:
//   - TotalCost is always recomputed from the token counts and per-1K rates.
//   - TotalTokens is filled in as InputTokens + OutputTokens when the caller
//     left it at zero, so the stored total agrees with its parts.
//   - ID and CreatedAt are set to the values written to the row, but only
//     after the insert has succeeded.
//
// It returns the error from the insert, if any.
func (tc *TokenCounter) RecordUsage(ctx context.Context, usage *TokenUsage) error {
	usage.TotalCost = tc.calculateCost(usage)
	if usage.TotalTokens == 0 {
		usage.TotalTokens = usage.InputTokens + usage.OutputTokens
	}

	// Generate the key and timestamp up front so they can be handed back to
	// the caller once the row exists.
	id := uuid.New().String()
	createdAt := time.Now()

	query := `
INSERT INTO token_usage (
id, tenant_id, user_id, service_type, model_name,
input_tokens, output_tokens, total_tokens,
cost_per_1k_input, cost_per_1k_output, total_cost,
request_id, endpoint, created_at
) VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14
)
`
	_, err := tc.db.Exec(ctx, query,
		id,
		usage.TenantID,
		usage.UserID,
		usage.ServiceType,
		usage.ModelName,
		usage.InputTokens,
		usage.OutputTokens,
		usage.TotalTokens,
		usage.CostPer1KInput,
		usage.CostPer1KOutput,
		usage.TotalCost,
		usage.RequestID,
		usage.Endpoint,
		createdAt,
	)
	if err != nil {
		return err
	}

	usage.ID = id
	usage.CreatedAt = createdAt
	return nil
}
// calculateCost returns the cost of usage: each token count is converted to
// thousands of tokens and multiplied by the matching per-1K rate, and the
// input and output parts are summed.
func (tc *TokenCounter) calculateCost(usage *TokenUsage) float64 {
	inputThousands := float64(usage.InputTokens) / 1000.0
	outputThousands := float64(usage.OutputTokens) / 1000.0

	inputPart := inputThousands * usage.CostPer1KInput
	outputPart := outputThousands * usage.CostPer1KOutput
	return inputPart + outputPart
}
// GetDailyUsage returns the summed total_cost of tenantID's token_usage rows
// created on or after the database's CURRENT_DATE. The sum is 0 when no rows
// match. The scan error, if any, is returned alongside the value.
func (tc *TokenCounter) GetDailyUsage(ctx context.Context, tenantID string) (float64, error) {
	const dailyCostSQL = `
SELECT COALESCE(SUM(total_cost), 0)
FROM token_usage
WHERE tenant_id = $1
AND created_at >= CURRENT_DATE
`
	var spent float64
	row := tc.db.QueryRow(ctx, dailyCostSQL, tenantID)
	err := row.Scan(&spent)
	return spent, err
}
// GetMonthlyUsage returns the summed total_cost of tenantID's token_usage rows
// created on or after the first day of the database's current month. The sum
// is 0 when no rows match. The scan error, if any, is returned alongside the
// value.
func (tc *TokenCounter) GetMonthlyUsage(ctx context.Context, tenantID string) (float64, error) {
	const monthlyCostSQL = `
SELECT COALESCE(SUM(total_cost), 0)
FROM token_usage
WHERE tenant_id = $1
AND created_at >= date_trunc('month', CURRENT_DATE)
`
	var spent float64
	row := tc.db.QueryRow(ctx, monthlyCostSQL, tenantID)
	err := row.Scan(&spent)
	return spent, err
}
// GetUsageByService returns tenantID's summed total_cost per service_type for
// rows whose created_at lies between startDate and endDate. The SQL BETWEEN
// predicate includes both endpoints.
//
// The returned map is keyed by service_type. It is empty, not nil, when no
// rows match. On any query, scan or iteration error the map is nil.
func (tc *TokenCounter) GetUsageByService(ctx context.Context, tenantID string, startDate, endDate time.Time) (map[string]float64, error) {
	query := `
SELECT service_type, SUM(total_cost) as cost
FROM token_usage
WHERE tenant_id = $1
AND created_at BETWEEN $2 AND $3
GROUP BY service_type
`
	rows, err := tc.db.Query(ctx, query, tenantID, startDate, endDate)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	result := make(map[string]float64)
	for rows.Next() {
		var serviceType string
		var cost float64
		if err := rows.Scan(&serviceType, &cost); err != nil {
			return nil, err
		}
		result[serviceType] = cost
	}
	// Next also returns false when iteration fails part-way; without this
	// check a truncated result would be reported as success.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return result, nil
}
3.2 모델별 가격표
// internal/billing/pricing.go
package billing
// ModelPricing holds the price of one model per 1K tokens, split into input
// and output rates.
type ModelPricing struct {
	InputCostPer1K float64
	OutputCostPer1K float64
}
// PricingTable maps a model name to its per-1K-token prices.
// NOTE(review): the figures are hard-coded; presumably USD list prices at the
// time of writing — verify against current vendor pricing.
var PricingTable = map[string]ModelPricing{
	// OpenAI GPT-4
	"gpt-4": {
		InputCostPer1K: 0.03,
		OutputCostPer1K: 0.06,
	},
	"gpt-4-turbo": {
		InputCostPer1K: 0.01,
		OutputCostPer1K: 0.03,
	},
	// OpenAI GPT-3.5
	"gpt-3.5-turbo": {
		InputCostPer1K: 0.0005,
		OutputCostPer1K: 0.0015,
	},
	// Anthropic Claude
	"claude-3-opus": {
		InputCostPer1K: 0.015,
		OutputCostPer1K: 0.075,
	},
	"claude-3-sonnet": {
		InputCostPer1K: 0.003,
		OutputCostPer1K: 0.015,
	},
	"claude-3-haiku": {
		InputCostPer1K: 0.00025,
		OutputCostPer1K: 0.00125,
	},
	// Embeddings (the output rate is zero for every embedding entry)
	"text-embedding-ada-002": {
		InputCostPer1K: 0.0001,
		OutputCostPer1K: 0.0,
	},
	"text-embedding-3-small": {
		InputCostPer1K: 0.00002,
		OutputCostPer1K: 0.0,
	},
	"text-embedding-3-large": {
		InputCostPer1K: 0.00013,
		OutputCostPer1K: 0.0,
	},
}
// GetPricing looks modelName up in PricingTable. The boolean reports whether
// the model is listed; for an unlisted model the returned ModelPricing is the
// zero value.
func GetPricing(modelName string) (ModelPricing, bool) {
	if entry, listed := PricingTable[modelName]; listed {
		return entry, true
	}
	return ModelPricing{}, false
}
// EstimateCost returns the projected cost of a call to modelName with the
// given input and output token counts, using the rates from GetPricing.
// It returns 0.0 when the model has no pricing entry.
func EstimateCost(modelName string, inputTokens, outputTokens int) float64 {
	rates, known := GetPricing(modelName)
	if known {
		inputPart := float64(inputTokens) / 1000.0 * rates.InputCostPer1K
		outputPart := float64(outputTokens) / 1000.0 * rates.OutputCostPer1K
		return inputPart + outputPart
	}
	return 0.0
}
4. 예산 관리
4.1 예산 매니저
// internal/billing/budget_manager.go
package billing
import (
"context"
"fmt"
"time"
"github.com/jackc/pgx/v5/pgxpool"
)
// CostBudget is a tenant's budget configuration and running spend; its
// fields correspond to columns of the cost_budgets table.
type CostBudget struct {
	ID string `json:"id"`
	TenantID string `json:"tenant_id"`
	// CheckBudget enforces a budget only when its value is greater than zero.
	DailyBudget float64 `json:"daily_budget"`
	MonthlyBudget float64 `json:"monthly_budget"`
	DailySpent float64 `json:"daily_spent"`
	MonthlySpent float64 `json:"monthly_spent"`
	// Thresholds are percentages of the budget.
	WarningThreshold int `json:"warning_threshold"`
	CriticalThreshold int `json:"critical_threshold"`
	// IsEnabled false makes CheckBudget accept every request.
	IsEnabled bool `json:"is_enabled"`
	MaxRequestsPerMinute int `json:"max_requests_per_minute"`
	// The reset timestamps are not selected by GetBudget and stay at their
	// zero value on budgets it returns.
	DailyResetAt time.Time `json:"daily_reset_at"`
	MonthlyResetAt time.Time `json:"monthly_reset_at"`
}
// BudgetManager reads and updates per-tenant budgets in cost_budgets and
// writes alerts to cost_alerts.
type BudgetManager struct {
	db           *pgxpool.Pool
	tokenCounter *TokenCounter
}

// NewBudgetManager returns a BudgetManager backed by db that holds tc as its
// token counter.
func NewBudgetManager(db *pgxpool.Pool, tc *TokenCounter) *BudgetManager {
	manager := new(BudgetManager)
	manager.db = db
	manager.tokenCounter = tc
	return manager
}
// CheckBudget reports whether tenantID can spend estimatedCost without
// exceeding its daily or monthly budget.
//
// It returns nil when the budget is disabled or the projected spend fits, and
// an error when the budget cannot be loaded or when the daily or monthly
// budget would be exceeded. A budget of zero or less is treated as "no limit"
// for that period.
//
// When the projected spend fits but has reached the warning or critical
// threshold of either period, an alert row is written as a side effect.
//
// NOTE(review): the schema comment describes critical_threshold as a blocking
// threshold; this function raises a "critical" alert for it but still admits
// the request — confirm the intended policy.
func (bm *BudgetManager) CheckBudget(ctx context.Context, tenantID string, estimatedCost float64) error {
	budget, err := bm.GetBudget(ctx, tenantID)
	if err != nil {
		return fmt.Errorf("loading budget for tenant %q: %w", tenantID, err)
	}
	if !budget.IsEnabled {
		return nil
	}

	// Hard limits: reject before any alerting.
	dailyTotal := budget.DailySpent + estimatedCost
	if budget.DailyBudget > 0 && dailyTotal > budget.DailyBudget {
		return fmt.Errorf("daily budget exceeded: $%.2f / $%.2f",
			dailyTotal, budget.DailyBudget)
	}
	monthlyTotal := budget.MonthlySpent + estimatedCost
	if budget.MonthlyBudget > 0 && monthlyTotal > budget.MonthlyBudget {
		return fmt.Errorf("monthly budget exceeded: $%.2f / $%.2f",
			monthlyTotal, budget.MonthlyBudget)
	}

	// Soft limits: both periods are checked against both thresholds.
	bm.alertIfNearLimit(ctx, tenantID, budget, "daily", dailyTotal, budget.DailyBudget)
	bm.alertIfNearLimit(ctx, tenantID, budget, "monthly", monthlyTotal, budget.MonthlyBudget)
	return nil
}

// alertIfNearLimit writes a "critical" or "warning" alert for period when
// projected has reached the corresponding percentage of limit. It does
// nothing when limit is not positive or no threshold has been reached.
func (bm *BudgetManager) alertIfNearLimit(ctx context.Context, tenantID string, budget *CostBudget, period string, projected, limit float64) {
	if limit <= 0 {
		return
	}
	percentage := projected / limit * 100

	var level string
	switch {
	case budget.CriticalThreshold > 0 && percentage >= float64(budget.CriticalThreshold):
		level = "critical"
	case percentage >= float64(budget.WarningThreshold):
		level = "warning"
	default:
		return
	}

	// Alerting is best-effort: a failure to record the alert must not reject
	// a request that is still within budget, so the error is dropped on
	// purpose.
	_ = bm.createAlert(ctx, tenantID, level, period, projected, limit)
}
// UpdateSpent adds cost to both daily_spent and monthly_spent of tenantID's
// cost_budgets row and refreshes updated_at. It returns the error from the
// UPDATE, if any.
func (bm *BudgetManager) UpdateSpent(ctx context.Context, tenantID string, cost float64) error {
	const addSpendSQL = `
UPDATE cost_budgets
SET daily_spent = daily_spent + $1,
monthly_spent = monthly_spent + $1,
updated_at = CURRENT_TIMESTAMP
WHERE tenant_id = $2
`
	if _, err := bm.db.Exec(ctx, addSpendSQL, cost, tenantID); err != nil {
		return err
	}
	return nil
}
// ResetDailyBudget zeroes daily_spent and stamps daily_reset_at for every
// budget that has not yet been reset today.
//
// Rows whose daily_reset_at is NULL are included. The column has no default,
// so a budget that has never been reset carries NULL, and "NULL <
// CURRENT_DATE" alone never matches — such rows would otherwise keep
// accumulating spend forever.
func (bm *BudgetManager) ResetDailyBudget(ctx context.Context) error {
	query := `
UPDATE cost_budgets
SET daily_spent = 0,
daily_reset_at = CURRENT_TIMESTAMP
WHERE daily_reset_at IS NULL
OR daily_reset_at < CURRENT_DATE
`
	_, err := bm.db.Exec(ctx, query)
	return err
}
// ResetMonthlyBudget zeroes monthly_spent and stamps monthly_reset_at for
// every budget that has not yet been reset in the current month.
//
// Rows whose monthly_reset_at is NULL are included. The column has no
// default, so a budget that has never been reset carries NULL, and a "<"
// comparison against NULL alone never matches — such rows would otherwise
// never be reset.
func (bm *BudgetManager) ResetMonthlyBudget(ctx context.Context) error {
	query := `
UPDATE cost_budgets
SET monthly_spent = 0,
monthly_reset_at = CURRENT_TIMESTAMP
WHERE monthly_reset_at IS NULL
OR monthly_reset_at < date_trunc('month', CURRENT_DATE)
`
	_, err := bm.db.Exec(ctx, query)
	return err
}
// GetBudget loads tenantID's row from cost_budgets.
//
// daily_budget and monthly_budget are nullable in the schema, and a NULL
// cannot be scanned into a float64, so both are read through COALESCE as 0.
// CheckBudget already treats a budget of 0 as "no limit". The spent counters
// are coalesced to 0 for the same reason.
//
// The reset timestamps are not selected and are left at their zero value.
// On error the returned budget is nil.
func (bm *BudgetManager) GetBudget(ctx context.Context, tenantID string) (*CostBudget, error) {
	query := `
SELECT id, tenant_id,
COALESCE(daily_budget, 0), COALESCE(monthly_budget, 0),
COALESCE(daily_spent, 0), COALESCE(monthly_spent, 0),
warning_threshold, critical_threshold, is_enabled, max_requests_per_minute
FROM cost_budgets
WHERE tenant_id = $1
`
	budget := &CostBudget{}
	err := bm.db.QueryRow(ctx, query, tenantID).Scan(
		&budget.ID,
		&budget.TenantID,
		&budget.DailyBudget,
		&budget.MonthlyBudget,
		&budget.DailySpent,
		&budget.MonthlySpent,
		&budget.WarningThreshold,
		&budget.CriticalThreshold,
		&budget.IsEnabled,
		&budget.MaxRequestsPerMinute,
	)
	if err != nil {
		// Do not hand back a partially populated struct.
		return nil, fmt.Errorf("querying budget for tenant %q: %w", tenantID, err)
	}
	return budget, nil
}
// createAlert inserts one row into cost_alerts describing how far
// currentSpent is into budgetLimit.
//
// alertType and thresholdType are stored as given; the schema comments list
// 'warning'/'critical'/'exceeded' and 'daily'/'monthly' as their values.
// budgetLimit must be positive: the usage percentage divides by it, and a
// zero limit would produce an infinite or NaN percentage. A non-positive
// limit is rejected with an error and nothing is inserted.
func (bm *BudgetManager) createAlert(ctx context.Context, tenantID, alertType, thresholdType string, currentSpent, budgetLimit float64) error {
	if budgetLimit <= 0 {
		return fmt.Errorf("creating %s %s alert for tenant %q: budget limit must be positive, got %v",
			thresholdType, alertType, tenantID, budgetLimit)
	}
	usagePercentage := (currentSpent / budgetLimit) * 100

	query := `
INSERT INTO cost_alerts (
tenant_id, alert_type, threshold_type,
current_spent, budget_limit, usage_percentage,
message
) VALUES ($1, $2, $3, $4, $5, $6, $7)
`
	message := fmt.Sprintf(
		"Budget %s alert: $%.2f / $%.2f (%.1f%%)",
		alertType, currentSpent, budgetLimit, usagePercentage,
	)
	_, err := bm.db.Exec(ctx, query,
		tenantID, alertType, thresholdType,
		currentSpent, budgetLimit, usagePercentage,
		message,
	)
	return err
}
5. 미들웨어 통합
5.1 비용 추적 미들웨어
// internal/middleware/billing_middleware.go
package middleware
import (
"context"
"net/http"
"practice-go-lang/internal/billing"
)
// BillingMiddleware returns HTTP middleware that gates requests on the
// tenant's budget.
//
// Requests without a tenant ID in their context are passed through untouched.
// Otherwise CheckBudget is called with a flat minimum estimate; any error it
// returns is answered with 402 Payment Required. Admitted requests are served
// through a billingWriter that wraps the original ResponseWriter.
func BillingMiddleware(
	tokenCounter *billing.TokenCounter,
	budgetManager *billing.BudgetManager,
) func(http.Handler) http.Handler {
	// Minimum estimated cost charged against the budget check.
	const estimatedCost = 0.01

	wrap := func(next http.Handler) http.Handler {
		serve := func(w http.ResponseWriter, r *http.Request) {
			ctx := r.Context()

			if tenantID := getTenantID(ctx); tenantID != "" {
				if err := budgetManager.CheckBudget(ctx, tenantID, estimatedCost); err != nil {
					http.Error(w, "Budget limit exceeded", http.StatusPaymentRequired)
					return
				}
				// Wrap the writer so the response can be inspected for
				// token counts.
				w = &billingWriter{
					ResponseWriter: w,
					tenantID:       tenantID,
					tokenCounter:   tokenCounter,
					budgetManager:  budgetManager,
				}
			}

			next.ServeHTTP(w, r)
		}
		return http.HandlerFunc(serve)
	}
	return wrap
}
// billingWriter wraps an http.ResponseWriter together with the tenant and
// billing components of the request being served.
type billingWriter struct {
	http.ResponseWriter
	tenantID      string
	tokenCounter  *billing.TokenCounter
	budgetManager *billing.BudgetManager
}

// Write forwards b to the wrapped ResponseWriter unchanged.
func (bw *billingWriter) Write(b []byte) (int, error) {
	// TODO: extract the token count from the actual response (not yet
	// implemented); this is only a simple example.
	written, err := bw.ResponseWriter.Write(b)
	return written, err
}
// getTenantID returns the string stored in ctx under the key "tenant_id", or
// "" when the key is absent or its value is not a string.
func getTenantID(ctx context.Context) string {
	if id, isString := ctx.Value("tenant_id").(string); isString {
		return id
	}
	return ""
}
6. 비용 리포트
6.1 일일 리포트
// internal/billing/reporter.go
package billing
import (
"context"
"time"
)
// DailyReport is the cost report for one tenant on one day, as assembled by
// GenerateDailyReport.
type DailyReport struct {
	TenantID string `json:"tenant_id"`
	// Date is the report day formatted as "2006-01-02".
	Date string `json:"date"`
	TotalCost float64 `json:"total_cost"`
	TotalTokens int `json:"total_tokens"`
	// RequestCount is the number of token_usage rows for the day.
	RequestCount int `json:"request_count"`
	// ByService and ByModel map a service_type / model_name to its summed cost.
	ByService map[string]float64 `json:"by_service"`
	ByModel map[string]float64 `json:"by_model"`
	// TopUsers lists users by summed cost, highest first.
	TopUsers []UserCost `json:"top_users"`
}
// UserCost is the summed cost attributed to a single user.
type UserCost struct {
	UserID string `json:"user_id"`
	Cost float64 `json:"cost"`
}
// Reporter builds cost reports; it runs its queries through the database
// handle of the TokenCounter it holds.
type Reporter struct {
	tokenCounter *TokenCounter
}

// NewReporter returns a Reporter that queries through tc.
func NewReporter(tc *TokenCounter) *Reporter {
	reporter := new(Reporter)
	reporter.tokenCounter = tc
	return reporter
}
// GenerateDailyReport builds tenantID's cost report for the calendar day of
// date: the totals, the per-service and per-model breakdowns, and the ten
// most expensive users.
//
// Any failing query aborts the report and its error is returned with a nil
// report. A failed breakdown is never reported as an empty one.
func (r *Reporter) GenerateDailyReport(ctx context.Context, tenantID string, date time.Time) (*DailyReport, error) {
	query := `
SELECT
COALESCE(SUM(total_cost), 0) as total_cost,
COALESCE(SUM(total_tokens), 0) as total_tokens,
COUNT(*) as request_count
FROM token_usage
WHERE tenant_id = $1
AND DATE(created_at) = $2
`
	report := &DailyReport{
		TenantID: tenantID,
		Date:     date.Format("2006-01-02"),
	}
	err := r.tokenCounter.db.QueryRow(ctx, query, tenantID, date).Scan(
		&report.TotalCost,
		&report.TotalTokens,
		&report.RequestCount,
	)
	if err != nil {
		return nil, err
	}

	// Cost per service.
	if report.ByService, err = r.getByService(ctx, tenantID, date); err != nil {
		return nil, err
	}
	// Cost per model.
	if report.ByModel, err = r.getByModel(ctx, tenantID, date); err != nil {
		return nil, err
	}
	// Top users by cost.
	if report.TopUsers, err = r.getTopUsers(ctx, tenantID, date, 10); err != nil {
		return nil, err
	}
	return report, nil
}
// getByService returns tenantID's summed total_cost per service_type for the
// calendar day of date. The map is empty, not nil, when no rows match. It is
// nil on any query, scan or iteration error.
func (r *Reporter) getByService(ctx context.Context, tenantID string, date time.Time) (map[string]float64, error) {
	query := `
SELECT service_type, SUM(total_cost)
FROM token_usage
WHERE tenant_id = $1 AND DATE(created_at) = $2
GROUP BY service_type
`
	rows, err := r.tokenCounter.db.Query(ctx, query, tenantID, date)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	result := make(map[string]float64)
	for rows.Next() {
		var serviceType string
		var cost float64
		// A failed scan would otherwise record a zero cost under an empty key.
		if err := rows.Scan(&serviceType, &cost); err != nil {
			return nil, err
		}
		result[serviceType] = cost
	}
	// Next also returns false when iteration fails part-way.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return result, nil
}
// getByModel returns tenantID's summed total_cost per model_name for the
// calendar day of date. The map is empty, not nil, when no rows match. It is
// nil on any query, scan or iteration error.
func (r *Reporter) getByModel(ctx context.Context, tenantID string, date time.Time) (map[string]float64, error) {
	query := `
SELECT model_name, SUM(total_cost)
FROM token_usage
WHERE tenant_id = $1 AND DATE(created_at) = $2
GROUP BY model_name
`
	rows, err := r.tokenCounter.db.Query(ctx, query, tenantID, date)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	result := make(map[string]float64)
	for rows.Next() {
		var modelName string
		var cost float64
		// A failed scan would otherwise record a zero cost under an empty key.
		if err := rows.Scan(&modelName, &cost); err != nil {
			return nil, err
		}
		result[modelName] = cost
	}
	// Next also returns false when iteration fails part-way.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return result, nil
}
// getTopUsers returns up to limit users of tenantID ordered by their summed
// total_cost for the calendar day of date, highest first.
//
// user_id is nullable in token_usage, and a NULL cannot be scanned into a
// string, so usage without a user is reported under the empty UserID. The
// slice is nil when no rows match and on any error.
func (r *Reporter) getTopUsers(ctx context.Context, tenantID string, date time.Time, limit int) ([]UserCost, error) {
	query := `
SELECT COALESCE(user_id, ''), SUM(total_cost) as cost
FROM token_usage
WHERE tenant_id = $1 AND DATE(created_at) = $2
GROUP BY user_id
ORDER BY cost DESC
LIMIT $3
`
	rows, err := r.tokenCounter.db.Query(ctx, query, tenantID, date, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var users []UserCost
	for rows.Next() {
		var uc UserCost
		// A failed scan would otherwise append a zero-valued entry.
		if err := rows.Scan(&uc.UserID, &uc.Cost); err != nil {
			return nil, err
		}
		users = append(users, uc)
	}
	// Next also returns false when iteration fails part-way.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return users, nil
}
7. API 엔드포인트
7.1 비용 조회 API
// internal/handlers/billing_handler.go
package handlers
import (
"encoding/json"
"net/http"
"time"
"practice-go-lang/internal/billing"
)
// BillingHandler serves the billing query endpoints on top of the token
// counter, budget manager and reporter it holds.
type BillingHandler struct {
	tokenCounter  *billing.TokenCounter
	budgetManager *billing.BudgetManager
	reporter      *billing.Reporter
}

// NewBillingHandler returns a BillingHandler wired to tc, bm and r.
func NewBillingHandler(tc *billing.TokenCounter, bm *billing.BudgetManager, r *billing.Reporter) *BillingHandler {
	handler := new(BillingHandler)
	handler.tokenCounter = tc
	handler.budgetManager = bm
	handler.reporter = r
	return handler
}
// GetDailyUsage handles GET /api/billing/daily.
//
// It responds with a JSON object holding the tenant ID, today's date
// ("2006-01-02") and the tenant's total cost for the day. A request whose
// context carries no tenant ID is answered with 401 instead of being queried
// under an empty tenant. A failing usage query or JSON encoding is answered
// with 500.
func (h *BillingHandler) GetDailyUsage(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()
	tenantID := getTenantID(ctx)
	if tenantID == "" {
		http.Error(w, "Missing tenant", http.StatusUnauthorized)
		return
	}

	cost, err := h.tokenCounter.GetDailyUsage(ctx, tenantID)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	err = json.NewEncoder(w).Encode(map[string]interface{}{
		"tenant_id":  tenantID,
		"date":       time.Now().Format("2006-01-02"),
		"total_cost": cost,
	})
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
	}
}
// GetMonthlyUsage handles GET /api/billing/monthly.
//
// It responds with a JSON object holding the tenant ID, the current month
// ("2006-01") and the tenant's total cost for the month. A request whose
// context carries no tenant ID is answered with 401 instead of being queried
// under an empty tenant. A failing usage query or JSON encoding is answered
// with 500.
func (h *BillingHandler) GetMonthlyUsage(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()
	tenantID := getTenantID(ctx)
	if tenantID == "" {
		http.Error(w, "Missing tenant", http.StatusUnauthorized)
		return
	}

	cost, err := h.tokenCounter.GetMonthlyUsage(ctx, tenantID)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	err = json.NewEncoder(w).Encode(map[string]interface{}{
		"tenant_id":  tenantID,
		"month":      time.Now().Format("2006-01"),
		"total_cost": cost,
	})
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
	}
}
// GetDailyReport handles GET /api/billing/report/daily.
//
// The optional "date" query parameter selects the report day in "2006-01-02"
// form and defaults to today. An unparsable date is answered with 400. A
// request whose context carries no tenant ID is answered with 401 instead of
// being reported under an empty tenant. A failing report or JSON encoding is
// answered with 500.
func (h *BillingHandler) GetDailyReport(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()
	tenantID := getTenantID(ctx)
	if tenantID == "" {
		http.Error(w, "Missing tenant", http.StatusUnauthorized)
		return
	}

	// Date parameter (default: today).
	date := time.Now()
	if dateStr := r.URL.Query().Get("date"); dateStr != "" {
		parsed, err := time.Parse("2006-01-02", dateStr)
		if err != nil {
			http.Error(w, "Invalid date format", http.StatusBadRequest)
			return
		}
		date = parsed
	}

	report, err := h.reporter.GenerateDailyReport(ctx, tenantID, date)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(report); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
	}
}
// GetBudget handles GET /api/billing/budget.
//
// It responds with the tenant's budget encoded as JSON. A request whose
// context carries no tenant ID is answered with 401 instead of being looked
// up under an empty tenant. A failing lookup or JSON encoding is answered
// with 500.
func (h *BillingHandler) GetBudget(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()
	tenantID := getTenantID(ctx)
	if tenantID == "" {
		http.Error(w, "Missing tenant", http.StatusUnauthorized)
		return
	}

	budget, err := h.budgetManager.GetBudget(ctx, tenantID)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(budget); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
	}
}
8. 비용 최적화 전략
8.1 캐싱 전략
// 임베딩 캐싱으로 80% 절감
Before: $1000/월 (100만 호출)
After: $200/월 (캐시 히트율 80%)
// Redis cache key strategy
// NOTE(review): md5 is not a Go built-in; presumably a helper that returns a
// hex digest of text (e.g. built on crypto/md5) — confirm.
key := fmt.Sprintf("emb:%s", md5(text))
ttl := 30 * 24 * time.Hour // 30 days
8.2 프롬프트 최적화
Bad Prompt (500 tokens):
"I need you to analyze this document thoroughly and provide
a comprehensive summary that covers all the main points,
including background information, key findings, methodology,
results, and conclusions. Please make sure to be very detailed..."
Good Prompt (100 tokens):
"Summarize: background, findings, methodology, results, conclusions"
절감: 80% (500 → 100 tokens)
8.3 모델 선택
작업별 최적 모델:
간단한 분류: GPT-3.5 Turbo
복잡한 추론: GPT-4 Turbo
대량 처리: Claude Haiku
요약: Claude Sonnet
예시:
GPT-4: $0.03/1K input → GPT-3.5 Turbo: $0.0005/1K input
절감: 약 98% (입력 토큰 단가 기준)!
핵심 요약
비용 추적 체크리스트
✅ 데이터베이스
- token_usage 테이블
- cost_budgets 테이블
- cost_alerts 테이블
✅ 핵심 컴포넌트
- TokenCounter (사용량 기록)
- BudgetManager (예산 관리)
- Reporter (리포트 생성)
✅ 최적화 전략
- 임베딩 캐싱 (80% 절감)
- 프롬프트 최적화 (50% 절감)
- 모델 선택 (60% 절감)
✅ API 엔드포인트
- GET /api/billing/daily
- GET /api/billing/monthly
- GET /api/billing/report/daily
- GET /api/billing/budget
비용 절감 효과
| 최적화 | Before | After | 절감률 |
|---|---|---|---|
| 임베딩 캐싱 | $1000 | $200 | 80% |
| 프롬프트 단축 | $500 | $250 | 50% |
| 모델 변경 | $1000 | $400 | 60% |
| 배치 처리 | $300 | $210 | 30% |
작성일: 2024-12-13