Step 4: Review of a LangChain-Based Multi-Step AI Implementation (100-hours-a-week/7-team-ddb-wiki)
This service recommends places based on user input. Producing accurate recommendations requires multi-step reasoning and the use of external resources.
In addition, the final recommendation result must be returned in the following fixed JSON schema:
```json
{
  "data": [
    {"place_id": 21, "similarity_score": 0.92},
    ...
  ]
}
```
To achieve this, each reasoning step is separated into a structured Tool based on LangChain's AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION agent, and the Agent is configured to execute these Tools automatically.
```mermaid
flowchart TD
    A[User input] --> B[Tool 1: Keyword extraction]
    B --> C[Tool 2: Similar keyword search]
    C --> D[Tool 3: Weighted average embedding]
    D --> E[Tool 4: Place vector DB search]
    E --> F[Return structured recommendation result]
```
Tool 1: Keyword extraction
- Function: use the LLM to extract the keywords (and intended visit time) needed for place recommendation from the user's natural-language input
- Input schema:
```json
{
  "user_input": "Recommend a date spot with a nice atmosphere for tomorrow evening"
}
```
- Output schema:
```json
{
  "keywords": ["date", "mood", "night view"]
}
```
Tool 2: Similar keyword search
- Function: embed the extracted keywords and search the vector DB for similar keywords
- Input schema:
```json
{
  "keywords": ["date", "mood", "night view"]
}
```
- Output schema:
```json
{
  "similar_keywords": [
    {"keyword": "romantic", "score": 0.87},
    {"keyword": "atmosphere", "score": 0.85}
  ]
}
```
Tool 3: Weighted average embedding
- Function: build an average embedding vector using each similar keyword's similarity score as its weight (see the formula note after this block)
- Input schema:
```json
{
  "similar_keywords": [
    {"keyword": "romantic", "score": 0.87},
    {"keyword": "atmosphere", "score": 0.85}
  ]
}
```
- Output schema:
```json
{
  "vector": [0.125, -0.098, 0.234, ...]
}
```
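Concretely, if the similar keywords have embeddings v1, ..., vn and similarity scores s1, ..., sn, the user vector is the score-weighted mean v_user = (s1·v1 + ... + sn·vn) / (s1 + ... + sn), which is exactly what `compute_user_embedding_fn` in tools.py computes.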
Tool 4: Place recommendation
- Function: recommend similar places from the place vector DB based on the user vector and the requested visit time
- Input schema:
```json
{
  "vector": [...]
}
```
- Output schema (final response):
```json
{
  "data": [
    {"place_id": 21, "similarity_score": 0.92},
    {"place_id": 36, "similarity_score": 0.86},
    ...
  ]
}
```
Item | Technology | Description |
---|---|---|
LLM | Gemini (via GoogleGenerativeAI) | Understands user input and extracts keywords |
Embedding | KR-SBERT | Vectorizes keywords and places |
Vector DB | Chroma | Similar keyword and place search (a Chroma sketch follows this table; the example code below uses FAISS for illustration) |
LangChain setup | Agent + StructuredTool + Pydantic | Automatic execution of the Tool-based chain |
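The stack above lists Chroma as the vector DB, while the example code later on this page reads prebuilt FAISS indexes. For reference, here is a minimal sketch of Tool 2 backed by Chroma instead; the collection name, persistence directory, and function name are assumptions for illustration, not part of the current design.

```python
# Hypothetical sketch: Tool 2 (similar keyword search) backed by Chroma.
# Assumes a persisted collection named "keywords" was built beforehand with the
# same KR-SBERT embeddings; names and paths are illustrative only.
from typing import List

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

embedding_model = HuggingFaceEmbeddings(model_name="snunlp/KR-SBERT-V40K-klueNLI-augSTS")

keyword_store = Chroma(
    collection_name="keywords",             # assumed collection name
    embedding_function=embedding_model,
    persist_directory="./chroma_keywords",  # assumed path
)

def search_similar_keywords_chroma(keywords: List[str], k: int = 5) -> dict:
    """Return the top-k keywords most similar to the query keywords."""
    query = " ".join(keywords)
    # Returns (Document, score) pairs where higher scores mean more similar.
    hits = keyword_store.similarity_search_with_relevance_scores(query, k=k)
    return {
        "similar_keywords": [
            {"keyword": doc.page_content, "score": float(score)} for doc, score in hits
        ]
    }
```

Either store works, as long as the returned score is a similarity where higher means closer, which is what the downstream weighted-average step assumes.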
- Uses `AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION`
- Each Tool is registered with `StructuredTool.from_function(...)`
- The LLM calls the Tools sequentially based on the natural-language input
- Each Tool's input/output follows an explicitly defined JSON schema
Item | Effect |
---|---|
Response accuracy | Intermediate step-by-step reasoning improves the precision of place recommendations |
API compatibility | The structured response schema makes backend integration straightforward |
Maintainability | Each Tool is modular, so debugging, replacement, and testing are easy |
Extensibility | New features can be added simply by adding Tools (e.g., a user location filter) |
- Add a location-based filter Tool (see the sketch after this list)
- A Tool that generates per-condition recommendation reasons (LLM-based)
- Caching of Tool call results or persisted logging of Tool calls
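As an illustration of the first item above, a location-based filter could be added as one more StructuredTool without touching the existing chain. The sketch below is hypothetical: the field names (`latitude`, `longitude`, `radius_km`), the hard-coded coordinates, and the haversine filtering are assumptions, not part of the current design.

```python
# Hypothetical sketch of a future location-based filter Tool.
# Field names, defaults, and the distance logic are assumptions for illustration only.
import math
from typing import List

from pydantic import BaseModel
from langchain.tools import StructuredTool


class FilterPlacesByLocationInput(BaseModel):
    place_ids: List[int]    # candidate places from Tool 4
    latitude: float         # user's latitude (assumed field)
    longitude: float        # user's longitude (assumed field)
    radius_km: float = 3.0  # search radius (assumed default)


def haversine_km(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Great-circle distance between two coordinates in kilometers."""
    r = 6371.0
    p1, p2 = math.radians(lat1), math.radians(lat2)
    dp, dl = math.radians(lat2 - lat1), math.radians(lon2 - lon1)
    a = math.sin(dp / 2) ** 2 + math.cos(p1) * math.cos(p2) * math.sin(dl / 2) ** 2
    return 2 * r * math.asin(math.sqrt(a))


def filter_places_by_location_fn(place_ids: List[int], latitude: float,
                                 longitude: float, radius_km: float = 3.0) -> dict:
    # place_coords would come from the place metadata store; hard-coded here as a stand-in.
    place_coords = {21: (37.5665, 126.9780), 36: (37.5512, 126.9882)}
    kept = [
        pid for pid in place_ids
        if pid in place_coords
        and haversine_km(latitude, longitude, *place_coords[pid]) <= radius_km
    ]
    return {"place_ids": kept}


filter_places_by_location = StructuredTool.from_function(
    func=filter_places_by_location_fn,
    name="filter_places_by_location",
    description="Keeps only the candidate places within the given radius of the user's location.",
    args_schema=FilterPlacesByLocationInput,
)
```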
Directory layout:
```
/place_recommendation_chain
├── schemas.py   # Pydantic request/response definitions
├── tools.py     # All Tool function definitions
├── agent.py     # Agent + Tool wiring and execution
└── run.py       # Test entry point (user_input -> final JSON response)
```
schemas.py
```python
# schemas.py
from typing import List

from pydantic import BaseModel


# Tool 1: user input -> keyword extraction
class ExtractKeywordsInput(BaseModel):
    user_input: str

class ExtractKeywordsOutput(BaseModel):
    keywords: List[str]


# Tool 2: keywords -> similar keyword search
class SearchSimilarKeywordsInput(BaseModel):
    keywords: List[str]

class SimilarKeyword(BaseModel):
    keyword: str
    score: float

class SearchSimilarKeywordsOutput(BaseModel):
    similar_keywords: List[SimilarKeyword]


# Tool 3: similar keywords -> weighted average embedding
class ComputeUserEmbeddingInput(BaseModel):
    similar_keywords: List[SimilarKeyword]

class ComputeUserEmbeddingOutput(BaseModel):
    vector: List[float]


# Tool 4: user vector -> place recommendation
class RecommendPlacesInput(BaseModel):
    vector: List[float]

class RecommendedPlace(BaseModel):
    place_id: int
    similarity_score: float

class RecommendPlacesOutput(BaseModel):
    data: List[RecommendedPlace]
```
tools.py
```python
# tools.py
import json
import pickle
from typing import List

import faiss
import numpy as np
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_google_genai import GoogleGenerativeAI

from schemas import (
    ExtractKeywordsOutput,
    SimilarKeyword,
    SearchSimilarKeywordsOutput,
    ComputeUserEmbeddingOutput,
    RecommendPlacesOutput,
)

# Shared embedding model (KR-SBERT)
embedding_model = HuggingFaceEmbeddings(model_name="snunlp/KR-SBERT-V40K-klueNLI-augSTS")

# Gemini LLM (example)
llm = GoogleGenerativeAI(
    model="models/gemini-2.0-flash-lite",
    google_api_key="YOUR_GEMINI_API_KEY",
)


# Tool 1: keyword extraction
def extract_keywords_fn(user_input: str) -> ExtractKeywordsOutput:
    prompt = f"""
Extract the keywords needed for place recommendation from the following user input
and return them as JSON.

Input: "{user_input}"

Example output:
{{
    "keywords": ["date", "mood", "night view"]
}}
"""
    response = llm.invoke(prompt)
    # Assumes the model returns bare JSON (no markdown code fences).
    parsed = json.loads(response)
    return ExtractKeywordsOutput(**parsed)


# Tool 2: similar keyword search
def search_similar_keywords_fn(keywords: List[str]) -> SearchSimilarKeywordsOutput:
    index = faiss.read_index("keyword_vectors.index")
    with open("keyword_meta.pkl", "rb") as f:
        keyword_meta = pickle.load(f)

    vectors = embedding_model.embed_documents(keywords)
    query = np.mean(vectors, axis=0).astype("float32").reshape(1, -1)
    # Assumes an inner-product (cosine) index, so larger D values mean more similar.
    D, I = index.search(query, 5)
    similar = [{"keyword": keyword_meta[i], "score": float(D[0][j])} for j, i in enumerate(I[0])]
    return SearchSimilarKeywordsOutput(similar_keywords=similar)


# Tool 3: weighted average embedding
def compute_user_embedding_fn(similar_keywords: List[SimilarKeyword]) -> ComputeUserEmbeddingOutput:
    # Depending on the LangChain version, items may arrive as dicts; coerce to the schema model.
    similar_keywords = [
        kw if isinstance(kw, SimilarKeyword) else SimilarKeyword(**kw) for kw in similar_keywords
    ]
    vectors = [embedding_model.embed_query(kw.keyword) for kw in similar_keywords]
    sims = [kw.score for kw in similar_keywords]
    weighted = sum(np.array(vec) * sim for vec, sim in zip(vectors, sims))
    avg_vector = weighted / sum(sims)
    return ComputeUserEmbeddingOutput(vector=avg_vector.tolist())


# Tool 4: place recommendation
def recommend_places_fn(vector: List[float]) -> RecommendPlacesOutput:
    index = faiss.read_index("place_vectors.index")
    with open("place_meta.pkl", "rb") as f:
        place_meta = pickle.load(f)

    vec = np.array(vector).astype("float32").reshape(1, -1)
    D, I = index.search(vec, 5)
    recommended = [
        {"place_id": int(place_meta[i]["id"]), "similarity_score": float(D[0][j])}
        for j, i in enumerate(I[0])
    ]
    return RecommendPlacesOutput(data=recommended)
```
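The tool functions above read prebuilt artifacts (keyword_vectors.index, keyword_meta.pkl, and their place counterparts) that are not defined on this page. A minimal sketch of how the keyword index might be built is shown below; the seed keyword list and the choice of a normalized inner-product index (so that FAISS scores behave like cosine similarities) are assumptions, not part of the original design.

```python
# build_keyword_index.py (hypothetical helper, not part of the original layout)
# Builds keyword_vectors.index / keyword_meta.pkl as consumed by search_similar_keywords_fn.
import pickle

import faiss
import numpy as np
from langchain_community.embeddings import HuggingFaceEmbeddings

embedding_model = HuggingFaceEmbeddings(model_name="snunlp/KR-SBERT-V40K-klueNLI-augSTS")

# Assumed seed vocabulary; in practice this would come from the keyword/place data.
keywords = ["romantic", "atmosphere", "night view", "date", "quiet", "rooftop"]

vectors = np.array(embedding_model.embed_documents(keywords), dtype="float32")
faiss.normalize_L2(vectors)                  # normalize so inner product == cosine similarity

index = faiss.IndexFlatIP(vectors.shape[1])  # inner-product index
index.add(vectors)

faiss.write_index(index, "keyword_vectors.index")
with open("keyword_meta.pkl", "wb") as f:
    # keyword_meta maps FAISS row ids back to keyword strings
    pickle.dump({i: kw for i, kw in enumerate(keywords)}, f)
```

Using IndexFlatIP over L2-normalized vectors keeps the "higher score = more similar" convention that search_similar_keywords_fn and the weighted-average step rely on.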
agent.py
```python
# agent.py
from langchain.agents import initialize_agent
from langchain.agents.agent_types import AgentType
from langchain.tools import StructuredTool
from langchain_google_genai import GoogleGenerativeAI

from tools import (
    extract_keywords_fn,
    search_similar_keywords_fn,
    compute_user_embedding_fn,
    recommend_places_fn,
)
from schemas import (
    ExtractKeywordsInput,
    SearchSimilarKeywordsInput,
    ComputeUserEmbeddingInput,
    RecommendPlacesInput,
)

# LLM (Gemini)
llm = GoogleGenerativeAI(
    model="models/gemini-pro",
    google_api_key="YOUR_GEMINI_API_KEY",
)

# Tool list (structured input schemas applied).
# Note: StructuredTool.from_function only takes args_schema for inputs; each Tool's
# output is simply whatever its function returns (here, the Pydantic output models).
tools = [
    StructuredTool.from_function(
        name="extract_keywords",
        description="Extracts the keywords needed for place recommendation from the user input.",
        func=extract_keywords_fn,
        args_schema=ExtractKeywordsInput,
    ),
    StructuredTool.from_function(
        name="search_similar_keywords",
        description="Searches the vector DB for keywords similar to the extracted keywords.",
        func=search_similar_keywords_fn,
        args_schema=SearchSimilarKeywordsInput,
    ),
    StructuredTool.from_function(
        name="compute_user_embedding",
        description="Computes a weighted average embedding vector from the similar keywords and their similarity scores.",
        func=compute_user_embedding_fn,
        args_schema=ComputeUserEmbeddingInput,
    ),
    StructuredTool.from_function(
        name="recommend_places",
        description="Returns place recommendations from the place vector DB based on the user embedding vector.",
        func=recommend_places_fn,
        args_schema=RecommendPlacesInput,
    ),
]

# Create the Agent
agent_executor = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)
```
run.py
```python
# run.py
from agent import agent_executor

if __name__ == "__main__":
    # Test user input
    user_query = "Recommend a date spot with a nice atmosphere for this evening"

    print("\n==============================")
    print("User request:", user_query)
    print("==============================\n")

    # Run the LangChain Agent
    result = agent_executor.run(user_query)

    print("\n==============================")
    print("Final recommendation result (structured JSON):")
    print(result)
    print("==============================")
```