본문 바로가기

SK네트웍스 Family AI캠프 10기/Daily 회고

63일차. Modular RAG - LangSmith & Tools

더보기

 

63일 차 회고.

 

 어제 전처리를 기본만 하고 모델을 돌려서 데이터셋을 만들어보려고 했는데 토큰 제한이 걸려서 돌리지 못했다. 그래서 모델을 바꾸는 등의 방법을 사용해야 할 것 같다. 일단은 모델은 뒤로 미뤄두고 전처리에만 집중하기로 했다.

 

 

 

 

1. Modular RAG

 

 

1-1. LangSmith

 

LangSmith

  • LLM 애플리케이션 모니터링, 테스트, 배포 지원 온라인 플랫폼
  • LangChain 기반 애플리케이션의 개발 효율성을 향상시키고, 생산성 증가 및 오류 감소를 목표로 한다.

 

LangSmith Concept

  • LangSmith에 추적 로그 기록
  • Run
    • LLM 애플리케이션에서 수행되는 단일 작업 또는 실행 단위
  • Trace
    • 하나의 입력이 출력으로 처리되는 전체 실행 흐름
    • 여러 개의 Run으로 구성되며, 애플리케이션 내의 전체적인 작동 과정을 추적할 수 있다.
  • Project
    • 관련된 여러 Trace를 묶은 단위

 

 

1-2. Tools

 

Tools

  • LLM이 자연어 명령을 해석하여, 프롬프트에서 파라미터를 추출하고, 그에 맞는 함수를 자동으로 호출한다.

 

Create Tool

# Method 1
from langchain_core.tools import tool

# `@tool` wraps the function in a LangChain tool object. The docstring is
# surfaced as the tool description (see the printed output below), so it is
# kept short and unchanged.
@tool
def multiply(a: int, b: int) -> int:
    """
    Multiply two numbers.
    """
    return a * b


# Inspect the metadata that was derived from the signature and docstring.
print(f"Function Name: {multiply.name}")
print(f"Function Description: {multiply.description}")
print(f"Function Input Parameters: {multiply.args}")
"""
Function Name: multiply
Function Description: Multiply two numbers.
Function Input Parameters: {'a': {'title': 'A', 'type': 'integer'}, 'b': {'title': 'B', 'type': 'integer'}}
"""
# Method 2
from typing import Annotated, List

# The `Annotated` metadata strings end up as per-argument descriptions in the
# generated schema, and the docstring becomes the schema description (see the
# dump below).
@tool
def multiply_by_max(
        a: Annotated[int, "scale factor"],
        b: Annotated[List[int], "list of ints over which to take maximum"]
) -> int:
    """
    Multiply a by the maximum of b
    """
    # max() raises ValueError when b is empty; that is left to propagate.
    return a * max(b)

multiply_by_max.args_schema.model_json_schema()
"""
{'description': 'Multiply a by the maximum of b',
 'properties': {'a': {'description': 'scale factor',
   'title': 'A',
   'type': 'integer'},
  'b': {'description': 'list of ints over which to take maximum',
   'items': {'type': 'integer'},
   'title': 'B',
   'type': 'array'}},
 'required': ['a', 'b'],
 'title': 'multiply_by_max',
 'type': 'object'}
"""
# Method 3
from pydantic import BaseModel, Field

# Explicit input schema for the tool: two integer fields, each with its own
# description.
class CalculatorInput(BaseModel):
    a: int = Field(description="first number")
    b: int = Field(description="second number")

# Method 3: pass the tool name and the argument schema to the decorator
# instead of letting them be derived from the function itself.
# "my-tool" is the name reported by `multiply1.name` below.
@tool(
    "my-tool",
    args_schema=CalculatorInput,
    return_direct=True
)
def multiply1(a:int, b:int) -> int:
    """
    Multiply two numbers.
    """
    return a * b
    
multiply1.name
# 'my-tool'

multiply1.args_schema.model_json_schema()
"""
{'properties': {'a': {'description': 'first number',
   'title': 'A',
   'type': 'integer'},
  'b': {'description': 'second number', 'title': 'B', 'type': 'integer'}},
 'required': ['a', 'b'],
 'title': 'CalculatorInput',
 'type': 'object'}
"""
# LLM Model
from langchain_openai import ChatOpenAI

# Base chat model.
model = ChatOpenAI(
    model="gpt-4o-mini"
)

# Rebind `model` to a version that has both tools attached, so the model can
# answer with a tool call.
model = model.bind_tools(
    [multiply, multiply_by_max]
)

# The reply is an AI message describing the tool call (name + args); the tool
# itself is not executed here, as the printed output below shows.
result = model.invoke(
    "use multiply tool, the args are 5, 3"
)
result.pretty_print()
"""
================================== Ai Message ==================================
Tool Calls:
  multiply (call_mzaoGjELLF8227wNZBEbIuqX)
 Call ID: call_mzaoGjELLF8227wNZBEbIuqX
  Args:
    a: 5
    b: 3
"""
# Chain
# Pipeline: model -> extract the args of the first tool call -> run `multiply`
# with those args.
# NOTE(review): `tool_calls[0]` assumes the model produced at least one tool
# call; confirm how an answer without tool calls should be handled.
chain = (
    model
    | (lambda msg: msg.tool_calls[0]["args"])
    | multiply
)

result = chain.invoke(
    "use multiply tool, the args are 5, 3"
)
result
# 15

 

Tool Errors

# Tool
# Demo tool with three required arguments. `dict_arg` is never used in the
# body; it only exists so that the model can forget to supply it, which is
# what the error-handling examples that follow rely on.
@tool
def complex_tool(
        int_arg: int,
        float_arg: float,
        dict_arg: dict
) -> float:
    """
    Do something complex with a complex tool.
    """
    # int * float yields a float (e.g. 5 * 2.1 == 10.5), hence the
    # `-> float` return annotation.
    return int_arg * float_arg
from langchain.chat_models import init_chat_model

# Create the chat model through the provider-agnostic factory.
llm = init_chat_model(
    model="gpt-4o-mini",
    model_provider="openai"
)
# Attach `complex_tool` so the model can respond with a call to it.
llm_with_tools = llm.bind_tools(
    [complex_tool]
)
  • Try - Except
# try_except
from langchain_core.runnables import Runnable, RunnableConfig

def try_except_tool(tool_args:dict, config:RunnableConfig) -> Runnable:
    """
    Invoke ``complex_tool`` and convert any failure into a readable message.

    Args:
        tool_args: Arguments extracted from the model's tool call.
        config: Runnable config forwarded to the tool invocation.

    Returns:
        The tool's result on success; otherwise a string holding the error
        text and the arguments that caused it.
    """
    # NOTE(review): the `-> Runnable` annotation is kept unchanged for
    # interface compatibility, although the function actually returns either
    # the tool result or a str.
    try:
        # Return the result: without `return` the success path yields None.
        return complex_tool.invoke(input=tool_args, config=config)
    except Exception as e:
        # Deliberately broad: every tool failure is reported back as text
        # instead of aborting the chain.
        return f"Error: {e}\n\ntool_args: {tool_args}"
# Chain
# Pipeline: model -> args of the first tool call -> guarded tool invocation.
chain = (
    llm_with_tools
    | (lambda msg: msg.tool_calls[0]["args"])
    | try_except_tool
)
# In the recorded run below the model omitted `dict_arg`, so the guarded call
# printed the validation error instead of raising.
print(chain.invoke(
    "use complex tool, the args are 5, 2.1, empty dictionary. don't forget dict_arg"
))
"""
Error: 1 validation error for complex_tool
dict_arg
  Field required [type=missing, input_value={'int_arg': 5, 'float_arg': 2.1}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/missing

tool_args: {'int_arg': 5, 'float_arg': 2.1}
"""
  • Fallbacks
# LLM Model
from langchain_openai import ChatOpenAI

# Second model used only as the fallback; `tool_choice` names `complex_tool`
# explicitly when binding.
model = ChatOpenAI(
    model="gpt-4-1106-preview"
).bind_tools(
    [complex_tool], tool_choice="complex_tool"
)
# Chain
# Fallback pipeline: same shape as the primary one, but driven by `model`.
chain_fallback = (
    model
    | (lambda msg: msg.tool_calls[0]["args"])
    | complex_tool
)

# Primary pipeline: the tool is called directly, so a bad tool call raises.
chain = (
    llm_with_tools
    | (lambda msg: msg.tool_calls[0]["args"])
    | complex_tool
)

# Register `chain_fallback` as the fallback for `chain`.
chain_with_fallback = chain.with_fallbacks([chain_fallback])
print(chain_with_fallback.invoke(
    "use complex tool, the args are 5, 2.1, empty dictionary. don't forget dict_arg"
))
# 10.5
  • Retry with Exception
# Prompt
from langchain_core.prompts import ChatPromptTemplate

# Two-slot prompt: the user's request, followed by a placeholder that is
# filled from the "last_output" input key (the retry example sets that key to
# a list of messages describing the failed attempt).
prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("placeholder", "{last_output}")
    ]
)
# Exception
from langchain_core.messages import AIMessage, HumanMessage, ToolCall, ToolMessage

class CustomToolException(Exception):
    """
    Exception that carries a failed tool call together with its cause.

    Attributes are set in ``__init__``:
        tool_call: the tool call that was being executed.
        exception: the original exception raised while executing it.
    """
    def __init__(self, tool_call:ToolCall, exception:Exception) -> None:
        # No message is passed to the base class; callers read the two
        # attributes below instead.
        super().__init__()
        self.tool_call = tool_call
        self.exception = exception

def tool_custom_exception(msg:AIMessage, config:RunnableConfig) -> Runnable:
    """
    Run ``complex_tool`` with the first tool call found in ``msg``.

    Args:
        msg: Model response expected to contain at least one tool call.
        config: Runnable config forwarded to the tool invocation.

    Returns:
        Whatever ``complex_tool`` returns.

    Raises:
        CustomToolException: If the tool invocation fails; it carries the
            tool call and the original exception.
    """
    # Look the tool call up once; it is needed for both the invocation and
    # the error path.
    tool_call = msg.tool_calls[0]
    try:
        return complex_tool.invoke(tool_call["args"], config=config)
    except Exception as e:
        # Chain explicitly so the traceback shows the original failure as the
        # direct cause.
        raise CustomToolException(tool_call, e) from e
# Chain
# Pipeline: prompt -> model with tools -> tool execution that raises
# CustomToolException on failure.
chain = (
    prompt
    | llm_with_tools
    | tool_custom_exception
)
def exception_to_messages(inputs: dict) -> dict:
    """
    Replace the captured exception in ``inputs`` with a retry conversation.

    The "exception" entry is removed from ``inputs`` and a three-message
    history is stored under "last_output": the failed tool call, the error
    text as the tool's reply, and a human instruction to try again.

    Args:
        inputs: Chain input containing an "exception" entry that exposes
            ``tool_call`` and ``exception`` attributes.

    Returns:
        The same ``inputs`` dict, modified in place.
    """
    failure = inputs.pop("exception")

    # Replay the failed call as if the model had just issued it.
    replayed_call = AIMessage(content="", tool_calls=[failure.tool_call])
    # Report the error text as the tool's answer to that call.
    error_report = ToolMessage(
        tool_call_id=failure.tool_call["id"],
        content=str(failure.exception),
    )
    # Ask for a corrected attempt.
    retry_request = HumanMessage(
        content="The last tool call raised an exception. Try calling the tool again with corrected arguments. Do not repeat mistakes."
    )

    inputs["last_output"] = [replayed_call, error_report, retry_request]
    return inputs

# On failure, rerun `chain` after `exception_to_messages` has rewritten the
# inputs. `exception_key="exception"` names the input key under which the
# raised exception is handed to the fallback (the key `exception_to_messages`
# pops).
self_correcting_chain = chain.with_fallbacks(
    [exception_to_messages | chain], exception_key="exception"
)
self_correcting_chain.invoke(
    {
        "input": "use complex tool. the args are 5, 2.1, empty dictionary. don't forget dict_arg"
    }
)
# 10.5

 

Python REPL

from langchain_core.tools import Tool
from langchain_experimental.utilities import PythonREPL

# SECURITY: this tool exists to execute Python code it is handed. Do not
# expose it to untrusted input without sandboxing.
python_repl = PythonREPL()
python_repl.run("print(1+1)")
# 2

# Wrap the REPL's `run` method as a tool. The description tells the model what
# kind of input to send and that values must be printed to be seen.
repl_tool = Tool(
    name="python_repl",
    description="A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.",
    func=python_repl.run
)
repl_tool.invoke("print(1+1)")
# 2

 

Tavily Search

from langchain_community.tools import TavilySearchResults

# Search tool configured for up to 5 results, with the answer and raw page
# content included, and restricted to the two listed domains.
tool = TavilySearchResults(
    max_results=5,
    include_answer=True,
    include_raw_content=True,
    include_domains=["github.io", "wikidocs.net"]
)
# The tool is invoked with a dict carrying the search query.
result = tool.invoke(
    {
        "query": "LangChain Tools에 대해서 알려줘."
    }
)

 

SQLDatabase

# SQLite DB
from sqlalchemy import create_engine

# File-backed SQLite database in the current working directory.
engine = create_engine("sqlite:///titanic.db")
# Data
import pandas as pd

# NOTE(review): DATA_PATH is not defined in this snippet; it must be set
# beforehand and is concatenated as-is with the file name.
df = pd.read_csv(DATA_PATH+"titanic.csv")
# Load the data into SQLite. `if_exists="replace"` keeps this re-runnable:
# the default ("fail") raises ValueError once titanic.db already contains the
# table from a previous run.
df.to_sql("titanic", engine, index=False, if_exists="replace")
# SQLDatabase
from langchain_community.utilities.sql_database import SQLDatabase

# Wrap the SQLAlchemy engine for use by the LangChain SQL utilities.
db = SQLDatabase(engine=engine)
# LLM
from langchain_openai import ChatOpenAI

# temperature=0 is set for the SQL agent's model.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0
)
# Toolkit
from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit

# Toolkit built from the database wrapper and the model. The trace below
# shows its tools in use: sql_db_list_tables, sql_db_schema,
# sql_db_query_checker and sql_db_query.
toolkit = SQLDatabaseToolkit(
    db=db,
    llm=llm
)
# Agent
from langchain_community.agent_toolkits import create_sql_agent

# verbose=True prints the step-by-step trace shown below.
agent_executor = create_sql_agent(
    llm=llm,
    toolkit=toolkit,
    agent_type="tool-calling",
    verbose=True
)

# Natural-language question; the agent inspects the schema, writes and checks
# a SQL query, runs it, and answers in text (see trace).
result = agent_executor.invoke({"input": "평균 생존자들의 나이는?"})
"""
> Entering new SQL Agent Executor chain...

Invoking: `sql_db_list_tables` with `{}`


titanic
Invoking: `sql_db_schema` with `{'table_names': 'titanic'}`



CREATE TABLE titanic (
	"PassengerId" BIGINT, 
	"Survived" BIGINT, 
	"Pclass" BIGINT, 
	"Name" TEXT, 
	"Sex" TEXT, 
	"Age" FLOAT, 
	"SibSp" BIGINT, 
	"Parch" BIGINT, 
	"Ticket" TEXT, 
	"Fare" FLOAT, 
	"Cabin" TEXT, 
	"Embarked" TEXT
)

/*
3 rows from titanic table:
PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Parch	Ticket	Fare	Cabin	Embarked
1	0	3	Braund, Mr. Owen Harris	male	22.0	1	0	A/5 21171	7.25	None	S
2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Thayer)	female	38.0	1	0	PC 17599	71.2833	C85	C
3	1	3	Heikkinen, Miss. Laina	female	26.0	0	0	STON/O2. 3101282	7.925	None	S
*/
Invoking: `sql_db_query_checker` with `{'query': 'SELECT AVG(Age) AS average_age FROM titanic WHERE Survived = 1'}`


```sql
SELECT AVG(Age) AS average_age FROM titanic WHERE Survived = 1
```
Invoking: `sql_db_query` with `{'query': 'SELECT AVG(Age) AS average_age FROM titanic WHERE Survived = 1'}`


[(32.125,)]평균 생존자들의 나이는 약 32.1세입니다.

> Finished chain.
"""