Model IO
Implementation - Part 1
Steps:
API key loading
import os
import warnings
warnings.filterwarnings('ignore')
with open('../openai_api_key.txt', 'r') as f:
    api_key = f.read()
os.environ['OPENAI_API_KEY'] = api_key
# os.getenv('OPENAI_API_KEY')
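Alternatively (a minimal sketch, assuming the python-dotenv package is installed and a .env file containing OPENAI_API_KEY exists), the key can be loaded without keeping a plain-text key file around:
# Hypothetical alternative: store the key in a .env file and load it with python-dotenv
from dotenv import load_dotenv
load_dotenv()  # reads .env and populates os.environ, so os.getenv('OPENAI_API_KEY') works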
Load the text completion model
from langchain.llms import OpenAI
llm = OpenAI()
Single Prompt
prompt = "The impact of the globalization on diverse cultures can be explained as:"
response = llm(prompt=prompt)
response
> '\n\n1. Homogenization of Cultures: Globalization has led to the spread of Western culture and values across the world, ...
print(response)
> 1. Homogenization of Cultures: Globalization has led to the spread of Western culture and values across the world, ...
Multiple prompts
prompts = [
"The impact of the globalization on diverse cultures can be explained as:",
"Ecosystems maintains biodiversity as follows:"
]
response = llm.generate(prompts=prompts)
response
> LLMResult(generations=[[Generation(text='\n\n1. Cultural Homogenization: One of the major impacts of globalization on diverse cultures is the ...
print(response.generations[0][0].text)
> 1. Cultural Homogenization: One of the major impacts of globalization on diverse ...
# Print individual responses
for gen_list in response.generations:
    gen = gen_list[0]
    text = gen.text
    print(text)
    print("-"*50)
> 1. Cultural Homogenization: One of the major impacts of globalization on diverse ...
LLM usage Information
response.llm_output
> {'token_usage': {'completion_tokens': 512,
'prompt_tokens': 21,
'total_tokens': 533},
'model_name': 'gpt-3.5-turbo-instruct'}
Response Caching
from langchain.globals import set_llm_cache
# In memory caching
from langchain.cache import InMemoryCache
set_llm_cache(InMemoryCache())
# SQLite caching
from langchain.cache import SQLiteCache
set_llm_cache(SQLiteCache(database_path='../models/cache.db'))
With this, responses for the same prompt and parameters are cached: whenever you run the LLM with a prompt and parameters it has already seen, the call doesn't hit the LLM again; the response is served from the cache instead. Note that only the most recently set cache is active, so the SQLiteCache above replaces the InMemoryCache.
Example: Let's get the response from the LLM for a random prompt
response = llm("Give all the details about Bali...")
# time: 2.8s
When we run the same command again, after running the caching code
response = llm("Give all the details about Bali...")
# time: 0.0s
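To verify the cache hit yourself, you can time both calls (a small sketch using the standard library; exact timings will differ):
import time
start = time.perf_counter()
llm("Give all the details about Bali...")  # first call goes to the API
print(f"first call: {time.perf_counter() - start:.2f}s")
start = time.perf_counter()
llm("Give all the details about Bali...")  # same prompt and parameters, served from the cache
print(f"second call: {time.perf_counter() - start:.2f}s")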
Schema
* SystemMessage: Role assigned to the AI.
* HumanMessage: Human request or the prompt.
* AIMessage: the AI's response, as per its role, to the human request.
from langchain.schema import SystemMessage, HumanMessage
from langchain.chat_models import ChatOpenAI
chat = ChatOpenAI()  # chat completion model used for the message-based examples
response = chat(messages=[HumanMessage(content='What is the longest river in the world?')])
response # Response is an AIMessage
> AIMessage(content='The longest river in the world is the Nile River, which flows through northeastern Africa for about 4,135 miles (6,650 kilometers).')
# Adding system message
messages = [
SystemMessage(content='Act as a funny anthropologist'),
HumanMessage(content="The impact of the globalization on diverse cultures can be explained as:")
]
response = chat(messages=messages)
response
> AIMessage(content="Ah, yes, the fascinating topic of globalization and its impact on diverse
Parameters
[Click Here](https://platform.openai.com/docs/api-reference/chat/create) for the official documentation
response = chat(
messages=[
SystemMessage(content='You are an angry doctor'),
HumanMessage(content='Explain the digestion process in human bodies')
],
model = "gpt-3.5-turbo", # Model for generation,
temperature=2, # [0, 2] Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
presence_penalty=2, # [-2.0, 2.0] Positive values increase the model's likelihood to talk about new topics.
max_tokens=100
)
print(response.content)
> Ugh Cyril hung increased values Guards gala? Buck through ik St battleground
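With temperature=2 the response above is mostly gibberish. For contrast, a low temperature keeps the output focused and nearly deterministic (a quick sketch; the exact wording will vary):
response = chat(
    messages=[
        SystemMessage(content='You are an angry doctor'),
        HumanMessage(content='Explain the digestion process in human bodies')
    ],
    temperature=0,  # focused, repeatable output
    max_tokens=100
)
print(response.content)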
Few Shot Prompting
from langchain.schema import AIMessage
system_message = "You are a funny doctor"
patient_dialogue1 = "Doctor, I have been feeling a bit under the weather lately."
sample_response1 = "Under the weather? Did you try checking the forecast before stepping out? You might need a weather app prescription!"
patient_dialogue2 = "My throat has been sore, and I have a cough."
sample_response2 = "The classic sore throat symphony! I recommend a strong dose of chicken soup and a daily karaoke session. Sing it out, and your throat will thank you."
patient_dialogue3 = "I have a headache."
sample_response3 = "Headache, you say? Have you tried negotiating with it? Maybe it's just looking for a better job inside your brain!"
messages = [
# SystemMessage(content=system_message),
HumanMessage(content=patient_dialogue1),
AIMessage(content=sample_response1),
HumanMessage(content=patient_dialogue2),
AIMessage(content=sample_response2),
HumanMessage(content=patient_dialogue3),
AIMessage(content=sample_response3),
HumanMessage(content='I have a stomach pain')
]
response = chat(messages=messages)
print(response.content)
> Stomach pain, huh? Maybe your stomach is just trying to tell a joke! Have you tried asking it to lighten up a bit?
Exercise
- Create a cross-questioning bot, with and without a system prompt
- Create a bad comedian bot that tries to crack a joke about everything you say, playing with the words in your dialogue (a starting-point sketch follows below)
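A minimal starting-point sketch for the bad comedian bot, reusing the chat model from above (the system prompt and example dialogues are placeholders to adapt):
comedian_messages = [
    SystemMessage(content='You are a terrible comedian. Whatever the user says, reply with a pun or wordplay built from their own words.'),
    HumanMessage(content='I missed my bus this morning.'),
    AIMessage(content="Missed the bus? Sounds like your morning really drove off without you."),
    HumanMessage(content='I am learning to cook pasta.')
]
response = chat(messages=comedian_messages)
print(response.content)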
Tasks
- Write a blog on few shot prompting
- Create a GitHub account
Implementation - Part 2
Steps:
Prompt Templating - Text Completion models
# loading the models
import os
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
import warnings
warnings.filterwarnings("ignore")
with open('../openai_api_key.txt', 'r') as f:
    os.environ['OPENAI_API_KEY'] = f.read()
llm = OpenAI()
chat = ChatOpenAI()
# Cache
from langchain.globals import set_llm_cache
from langchain.cache import InMemoryCache
set_llm_cache(InMemoryCache())
Prompt templating - format strings
prompt_template = "write an essay on {topic}"
prompt = prompt_template.format(topic='data science')
prompt
> 'write an essay on data science'
print(llm(prompt_template.format(topic='science')))
>
Science is a systematic and logical approach to understanding the natural world. It is a method of acquiring knowledge through observation, experimentation, and analysis. ...
Prompt templating - f-string literals
topic = 'data science' # Need a global variable
prompt = f"Write an essay on {topic}"
prompt
> 'Write an essay on data science'
# To use a local variable, create a function
def get_prompt(topic):
    prompt = f"Write an essay on {topic}"
    return prompt
get_prompt(topic='data science')
> 'Write an essay on data science'
These approaches don't scale well once we move on to more complex tasks like chains.
Let's learn how to use prompt templates in langchain
Prompt templating using langchain prompt template
Prompt templating - text completion models
from langchain.prompts import PromptTemplate
prompt_template = PromptTemplate(
input_variables=['topic'],
template = "Write an essay on {topic}"
)
prompt = prompt_template.format(topic='data science')
prompt
> 'Write an essay on data science'
Another prompt with more inputs
prompt_template = PromptTemplate(
input_variables=['topic', 'num_words'],
template = "Write an essay on {topic} in {num_words} words"
)
prompt = prompt_template.format(topic='data science', num_words=200)
prompt
> 'Write an essay on data science in 200 words'
For the same prompt_template, even if you leave input_variables empty, formatting with the placeholders still works the same way.
prompt_template = PromptTemplate(
input_variables=[],
template = "Write an essay on {topic} in {num_words} words"
)
prompt = prompt_template.format(topic='data science', num_words=200)
prompt
> 'Write an essay on data science in 200 words'
response = llm(prompt)
print(response)
>
Data science is an interdisciplinary field that combines techniques and tools from statistics, mathematics, computer science, and information science to extract useful insights and knowledge from large and complex datasets. ...
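As a quick preview of why templates matter for chains (a minimal sketch using the legacy LLMChain API; chains are covered in detail later):
from langchain.chains import LLMChain
essay_prompt = PromptTemplate(
    input_variables=['topic', 'num_words'],
    template="Write an essay on {topic} in {num_words} words"
)
chain = LLMChain(llm=llm, prompt=essay_prompt)  # the template slots straight into the chain
print(chain.run(topic='data science', num_words=200))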
Serialization
prompt_template
> PromptTemplate(input_variables=['num_words', 'topic'], template='Write an essay on {topic} in {num_words} words')
Saving the prompt templates
prompt_template.save("../output/prompt_template.json")
Loading the prompt templates
from langchain.prompts import load_prompt
loaded_prompt_template = load_prompt('../output/prompt_template.json')
loaded_prompt_template
> PromptTemplate(input_variables=['num_words', 'topic'], template='Write an essay on {topic} in {num_words} words')
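A quick check that the loaded template behaves exactly like the original:
loaded_prompt_template.format(topic='data science', num_words=200)
> 'Write an essay on data science in 200 words'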
Prompt templating - chat completion models
Using format strings or f-string literals with langchain schema objects
format strings
from langchain.schema import SystemMessage
prompt_template = "Write an essay on {topic}"
system_message_prompt = SystemMessage(content=prompt_template.format(topic="data science"))
system_message_prompt
f-string literals
topic = "data science"
prompt_template = f"Write an essay on {topic}"
system_message_prompt = SystemMessage(prompt_template)
system_message_prompt
Issue: with this type of prompt templating, we either have to define our inputs well in advance or make them global variables.
Prompt templating using langchain prompt template
Starting with a simple Human Message Prompt Template
from langchain.prompts.chat import HumanMessagePromptTemplate, ChatPromptTemplate
human_template = "Write an essay on {topic}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
prompt = chat_prompt.format_prompt(topic='data science')
prompt
> ChatPromptValue(messages=[HumanMessage(content='Write an essay on data science')])
To get the messages from the ChatPromptValue
# messages = prompt.to_messages()
messages = prompt.messages
messages
> [HumanMessage(content='Write an essay on data science')]
Getting the response from the chat model
response = chat(messages=messages)
response
> AIMessage(content="Data science is a rapidly growing field that involves the collection, analysis, and interpretation...
Similarly, let's do it with the other message schemas
from langchain.prompts.chat import SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate, ChatPromptTemplate
System Message Prompt Template
system_template = "You are a nutritionist"
system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)
system_message_prompt
> SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a nutritionist'))
Human Message Prompt Template
human_template = "Tell the impact of {food_item} on human body when consumed regularly"
human_message_prompt = HumanMessagePromptTemplate.from_template(template=human_template)
human_message_prompt
> HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['food_item'], template='Tell the impact of {food_item} on human body when consumed regularly'))
Chat Prompt Template
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
chat_prompt
> ChatPromptTemplate(input_variables=['food_item'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a nutritionist')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['food_item'], template='Tell the impact of {food_item} on human body when consumed regularly'))])
prompt = chat_prompt.format_prompt(food_item='rice')
prompt
> ChatPromptValue(messages=[SystemMessage(content='You are a nutritionist'), HumanMessage(content='Tell the impact of rice on human body when consumed regularly')])
Convert the ChatPromptValue to messages to pass to the chat model
messages = prompt.to_messages()
messages
> [SystemMessage(content='You are a nutritionist'),
HumanMessage(content='Tell the impact of rice on human body when consumed regularly')]
response = chat(messages=messages)
response
> AIMessage(content="Rice is a staple food for many people around the world and can provide several health benefits when consumed regularly as part of a balanced diet. ...
Implementation - Part 3
Steps
Output Parsers
Loading the language model and setting the cache
import os
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.globals import set_llm_cache
from langchain.cache import InMemoryCache
import warnings
warnings.filterwarnings('ignore')
with open('../openai_api_key.txt', 'r') as f:
    api_key = f.read()
os.environ['OPENAI_API_KEY'] = api_key
llm = OpenAI()
chat = ChatOpenAI()
set_llm_cache(InMemoryCache())
Steps to use an output parser
- get_format_instructions: instructions that tell the model how to format its output
- parse: converts the model's raw text output into the target structure
Step 1: Create an instance of the parser
from langchain.output_parsers import CommaSeparatedListOutputParser
output_parser = CommaSeparatedListOutputParser()
output_parser
> CommaSeparatedListOutputParser()
Step 2: Get the format instructions
output_parser.get_format_instructions()
> 'Your response should be a list of comma separated values, eg: `foo, bar, baz`'
Step 3: Send the instructions to the model
from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
human_template = "{user_request}\n{format_instructions}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
prompt = chat_prompt.format_prompt(user_request="What are the 7 wonders?", format_instructions=output_parser.get_format_instructions())
prompt
> ChatPromptValue(messages=[HumanMessage(content='What are the 7 wonders?\nYour response should be a list of comma separated values, eg: `foo, bar, baz`')])
messages = prompt.to_messages()
response = chat(messages=messages)
print(response.content)
> Great Pyramid of Giza, Hanging Gardens of Babylon, Statue of Zeus at Olympia, Temple of Artemis at Ephesus, Mausoleum at Halicarnassus, Colossus of Rhodes, Lighthouse of Alexandria
Step 4: Use the parser to parse the output
output_parser.parse(response.content)
> ['Great Pyramid of Giza',
'Hanging Gardens of Babylon',
'Statue of Zeus at Olympia',
'Temple of Artemis at Ephesus',
'Mausoleum at Halicarnassus',
'Colossus of Rhodes',
'Lighthouse of Alexandria']
What happens when the parser fails?
from langchain.output_parsers import DatetimeOutputParser
output_parser = DatetimeOutputParser()
format_instructions = output_parser.get_format_instructions()
print(format_instructions)
> Write a datetime string that matches the following pattern: '%Y-%m-%dT%H:%M:%S.%fZ'.
Examples: 0278-08-03T19:42:55.481110Z, 1567-04-05T01:30:42.197571Z, 0101-06-24T18:20:21.443663Z
Return ONLY this string, no other words!
human_template = "{human_message}\n{format_instructions}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
prompt = chat_prompt.format_prompt(human_message="When was Jesus Christ born?", format_instructions=format_instructions)
messages = prompt.to_messages()
response = chat(messages=messages)
output = output_parser.parse(response.content)
output
> ---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
File d:\CodeWork\GitHub\langchain_training\.venv\lib\site-packages\langchain\output_parsers\datetime.py:50, in DatetimeOutputParser.parse(self, response)
49 try:
---> 50 return datetime.strptime(response.strip(), self.format)
51 except ValueError as e: ...
OutputFixingParser
from langchain.output_parsers import OutputFixingParser
fixing_parser = OutputFixingParser.from_llm(parser=output_parser, llm=chat)
fixed_output = fixing_parser.parse(response.content)
fixed_output
> datetime.datetime(1, 1, 1, 0, 0)
Fixing might not always work, so let's try multiple times
for chance in range(1, 10):
    try:
        fixed_output = fixing_parser.parse(response.content)
    except:
        continue
    else:
        break
fixed_output
> datetime.datetime(1, 1, 1, 0, 0)
Custom Parsers
Structured Output Parser
Define the response schema
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
response_schemas = [
ResponseSchema(name="answer", description="answer to the user's question"),
ResponseSchema(
name="source",
description="source used to answer the user's question, should be a website.",
),
]
Define the output parser
from langchain.output_parsers import StructuredOutputParser
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
output_parser
> StructuredOutputParser(response_schemas=[ResponseSchema(name='answer', description="answer to the user's question", type='string'), ResponseSchema(name='source', description="source used to answer the user's question, should be a website.", type='string')])
Get the format instructions
format_instructions = output_parser.get_format_instructions()
format_instructions
> 'The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":\n\n```json\n{\n\t"answer": string // answer to the user\'s question\n\t"source": string // source used to answer the user\'s question, should be a website.\n}\n```
Get the response
human_template = "{human_message}\n{format_instructions}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
prompt = chat_prompt.format_prompt(human_message = "What's the world's largest man made structure?", format_instructions=format_instructions)
messages = prompt.to_messages()
response = chat(messages=messages)
output = output_parser.parse(response.content)
output
> {'answer': 'The Great Wall of China',
'source': 'https://www.history.com/topics/great-wall-of-china'}
Let's look at a more powerful way of creating a custom parser
PydanticOutputParser
Let's quickly learn about pydantic
Conventional pythonic way of building classes
class Student:
    def __init__(self, name: str):
        self.name = name
john = Student(name='John')
john.name
> 'John'
Similarly
jane = Student(name=1) # Accepts an int even though name is annotated as str
jane.name
> 1
# The conventional approach doesn't enforce strict type validation
type(jane.name) # Returns int too
> int
Pydantic has simple syntax with strict type validation
from pydantic import BaseModel
class Student(BaseModel):
    name: str
jane = Student(name=1) # THIS WILL THROW AN ERROR
jane = Student(name='jane')
jane.name
> 'jane'
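To see the validation failure explicitly, you can catch it (a small sketch; the exact behaviour and error text depend on the installed pydantic version):
from pydantic import ValidationError
try:
    Student(name=1)
except ValidationError as e:
    print(e)  # reports that 'name' is not a valid string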
Let's get back to langchain
When we want our output to be in a specific class object format
First let's define the class
from pydantic import BaseModel, Field
from typing import List
class Car(BaseModel):
    name: str = Field(description="Name of the car")
    model_number: str = Field(description="Model number of the car")
    features: List[str] = Field(description="List of features of the car")
Create an instance of our custom parser
from langchain.output_parsers import PydanticOutputParser
output_parser = PydanticOutputParser(pydantic_object=Car)
print(output_parser.get_format_instructions())
> The output should be formatted as a JSON instance that conforms to the JSON schema below.
As an example, for the schema ...
Getting the response
human_template = "{human_message}\n{format_instructions}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
prompt = chat_prompt.format_prompt(human_message='Tell me about the most expensive car in the world',
format_instructions=output_parser.get_format_instructions())
response = chat(messages=prompt.to_messages())
output = output_parser.parse(response.content)
output
> Car(name='Bugatti La Voiture Noire', model_number='Divo', features=['1500 horsepower engine', '8.0-liter quad-turbocharged W16 engine', 'carbon fiber body', 'top speed of 261 mph'])
type(output)
> __main__.Car
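Since the parsed output is a regular Car instance, its fields are ordinary attributes:
output.name          # 'Bugatti La Voiture Noire'
output.features[0]   # '1500 horsepower engine'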
Structured output with Pydantic (with_structured_output)
The newer ChatOpenAI class from langchain_openai supports the with_structured_output method, which can take pydantic models built with pydantic_v1 from langchain_core.
pip install langchain_openai
Elsewhere we will still be using from langchain.chat_models import ChatOpenAI to load the chat model, to follow the standard structure of langchain (though it is deprecated); here we need the langchain_openai version.
import os
from typing import List
from langchain_openai import ChatOpenAI
from langchain_core.pydantic_v1 import BaseModel, Field
with open('../openai_api_key.txt') as f:
    os.environ['OPENAI_API_KEY'] = f.read()
class Car(BaseModel):
    name: str = Field(description="Name of the car")
    model_number: str = Field(description="Model number of the car")
    features: List[str] = Field(description="List of features of the car")
model = ChatOpenAI()
model_with_structure = model.with_structured_output(Car)
model_with_structure.invoke('Tell me about the most expensive car in the world')
> Car(name='Bugatti La Voiture Noire', model_number='1', features=['Luxurious design', 'Powerful engine', 'Top speed of 261 mph', 'Exclusive and limited edition'])
Project ideas
- Real time text translation
- Text Summarization tool
- Q&A System
- Travel Planner
- Tweet Responder
Exercise
Create a Smart Chef bot that can give you recipes based on the available food items you have in your kitchen.
Let's build a gradio app
import os
from typing import List
import gradio as gr
from pydantic import Field, BaseModel
from langchain.chat_models import ChatOpenAI
from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser
# Creating the instance of the chat model
with open('openai_api_key.txt', 'r') as f:
    api_key = f.read()
os.environ['OPENAI_API_KEY'] = api_key
chat = ChatOpenAI()
# Define the Pydantic Model
class SmartChef(BaseModel):
    name: str = Field(description="Name of the dish")
    ingredients: dict = Field(description="Python dictionary with ingredients as keys and their corresponding quantities as values")
    instructions: List[str] = Field(description="Python list of instructions to prepare the dish")
# Get format instructions
output_parser = PydanticOutputParser(pydantic_object=SmartChef)
format_instructions = output_parser.get_format_instructions()
def smart_chef(food_items: str) -> list:
    # Getting the response
    human_template = """I have the following list of the food items:
{food_items}
Suggest me a recipe only using these food items
{format_instructions}"""
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
    chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
    prompt = chat_prompt.format_prompt(
        food_items=food_items, format_instructions=format_instructions)
    messages = prompt.to_messages()
    response = chat(messages=messages)
    output = output_parser.parse(response.content)
    dish_name, ingredients, instructions = output.name, output.ingredients, output.instructions
    return dish_name, ingredients, instructions
# Building interface
with gr.Blocks() as demo:
    gr.HTML("<h1 align='center'>Smart Chef</h1>")
    gr.HTML("<h3 align='center'><i>Cook with whatever you have</i></h3>")
    inputs = [gr.Textbox(label='Enter the list of ingredients you have, in a comma separated text', lines=3, placeholder='Example: Chicken, Onion, Tomatoes, ... etc.')]
    generate_btn = gr.Button(value="Generate")
    outputs = [gr.Text(label='Name of the dish'), gr.JSON(label="Ingredients with corresponding quantities"), gr.Textbox(label="Instructions to prepare")]
    generate_btn.click(fn=smart_chef, inputs=inputs, outputs=outputs)
if __name__ == "__main__":
    demo.launch(share=True)
In the terminal, run the following command
python src/app.py
Deploying the Gradio application in HuggingFace Spaces
* Create a HuggingFace account
* Install Git Bash (Optional)
For deploying: on Spaces, the API key should come from the Space's secrets (exposed as the OPENAI_API_KEY environment variable) instead of a local file, hence the commented-out lines below.
import os
from typing import List
import gradio as gr
# from pydantic import Field, BaseModel
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI
from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser
# Creating the instance of the chat model
# with open('openai_api_key.txt', 'r') as f:
# api_key = f.read()
# os.environ['OPENAI_API_KEY'] = api_key
chat = ChatOpenAI()
# Define the Pydantic Model
class SmartChef(BaseModel):
    name: str = Field(description="Name of the dish")
    ingredients: dict = Field(description="Python dictionary with ingredients as keys and their corresponding quantities as values")
    instructions: List[str] = Field(description="Python list of instructions to prepare the dish")
# Get format instructions
output_parser = PydanticOutputParser(pydantic_object=SmartChef)
format_instructions = output_parser.get_format_instructions()
def smart_chef(food_items: str) -> list:
    # Getting the response
    human_template = """I have the following list of the food items:
{food_items}
Suggest me a recipe only using these food items
{format_instructions}"""
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
    chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
    prompt = chat_prompt.format_prompt(
        food_items=food_items, format_instructions=format_instructions)
    messages = prompt.to_messages()
    response = chat(messages=messages)
    output = output_parser.parse(response.content)
    dish_name, ingredients, instructions = output.name, output.ingredients, output.instructions
    return dish_name, ingredients, instructions
with gr.Blocks() as demo:
    gr.HTML("<h1 align='center'>Smart Chef</h1>")
    gr.HTML("<h3 align='center'><i>Cook with whatever you have</i></h3>")
    # gr.HTML("## Cook with whatever you have")
    inputs = [gr.Textbox(label='Enter the list of ingredients you have, in a comma separated text', lines=3, placeholder='Example: Chicken, Onion, Tomatoes, ... etc.')]
    generate_btn = gr.Button(value="Generate")
    outputs = [gr.Text(label='Name of the dish'), gr.JSON(label="Ingredients with corresponding quantities"), gr.Textbox(label="Instructions to prepare")]
    generate_btn.click(fn=smart_chef, inputs=inputs, outputs=outputs)
if __name__ == "__main__":
    demo.launch()