Retrieval-Augmented Generation (RAG) is a technique that combines information retrieval with text generation. Instead of relying solely on a standalone large language model (LLM), RAG first retrieves relevant data from a knowledge base and provides that context to the LLM. This approach makes responses more accurate and grounded in up-to-date information without requiring model retraining.
Cortex Search: a fully managed hybrid (keyword plus vector) search service that powers the retrieval component of RAG applications.
Cortex LLM functions (such as COMPLETE): SQL and Python functions that provide access to industry-leading LLMs for the generation component.
-- Create database, warehouse, and schema for the RAG demo.
CREATE DATABASE IF NOT EXISTS cortex_search_db;

-- X-Small is enough here: the warehouse only serves index refreshes
-- for the search service and the demo queries below.
CREATE OR REPLACE WAREHOUSE cortex_search_wh WITH
WAREHOUSE_SIZE='X-SMALL';

CREATE OR REPLACE SCHEMA cortex_search_db.services;

-- Set the session context so the unqualified names below
-- (support_transcripts, transcript_search_service) resolve into
-- cortex_search_db.services — the schema the fully qualified
-- references later in this script expect — instead of whatever
-- schema the session happened to start in.
USE SCHEMA cortex_search_db.services;
-- Source table holding the documents to be indexed for retrieval.
CREATE OR REPLACE TABLE support_transcripts (
    transcript_text VARCHAR NOT NULL, -- document body indexed by the search service
    region          VARCHAR,          -- exposed as a filterable attribute
    agent_id        VARCHAR           -- returned with results, not searchable
)
-- Cortex Search Services require change tracking on their base
-- objects so the index can be kept within TARGET_LAG; enable it
-- explicitly rather than relying on service creation to do so.
CHANGE_TRACKING = TRUE;
-- Seed sample data. Explicit column list so the statement keeps
-- working if columns are later added or reordered.
INSERT INTO support_transcripts (transcript_text, region, agent_id) VALUES
('My internet has been down since yesterday, can you help?', 'North America', 'AG1001'),
('I was overcharged for my last bill, need an explanation.', 'Europe', 'AG1002'),
('How do I reset my password? The email link is not working.', 'Asia', 'AG1003'),
('I received a faulty router, can I get it replaced?', 'North America', 'AG1004');
-- Create Cortex Search Service for RAG retrieval.
--   ON transcript_text : the column whose text is indexed for search
--   ATTRIBUTES region  : column usable in result filters (see "@eq" below)
--   WAREHOUSE          : compute used to build and refresh the index
--   TARGET_LAG         : maximum staleness of the index vs. the base table
--   EMBEDDING_MODEL    : model used to embed documents and queries
-- Extra columns selected in the AS query (agent_id) are stored and can be
-- returned with results, but are neither searchable nor filterable.
CREATE OR REPLACE CORTEX SEARCH SERVICE transcript_search_service
ON transcript_text
ATTRIBUTES region
WAREHOUSE = cortex_search_wh
TARGET_LAG = '1 day'
EMBEDDING_MODEL = 'snowflake-arctic-embed-l-v2.0'
AS (
SELECT
transcript_text,
region,
agent_id
FROM support_transcripts
);
-- Preview search results without leaving SQL.
-- SEARCH_PREVIEW takes the fully qualified service name and a JSON string:
--   "query"   : free-text search input
--   "columns" : service columns to return for each hit
--   "filter"  : attribute filter — "@eq" restricts to an exact match on
--               the ATTRIBUTES column "region"
--   "limit"   : maximum number of hits
-- The function returns a JSON string; PARSE_JSON(...)['results'] extracts
-- the array of matching documents.
SELECT PARSE_JSON(
SNOWFLAKE.CORTEX.SEARCH_PREVIEW(
'cortex_search_db.services.transcript_search_service',
'{
"query": "internet issues",
"columns":["transcript_text", "region"],
"filter": {"@eq": {"region": "North America"} },
"limit": 5
}'
)
)['results'] as results;
-- Use COMPLETE function for RAG response generation.
-- COMPLETE(model, prompt) sends the assembled prompt to the chosen LLM
-- ('mistral-large') and returns its text response.
-- NOTE(review): retrieved_context and user_question are placeholders —
-- as written this statement has no FROM clause providing them, so it
-- will not run standalone. Supply them from a retrieval query (see the
-- full pipeline below) or from session variables.
SELECT SNOWFLAKE.CORTEX.COMPLETE(
'mistral-large',
CONCAT(
'Based on the following context, answer the user question.\n\n',
'Context: ', retrieved_context, '\n\n',
'Question: ', user_question, '\n\n',
'Answer:'
)
) AS response;
-- Full RAG pipeline combining retrieval and generation in one statement.
-- Step 1 (retrieval): the CTE queries the search service for the top 3
-- transcripts matching "billing overcharge" and extracts the JSON
-- results array.
WITH retrieved_docs AS (
SELECT PARSE_JSON(
SNOWFLAKE.CORTEX.SEARCH_PREVIEW(
'cortex_search_db.services.transcript_search_service',
'{
"query": "billing overcharge",
"columns": ["transcript_text", "region"],
"limit": 3
}'
)
)['results'] AS results
)
-- Step 2 (generation): the retrieved JSON is cast to VARCHAR and
-- spliced into the prompt as grounding context for the LLM.
SELECT SNOWFLAKE.CORTEX.COMPLETE(
'mistral-large',
CONCAT(
'You are a helpful customer support assistant. ',
'Based on the following support transcripts, provide a helpful response.\n\n',
'Relevant transcripts: ', results::VARCHAR, '\n\n',
'Customer question: How do I dispute a billing charge?\n\n',
'Response:'
)
) AS rag_response
FROM retrieved_docs;
import os

from snowflake.core import Root
from snowflake.cortex import Complete
from snowflake.snowpark import Session
# Create session.
# NOTE(review): never hard-code credentials in source. The credential
# fields are read from the environment, falling back to the original
# placeholder strings so behavior is unchanged until configured.
CONNECTION_PARAMETERS = {
    "account": os.environ.get("SNOWFLAKE_ACCOUNT", "your_account"),
    "user": os.environ.get("SNOWFLAKE_USER", "your_user"),
    "password": os.environ.get("SNOWFLAKE_PASSWORD", "your_password"),
    "warehouse": "cortex_search_wh",
    "database": "cortex_search_db",
    "schema": "services",
}
session = Session.builder.configs(CONNECTION_PARAMETERS).create()
root = Root(session)
# Look up the Cortex Search Service created earlier by walking the
# resource hierarchy: database -> schema -> search service.
database_handle = root.databases["cortex_search_db"]
schema_handle = database_handle.schemas["services"]
search_service = schema_handle.cortex_search_services["transcript_search_service"]
# Retrieval: Search for relevant documents.
user_query = "internet connection problems"
search_response = search_service.search(
query=user_query,                             # free-text search input
columns=["transcript_text", "region"],        # columns returned per hit
filter={"@eq": {"region": "North America"}},  # exact match on ATTRIBUTES column
limit=3                                       # top-3 hits only
)
# Get retrieved context: serialize the hits to a JSON string so they can
# be interpolated directly into the LLM prompt below.
retrieved_context = search_response.to_json()
# Generation: Use LLM with retrieved context.
# The prompt grounds the model in the retrieved transcripts so the
# answer reflects actual support data rather than model memory.
prompt = f"""Based on the following customer support transcripts,
provide a helpful response to the user's question.
Retrieved transcripts: {retrieved_context}
User question: {user_query}
Response:"""
# Complete() sends the prompt to the chosen model over the existing
# session and returns the model's text response.
response = Complete(
model="mistral-large",
prompt=prompt,
session=session
)
print(response)
For chunking longer documents into indexable passages before building a search service, Snowflake also provides the SPLIT_TEXT_RECURSIVE_CHARACTER function.