from maxim import Maxim
from maxim.models import (
    YieldedOutput,
    YieldedOutputMeta,
    YieldedOutputTokenUsage,
    YieldedOutputCost,
)
import openai
import time

# Initialize Maxim and OpenAI clients
maxim = Maxim({"api_key": "your-maxim-api-key"})
client = openai.OpenAI(api_key="your-openai-api-key")
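# Tip: in real projects, load these keys from environment variables
# (e.g. os.getenv("MAXIM_API_KEY")) instead of hardcoding them.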

def rag_prompt_function(data):
    """Prompt function that answers using retrieval-augmented generation (RAG)."""
    # Simulate context retrieval (replace with your actual RAG logic,
    # e.g. a vector-store lookup keyed on the input question)
    retrieved_context = f'Context for "{data["input"]}": {data["context_to_evaluate"]}'

    system_prompt = """You are a helpful assistant. Use the provided context to answer the user's question accurately.

    Context: {context}

    Answer the user's question based on the context provided."""

    try:
        # Start timing the API call
        start_time = time.time()
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "system",
                    "content": system_prompt.format(context=retrieved_context),
                },
                {"role": "user", "content": data["input"]},
            ],
            temperature=0.3,
            max_tokens=200,
        )

        # Calculate latency in milliseconds
        end_time = time.time()
        latency_ms = (end_time - start_time) * 1000

        return YieldedOutput(
            data=response.choices[0].message.content,
            # Returning the retrieved context makes it available to
            # context-based evaluators such as Faithfulness
            retrieved_context_to_evaluate=retrieved_context,
            meta=YieldedOutputMeta(
                cost=YieldedOutputCost(
                    # gpt-4o-mini list pricing in USD per token (verify current rates)
                    input_cost=response.usage.prompt_tokens * 0.15 / 1_000_000,
                    output_cost=response.usage.completion_tokens * 0.60 / 1_000_000,
                    total_cost=(
                        response.usage.prompt_tokens * 0.15 / 1_000_000
                        + response.usage.completion_tokens * 0.60 / 1_000_000
                    ),
                ),
                usage=YieldedOutputTokenUsage(
                    prompt_tokens=response.usage.prompt_tokens,
                    completion_tokens=response.usage.completion_tokens,
                    total_tokens=response.usage.total_tokens,
                    latency=latency_ms,
                ),
            ),
        )
    except Exception as e:
        # Return the error message as output so the run still records the row
        return YieldedOutput(data=f"Error: {str(e)}")

# Test data with a context column for context evaluation
test_data_with_context = [
    {
        "input": "What is the impact of climate change on agriculture?",
        "expected_output": "Climate change affects agriculture through temperature changes and weather patterns",
        "context_to_evaluate": "Climate change impacts on farming",
    }
]

# Run the test with context evaluation
result = (
    maxim.create_test_run(name="RAG Prompt Test", in_workspace_id="your-workspace-id")
    .with_data_structure(
        {
            "input": "INPUT",
            "expected_output": "EXPECTED_OUTPUT",
            # This column feeds context evaluation; it is overridden whenever the
            # yielded output returns its own context to evaluate
            "context_to_evaluate": "CONTEXT_TO_EVALUATE",
        }
    )
    .with_data(test_data_with_context)
    .with_evaluators("Bias", "Clarity", "Faithfulness")
    .yields_output(rag_prompt_function)
    .run()
)

print(f"Test completed! View results: {result.test_run_result.link}")