from maxim import Maxim
from maxim.models import (
    LocalData,
    YieldedOutput,
    YieldedOutputCost,
    YieldedOutputMeta,
    YieldedOutputTokenUsage,
    Data,
)
from crewai import Crew, Agent, Task
from langchain_openai import ChatOpenAI
import time
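# Example: evaluating a two-agent CrewAI workflow with a Maxim test run.
# A research agent gathers findings, a writer agent turns them into content,
# and Maxim scores each final output with the configured evaluators.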
# Initialize Maxim SDK
maxim = Maxim({"api_key": "your-api-key"})
# Initialize LLM for CrewAI
llm = ChatOpenAI(
    model="gpt-4o",
)
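# ChatOpenAI reads OPENAI_API_KEY from the environment by default.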
# Define agents
research_agent = Agent(
role="Research Specialist",
goal="Gather comprehensive information on given topics",
backstory="You are an expert researcher with access to various information sources.",
llm=llm,
verbose=True,
)
writer_agent = Agent(
role="Content Writer",
goal="Create well-structured, engaging content based on research",
backstory="You are a skilled writer who can transform research into compelling content.",
llm=llm,
verbose=True,
)
# Define agent workflow function
def run_crewai_agent(data: LocalData) -> YieldedOutput:
"""Custom agent function using CrewAI"""
start_time = time.time()
user_input = data.get("input", "")
topic = data.get("topic", "")
content_type = data.get("content_type", "article")
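    # Each dataset entry arrives as a dict-like LocalData row keyed by the
    # columns declared in with_data_structure() below.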
    # Create tasks
    research_task = Task(
        description=f"Research the topic: {topic}. Focus on {user_input}",
        agent=research_agent,
        expected_output="Comprehensive research findings with key points and insights",
    )
    writing_task = Task(
        description=f"Write a {content_type} based on the research findings",
        agent=writer_agent,
        expected_output=f"A well-structured {content_type} based on the research",
    )
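    # With the default sequential process, CrewAI feeds each task's output
    # as context to the task that follows it.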
    # Create and run crew
    crew = Crew(
        agents=[research_agent, writer_agent],
        tasks=[research_task, writing_task],
        verbose=True,
    )
    result = crew.kickoff()
    end_time = time.time()
    latency = end_time - start_time
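    # kickoff() returns a CrewOutput: .raw holds the final answer, .tasks_output
    # the per-task results, and .token_usage the aggregated LLM usage.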
    return YieldedOutput(
        data=result.raw,
        retrieved_context_to_evaluate=[
            task.raw for task in result.tasks_output[:-1]
        ],  # treat every task except the last as context to evaluate
        meta=YieldedOutputMeta(
            usage=YieldedOutputTokenUsage(
                prompt_tokens=result.token_usage.prompt_tokens,
                completion_tokens=result.token_usage.completion_tokens,
                total_tokens=result.token_usage.total_tokens,
                latency=latency,
            ),
            cost=YieldedOutputCost(
                input_cost=result.token_usage.prompt_tokens * 0.0001,  # $0.0001 per input token
                output_cost=result.token_usage.completion_tokens * 0.0002,  # $0.0002 per output token
                total_cost=(result.token_usage.prompt_tokens * 0.0001)
                + (result.token_usage.completion_tokens * 0.0002),
            ),
        ),
    )
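# NOTE: The per-token rates above are placeholder values for illustration;
# substitute your model's actual pricing when computing real costs.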
# Test data
test_data: Data = [
    {
        "input": "Latest trends in artificial intelligence",
        "topic": "AI developments in 2024",
        "content_type": "blog post",
        "expected_output": "Comprehensive blog post about AI trends with current insights",
    },
    {
        "input": "Sustainable energy solutions",
        "topic": "Renewable energy technologies",
        "content_type": "report",
        "expected_output": "Detailed report on sustainable energy solutions and technologies",
    },
]
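# Column roles: INPUT is the primary query, VARIABLEs are extra fields passed
# to the agent, and EXPECTED_OUTPUT is the reference evaluators compare against.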
# Run test with custom agent
result = (
    maxim.create_test_run(
        name="CrewAI Agent Evaluation", in_workspace_id="your-workspace-id"
    )
    .with_data_structure(
        {
            "input": "INPUT",
            "topic": "VARIABLE",
            "content_type": "VARIABLE",
            "expected_output": "EXPECTED_OUTPUT",
        }
    )
    .with_data(test_data)
    .with_evaluators("Faithfulness", "Clarity", "Output Relevance")
    .yields_output(run_crewai_agent)
    .run()
)
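# run() pushes every entry through run_crewai_agent; the returned result
# links to the full report in the Maxim dashboard.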
if result:
print(f"Test run completed! View results: {result.test_run_result.link}")
else:
print("Test run failed!")