# Install the Portkey SDK, the OpenInference Portkey auto-instrumentation,
# and the Arize tracing/eval dependencies used by the snippets below.
pip install portkey-ai openinference-instrumentation-portkey arize-otel arize-phoenix "arize[Tracing]>=7.1.0"
import os

from arize.otel import register
from openinference.instrumentation.portkey import PortkeyInstrumentor

# Set up OpenTelemetry export to Arize.
# FIX: `os` was not imported in the original snippet (it was only imported
# in a later cell), so the getenv calls below would raise NameError when
# this block runs first.
tracer_provider = register(
    space_id=os.getenv("ARIZE_SPACE_ID"),
    api_key=os.getenv("ARIZE_API_KEY"),
    project_name="portkey-debate",
)

# Auto-instrument every Portkey client call so each LLM request is traced.
PortkeyInstrumentor().instrument(tracer_provider=tracer_provider)
from portkey_ai import Portkey
import os

# A single Portkey API key authenticates all requests; the per-client
# virtual key routes each one to a specific upstream provider.
PORTKEY_API_KEY = os.getenv("PORTKEY_API_KEY")


def _portkey_client(virtual_key_env: str) -> Portkey:
    """Build a Portkey client whose virtual key comes from the named env var."""
    return Portkey(
        api_key=PORTKEY_API_KEY,
        virtual_key=os.getenv(virtual_key_env),
    )


# GPT-4 for "against" arguments
openai = _portkey_client("OPENAI_VIRTUAL_KEY")
# Claude for "pro" arguments
claude = _portkey_client("CLAUDE_VIRTUAL_KEY")
# Gemini for moderation & prompt refinement
gemini = _portkey_client("GEMINI_VIRTUAL_KEY")
def debate_round(topic: str, debate_prompt: str) -> dict:
    """Run a single debate round on *topic* with *debate_prompt* as context.

    Flow:
        1. Claude argues the PRO side.
        2. GPT-4 argues the CON side.
        3. Gemini moderates both and proposes a refined prompt.

    Returns:
        dict with keys "pro", "con", and "new_prompt".
    """
    # ---- PRO side (Claude) ----
    pro_messages = [{
        "role": "user",
        "content": f"Argue in favor of: {topic}\n\nContext: {debate_prompt}",
    }]
    pro_resp = claude.chat.completions.create(
        messages=pro_messages,
        model="claude-3-opus-20240229",
        max_tokens=250,
    )
    pro_text = pro_resp["choices"][0]["message"]["content"]

    # ---- CON side (GPT-4) ----
    con_messages = [{
        "role": "user",
        "content": f"Argue against: {topic}\n\nContext: {debate_prompt}",
    }]
    con_resp = openai.chat.completions.create(
        model="gpt-4",
        messages=con_messages,
    )
    con_text = con_resp["choices"][0]["message"]["content"]

    # ---- Moderator (Gemini) ----
    moderator_prompt = f"""You are a debate moderator. Evaluate these arguments on "{topic}":

PRO: {pro_text}

CON: {con_text}

Suggest an improved debate prompt for more balanced arguments."""
    mod_resp = gemini.chat.completions.create(
        model="gemini-1.5-pro",
        messages=[{"role": "user", "content": moderator_prompt}],
    )
    # The moderator's suggestion feeds the next round as the new context.
    new_prompt = mod_resp["choices"][0]["message"]["content"].strip()

    return {"pro": pro_text, "con": con_text, "new_prompt": new_prompt}
# Drive several successive rounds, feeding the moderator's refined prompt
# back in as the context for the next round.
topic = "Implementing a nationwide four-day workweek"
initial_prompt = "Debate the pros and cons of a four-day workweek."
rounds = 3

prompt = initial_prompt
for round_no in range(1, rounds + 1):
    result = debate_round(topic, prompt)

    print(f"\n── Round {round_no} ──")
    print("🔵 PRO:", result["pro"])
    print("\n🔴 CON:", result["con"])
    print("\n🛠️ Suggested New Prompt:", result["new_prompt"])

    # The refined prompt becomes next round's debate context.
    prompt = result["new_prompt"]
import os
from datetime import datetime

from arize.exporter import ArizeExportClient
from arize.utils.types import Environments

client = ArizeExportClient()

# Export the traces recorded by the Portkey instrumentation.
# FIX: the original used a hard-coded 'YOUR_SPACE_ID' placeholder while the
# tracing setup and eval logging both read ARIZE_SPACE_ID from the
# environment — use the same env var here for consistency.
primary_df = client.export_model_to_df(
    space_id=os.getenv("ARIZE_SPACE_ID"),
    model_id='portkey-debate',
    environment=Environments.TRACING,
    start_time=datetime.fromisoformat('2025-06-19T07:00:00.000+00:00'),
    end_time=datetime.fromisoformat('2025-06-28T06:59:59.999+00:00'),
)

# Flatten the OTel attribute columns into the names the evaluators expect.
primary_df["input"] = primary_df["attributes.input.value"]
primary_df["output"] = primary_df["attributes.output.value"]
from phoenix.evals import (
    TOXICITY_PROMPT_RAILS_MAP,
    TOXICITY_PROMPT_TEMPLATE,
    OpenAIModel,
    llm_classify,
)

# GPT-4 at temperature 0.0 serves as a deterministic judge for toxicity.
model = OpenAIModel(
    model_name="gpt-4",
    temperature=0.0,
)

# Classify every exported trace row; the rails constrain the judge's output
# to the labels allowed by the toxicity template, and explanations make the
# verdicts auditable.
toxic_classifications = llm_classify(
    dataframe=primary_df,
    template=TOXICITY_PROMPT_TEMPLATE,
    model=model,
    rails=list(TOXICITY_PROMPT_RAILS_MAP.values()),
    provide_explanation=True,
)
from arize.pandas.logger import Client

# Client for pushing evaluation results back to Arize.
arize_client = Client(
    space_id=os.getenv("ARIZE_SPACE_ID"),
    api_key=os.getenv("ARIZE_API_KEY"),
)

# Attach the toxicity labels to the 'portkey-debate' traces (synchronous call).
arize_client.log_evaluations_sync(toxic_classifications, 'portkey-debate')
Was this page helpful?