Traffic Cop JavaScript Client

A JavaScript/TypeScript client for the Traffic Cop API, an intelligent middleware for optimizing LLM API usage.

What is Traffic Cop?

Traffic Cop is a middleware SaaS that optimizes LLM API usage by intelligently routing requests through cost-effective 'Draft' models and high-fidelity 'Verify' models. It helps you:

  • Reduce LLM API costs by using smaller models when appropriate
  • Maintain high quality by verifying with larger models when needed
  • Collect valuable data on model performance and confidence
  • Optimize your LLM strategy with data-driven insights

Traffic Cop supports two execution modes:

  1. Advise Mode (default): Traffic Cop provides recommendations, and your application executes the LLM calls
  2. Proxy Mode: Traffic Cop executes the LLM calls on your behalf

Installation

npm install traffic-cop-client
# or
yarn add traffic-cop-client

Usage

Traffic Cop supports two execution modes:

  • Advise Mode (default): Traffic Cop provides recommendations on whether to use a draft model or verify model, but the client executes the LLM calls.
  • Proxy Mode: Traffic Cop executes the LLM calls on behalf of the client.

Advise Mode (Client Executes LLM Calls)

Advise Mode is the default and recommended mode for most users. In this mode:

  1. Traffic Cop provides recommendations on whether to use a draft model or verify model
  2. Your application executes the LLM calls based on this advice
  3. You report the outcome back to Traffic Cop to help improve future recommendations

Example with OpenAI

import { TrafficCopClient, ExecutionMode, AdviseDecision } from 'traffic-cop-client';
import OpenAI from 'openai';

// Initialize clients
const trafficCop = new TrafficCopClient({
  apiKey: 'your-traffic-cop-api-key',
});

const openai = new OpenAI({
  apiKey: 'your-openai-api-key',
});

// Route a prompt in advise mode
async function adviseExample() {
  try {
    const prompt = 'What is the capital of France?';
    const draftModelId = 'gpt-3.5-turbo';
    const verifyModelId = 'gpt-4';

    // Step 1: Get advice from Traffic Cop
    const advice = await trafficCop.route({
      prompt,
      draftModelId,
      verifyModelId,
      executionMode: ExecutionMode.ADVISE, // Default, can be omitted
      userId: 'user-123', // Optional, will be generated if not provided
      metadata: { // Optional
        source: 'web-app',
        sessionId: 'session-456',
      },
    });

    console.log(`Decision: ${advice.decision}`);
    console.log(`Suggested draft model: ${advice.suggestedDraftModelId}`);
    console.log(`Suggested verify model: ${advice.suggestedVerifyModelId}`);

    // Step 2: Execute the draft model call
    const startDraftTime = Date.now();
    const draftResponse = await openai.chat.completions.create({
      model: draftModelId,
      messages: [{ role: 'user', content: prompt }],
    });
    const draftLatencyMs = Date.now() - startDraftTime;

    const draftContent = draftResponse.choices[0].message.content;
    const draftTokenCount = draftResponse.usage.total_tokens;

    console.log(`Draft response: ${draftContent}`);

    // Step 3: Decide whether to verify based on Traffic Cop's advice
    const shouldVerify = advice.decision === AdviseDecision.VERIFICATION_RECOMMENDED;
    let verifyContent = null;
    let verifyTokenCount = null;
    let verifyLatencyMs = null;

    if (shouldVerify) {
      // Execute the verify model call
      const startVerifyTime = Date.now();
      const verifyResponse = await openai.chat.completions.create({
        model: verifyModelId,
        messages: [{ role: 'user', content: prompt }],
      });
      verifyLatencyMs = Date.now() - startVerifyTime;

      verifyContent = verifyResponse.choices[0].message.content;
      verifyTokenCount = verifyResponse.usage.total_tokens;

      console.log(`Verify response: ${verifyContent}`);
    }

    // Step 4: Choose the final response
    const finalContent = verifyContent || draftContent;

    // Step 5: Report the outcome back to Traffic Cop
    const outcome = await trafficCop.reportExecutionOutcome({
      trafficCopRequestId: advice.trafficCopRequestId,
      userId: 'user-123', // Use the same userId that was passed to route()
      actualDraftModelUsed: draftModelId,
      draftTokenCount: draftTokenCount,
      draftLatencyMs: draftLatencyMs,
      wasVerificationPerformed: shouldVerify,
      finalResponse: finalContent,
      actualVerifyModelUsed: shouldVerify ? verifyModelId : undefined,
      verifyTokenCount: verifyTokenCount,
      verifyLatencyMs: verifyLatencyMs,
      qualityFeedback: 0.95, // Optional feedback score (0-1)
    });

    console.log(`Outcome reported: ${outcome.success}`);
    console.log(`Final response: ${finalContent}`);
  } catch (error) {
    console.error('Error:', error);
  }
}

adviseExample();

Example with Anthropic

import { TrafficCopClient, ExecutionMode, AdviseDecision } from 'traffic-cop-client';
import Anthropic from '@anthropic-ai/sdk';

// Initialize clients
const trafficCop = new TrafficCopClient({
  apiKey: 'your-traffic-cop-api-key',
});

const anthropic = new Anthropic({
  apiKey: 'your-anthropic-api-key',
});

async function adviseWithAnthropic() {
  try {
    const prompt = 'What is the capital of France?';
    const draftModelId = 'claude-instant-1.2';
    const verifyModelId = 'claude-2.1';

    // Step 1: Get advice from Traffic Cop
    const advice = await trafficCop.route({
      prompt,
      draftModelId,
      verifyModelId,
    });

    console.log(`Decision: ${advice.decision}`);

    // Step 2: Execute the draft model call
    const startDraftTime = Date.now();
    const draftResponse = await anthropic.messages.create({
      model: draftModelId,
      messages: [{ role: 'user', content: prompt }],
      max_tokens: 1000,
    });
    const draftLatencyMs = Date.now() - startDraftTime;

    const draftContent = draftResponse.content[0].text;
    // The Messages API reports token usage directly on the response object
    const draftTokenCount = draftResponse.usage.input_tokens + draftResponse.usage.output_tokens;

    // Step 3: Decide whether to verify based on Traffic Cop's advice
    const shouldVerify = advice.decision === AdviseDecision.VERIFICATION_RECOMMENDED;
    let verifyContent = null;
    let verifyTokenCount = null;
    let verifyLatencyMs = null;

    if (shouldVerify) {
      const startVerifyTime = Date.now();
      const verifyResponse = await anthropic.messages.create({
        model: verifyModelId,
        messages: [{ role: 'user', content: prompt }],
        max_tokens: 1000,
      });
      verifyLatencyMs = Date.now() - startVerifyTime;

      verifyContent = verifyResponse.content[0].text;
      // Same usage-based token accounting as for the draft model
      verifyTokenCount = verifyResponse.usage.input_tokens + verifyResponse.usage.output_tokens;
    }

    // Step 4: Choose the final response
    const finalContent = verifyContent || draftContent;

    // Step 5: Report the outcome back to Traffic Cop
    await trafficCop.reportExecutionOutcome({
      trafficCopRequestId: advice.trafficCopRequestId,
      userId: 'user-123', // Use the same userId that was passed to route() or that was auto-generated
      actualDraftModelUsed: draftModelId,
      draftTokenCount: draftTokenCount,
      draftLatencyMs: draftLatencyMs,
      wasVerificationPerformed: shouldVerify,
      finalResponse: finalContent,
      actualVerifyModelUsed: shouldVerify ? verifyModelId : undefined,
      verifyTokenCount: verifyTokenCount,
      verifyLatencyMs: verifyLatencyMs,
    });

    console.log(`Final response: ${finalContent}`);
  } catch (error) {
    console.error('Error:', error);
  }
}

adviseWithAnthropic();

Proxy Mode (Traffic Cop Executes LLM Calls)

In Proxy Mode, Traffic Cop executes the LLM calls on your behalf. Important notes about API keys:

  • Gemini and other Google models: Traffic Cop can use its own managed API keys (e.g., gemini-pro, text-bison).
  • Non-Google models: You must provide your own API keys for OpenAI (e.g., gpt-3.5-turbo, gpt-4) and Anthropic (e.g., claude-instant, claude-2) models.

import { TrafficCopClient, ExecutionMode } from 'traffic-cop-client';

// Create a client
const client = new TrafficCopClient({
  apiKey: 'your-api-key',
});

// Example with OpenAI models (requires customer API key)
async function proxyOpenAIExample() {
  try {
    const response = await client.route({
      prompt: 'What is the capital of France?',
      draftModelId: 'gpt-3.5-turbo',
      verifyModelId: 'gpt-4',
      executionMode: ExecutionMode.PROXY,
      userId: 'user-123', // Optional, will be generated if not provided
      customerApiKeys: { // Required for OpenAI models
        openai: 'sk-your-openai-key',
      },
      metadata: { // Optional
        source: 'web-app',
        sessionId: 'session-456',
      },
    });

    console.log(`Final response: ${response.finalResponse}`);
    console.log(`Verification used: ${response.verificationUsed}`);
    console.log(`Estimated cost saved: $${response.estimatedCostSaved?.toFixed(6)}`);
  } catch (error) {
    console.error('Error:', error);
  }
}

// Example with Gemini models (Traffic Cop's managed key can be used)
async function proxyGeminiExample() {
  try {
    const response = await client.route({
      prompt: 'What is the capital of France?',
      draftModelId: 'gemini-pro',
      verifyModelId: 'gemini-1.5-pro',
      executionMode: ExecutionMode.PROXY,
      userId: 'user-123',
      // No customerApiKeys needed for Gemini models
    });

    console.log(`Final response: ${response.finalResponse}`);
    console.log(`Verification used: ${response.verificationUsed}`);
  } catch (error) {
    console.error('Error:', error);
  }
}

proxyOpenAIExample();
proxyGeminiExample();

CommonJS

const { TrafficCopClient, ExecutionMode } = require('traffic-cop-client');

// Create a client
const client = new TrafficCopClient({
  apiKey: 'your-api-key',
});

// Route a prompt in advise mode (default)
client.route({
  prompt: 'What is the capital of France?',
  draftModelId: 'gpt-3.5-turbo',
  verifyModelId: 'gpt-4',
})
  .then(advice => {
    console.log(`Decision: ${advice.decision}`);
    // Client would make their own LLM calls here
    // ...

    // Then report the outcome
    return client.reportExecutionOutcome({
      trafficCopRequestId: advice.trafficCopRequestId,
      userId: 'user-123', // Use the same userId that was passed to route() or that was auto-generated
      actualDraftModelUsed: advice.suggestedDraftModelId,
      draftTokenCount: 15,
      draftLatencyMs: 250,
      wasVerificationPerformed: false,
      finalResponse: "Paris is the capital of France.",
    });
  })
  .then(outcome => {
    console.log(`Outcome reported: ${outcome.success}`);
  })
  .catch(error => {
    console.error('Error:', error);
  });

Configuration

The client can be configured with the following options:

interface TrafficCopClientOptions {
  /**
   * API key for authentication
   */
  apiKey: string;

  /**
   * Base URL for the Traffic Cop API
   * @default "https://traffic-cop-api-pbo3cvpjua-uc.a.run.app"
   */
  baseUrl?: string;

  /**
   * Request timeout in milliseconds
   * @default 60000
   */
  timeout?: number;
}
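
For example, a client pointed at a self-hosted or regional deployment with a shorter timeout might be constructed as follows. This is a minimal sketch; the baseUrl below is a placeholder, not a real endpoint:

import { TrafficCopClient } from 'traffic-cop-client';

const client = new TrafficCopClient({
  apiKey: process.env.TRAFFIC_COP_API_KEY ?? '', // read the key from the environment
  baseUrl: 'https://traffic-cop.example.com',    // placeholder for your own deployment URL
  timeout: 15000,                                // fail requests that take longer than 15 seconds
});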

Response Format

The response from the route method depends on the execution mode:

Advise Mode Response

interface AdviseRouteResponse {
  /**
   * Unique request identifier
   */
  requestId: string;

  /**
   * Execution mode (always 'advise' for this response type)
   */
  executionMode: 'advise';

  /**
   * Unique identifier for tracking this request through the system
   */
  trafficCopRequestId: string;

  /**
   * Decision on whether verification is recommended
   */
  decision: 'verification_recommended' | 'draft_sufficient';

  /**
   * Suggested draft model to use
   */
  suggestedDraftModelId: string;

  /**
   * Suggested verify model to use if verification is needed
   */
  suggestedVerifyModelId: string;

  /**
   * Confidence threshold used for routing decision
   */
  thresholdUsed: number;
}

Proxy Mode Response

interface ProxyRouteResponse {
  /**
   * Unique request identifier
   */
  requestId: string;

  /**
   * Execution mode (always 'proxy' for this response type)
   */
  executionMode: 'proxy';

  /**
   * Unique identifier for tracking this request through the system
   */
  trafficCopRequestId: string;

  /**
   * Response from the draft model
   */
  draftResponse: {
    content: string;
    modelId: string;
    tokensUsed: number;
    latencyMs: number;
    confidence?: number;
    metadata?: Record<string, string>;
  };

  /**
   * Response from the verify model (if used)
   */
  verifyResponse?: {
    content: string;
    modelId: string;
    tokensUsed: number;
    latencyMs: number;
    confidence?: number;
    metadata?: Record<string, string>;
  };

  /**
   * The final response content to return to the user
   */
  finalResponse: string;

  /**
   * Whether the verify model was used
   */
  verificationUsed: boolean;

  /**
   * Estimated cost saved by using the draft model (if applicable)
   */
  estimatedCostSaved?: number;

  /**
   * Confidence threshold used for routing decision
   */
  thresholdUsed: number;
}
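
Since both shapes carry an executionMode literal field, TypeScript can narrow the union at the call site. A minimal sketch, assuming route() is typed as returning AdviseRouteResponse | ProxyRouteResponse:

import { TrafficCopClient } from 'traffic-cop-client';

const client = new TrafficCopClient({ apiKey: 'your-api-key' });

async function handleRoute() {
  const response = await client.route({
    prompt: 'What is the capital of France?',
    draftModelId: 'gpt-3.5-turbo',
    verifyModelId: 'gpt-4',
  });

  if (response.executionMode === 'proxy') {
    // Narrowed to ProxyRouteResponse: Traffic Cop already produced the final answer
    console.log(response.finalResponse);
  } else {
    // Narrowed to AdviseRouteResponse: execute the LLM calls yourself
    console.log(response.decision);
  }
}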

Report Execution Outcome Response

The response from the reportExecutionOutcome method:

interface ReportExecutionOutcomeResponse {
  /**
   * Whether the report was successfully processed
   */
  success: boolean;

  /**
   * The traffic_cop_request_id that was reported on
   */
  trafficCopRequestId: string;

  /**
   * Status message
   */
  message: string;
}

Important Note on userId: When calling reportExecutionOutcome, always use the same userId that was passed to the original route() call. This ensures consistent tracking of user interactions across the system. The userId represents the end-user identifier, while trafficCopRequestId is used to correlate the specific request-response pair.
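
A minimal sketch of threading one userId through both calls, assuming the default advise mode (outcome values are placeholders):

import { TrafficCopClient } from 'traffic-cop-client';

const client = new TrafficCopClient({ apiKey: 'your-api-key' });

async function routeAndReport(prompt: string, userId: string) {
  const advice = await client.route({
    prompt,
    draftModelId: 'gpt-3.5-turbo',
    verifyModelId: 'gpt-4',
    userId, // the end-user identifier
  });

  // ... execute the LLM calls based on the advice ...

  await client.reportExecutionOutcome({
    trafficCopRequestId: advice.trafficCopRequestId, // correlates this request/response pair
    userId, // must match the userId passed to route() above
    actualDraftModelUsed: advice.suggestedDraftModelId,
    draftTokenCount: 15,
    draftLatencyMs: 250,
    wasVerificationPerformed: false,
    finalResponse: 'Paris is the capital of France.',
  });
}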

Error Handling

The client will throw specific exceptions that you can catch to handle different types of errors:

import {
  TrafficCopClient,
  ExecutionMode,
  TrafficCopError,
  TrafficCopConnectionError,
  TrafficCopAPIError
} from 'traffic-cop-client';

const client = new TrafficCopClient({
  apiKey: 'your-api-key',
});

try {
  const advice = await client.route({
    prompt: 'What is the capital of France?',
    draftModelId: 'gpt-3.5-turbo',
    verifyModelId: 'gpt-4',
  });

  console.log(`Decision: ${advice.decision}`);

  // Execute LLM calls based on the advice...

} catch (error) {
  if (error instanceof TrafficCopAPIError) {
    // Handle API errors (e.g., invalid request, authentication error)
    console.error(`API Error (Status ${error.statusCode}): ${error.detail}`);
  } else if (error instanceof TrafficCopConnectionError) {
    // Handle connection errors (e.g., network issues, timeouts)
    console.error(`Connection Error: ${error.message}`);
  } else if (error instanceof TrafficCopError) {
    // Handle other Traffic Cop errors
    console.error(`Traffic Cop Error: ${error.message}`);
  } else {
    // Handle unexpected errors
    console.error(`Unexpected Error: ${error}`);
  }
}

Exception Types

  • TrafficCopError: Base exception for all Traffic Cop client errors
  • TrafficCopConnectionError: Thrown for connection errors (network issues, timeouts)
  • TrafficCopAPIError: Thrown when the API returns an error response (includes statusCode and detail)
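
One pattern these types enable is graceful degradation: if Traffic Cop itself is unreachable, skip routing and proceed with a safe default rather than failing the user's request. A sketch, with callDraftModel and callVerifyModel as hypothetical stand-ins for your own LLM calls:

import { TrafficCopClient, TrafficCopConnectionError, AdviseDecision } from 'traffic-cop-client';

// Hypothetical helpers standing in for your own model calls
declare function callDraftModel(prompt: string): Promise<string>;
declare function callVerifyModel(prompt: string): Promise<string>;

const client = new TrafficCopClient({ apiKey: 'your-api-key' });

async function routeWithFallback(prompt: string): Promise<string> {
  let shouldVerify = true; // safe default when routing advice is unavailable
  try {
    const advice = await client.route({
      prompt,
      draftModelId: 'gpt-3.5-turbo',
      verifyModelId: 'gpt-4',
    });
    shouldVerify = advice.decision === AdviseDecision.VERIFICATION_RECOMMENDED;
  } catch (error) {
    if (!(error instanceof TrafficCopConnectionError)) {
      throw error; // only swallow connectivity problems; API errors still surface
    }
    // Traffic Cop is unreachable: proceed with the safe default above
  }
  // (outcome reporting omitted for brevity)
  return shouldVerify ? callVerifyModel(prompt) : callDraftModel(prompt);
}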

Contributing

We welcome contributions to the Traffic Cop JavaScript SDK! Please see CONTRIBUTING.md for details on how to contribute.

Development

Setup

  1. Clone the repository:
git clone https://github.com/traffic-cop/traffic-cop-js-sdk.git
cd traffic-cop-js-sdk
  2. Install dependencies:
npm install
# or
yarn

Building

npm run build
# or
yarn build

Running Tests

# Run all tests
npm test
# or
yarn test

# Run with coverage
npm test -- --coverage
# or
yarn test --coverage

Code Style

This project provides lint and format scripts:

# Lint code
npm run lint
# or
yarn lint

# Format code
npm run format
# or
yarn format

Support

For support, please open an issue on the GitHub repository.

License

MIT
