Skip to main content

Cost Optimization

Parent: Shared Infrastructure Overview
Status: Active
Scope: Model Router → Token Tracking → Caching → Budget Control

Overview

Cost Optimization is the efficiency layer - it wraps all AI operations to route to the optimal model, track costs, cache responses, and enforce budgets.
┌─────────────────────────────────────────────────────────────────────────┐
│                    COST OPTIMIZATION PIPELINE                            │
├─────────────────────────────────────────────────────────────────────────┤
│                                                                          │
│   AI REQUEST                                                             │
│        │                                                                 │
│        ▼                                                                 │
│   ┌─────────────────────────────────────────────────────────────────┐   │
│   │                    CACHE CHECK                                   │   │
│   │   Hash request → Check fact_cache → Return if hit               │   │
│   └──────────────────────────┬──────────────────────────────────────┘   │
│                              │ cache miss                               │
│                              ▼                                           │
│   ┌─────────────────────────────────────────────────────────────────┐   │
│   │                    BUDGET CHECK                                  │   │
│   │   Check allocation → Verify not exceeded → Continue or reject   │   │
│   └──────────────────────────┬──────────────────────────────────────┘   │
│                              │ budget ok                                │
│                              ▼                                           │
│   ┌─────────────────────────────────────────────────────────────────┐   │
│   │                    MODEL ROUTER                                  │   │
│   │   Analyze task → Select optimal model → Consider cost/quality   │   │
│   └──────────────────────────┬──────────────────────────────────────┘   │
│                              │                                           │
│                              ▼                                           │
│   ┌─────────────────────────────────────────────────────────────────┐   │
│   │                    CIRCUIT BREAKER                               │   │
│   │   Check error rate → Check cost spike → Allow or block          │   │
│   └──────────────────────────┬──────────────────────────────────────┘   │
│                              │                                           │
│                              ▼                                           │
│   ┌─────────────────────────────────────────────────────────────────┐   │
│   │                    AI PROVIDER                                   │   │
│   │   OpenAI / Anthropic / Google / Local                           │   │
│   └──────────────────────────┬──────────────────────────────────────┘   │
│                              │                                           │
│                              ▼                                           │
│   ┌─────────────────────────────────────────────────────────────────┐   │
│   │                    USAGE TRACKING                                │   │
│   │   Record tokens → Calculate cost → Update allocations           │   │
│   └──────────────────────────┬──────────────────────────────────────┘   │
│                              │                                           │
│                              ▼                                           │
│   ┌─────────────────────────────────────────────────────────────────┐   │
│   │                    CACHE STORE                                   │   │
│   │   Store response if cacheable → Set TTL                         │   │
│   └─────────────────────────────────────────────────────────────────┘   │
│                                                                          │
└─────────────────────────────────────────────────────────────────────────┘

Schema

Model Configuration

Available models with cost and capability info.
-- Registry of available AI models: pricing, capability flags, routing hints
-- and rate limits. Seeded from the modelConfigs constant below and read by
-- selectModel() to route requests.
CREATE TABLE model_config (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  
  -- Model identity
  provider text NOT NULL,                   -- 'openai', 'anthropic', 'google', 'local'
  model_id text NOT NULL,                   -- 'gpt-4o', 'claude-sonnet-4-20250514', etc.
  display_name text NOT NULL,
  
  -- Pricing (per 1M tokens as of knowledge cutoff - update regularly)
  input_cost_per_million numeric(10,4),
  output_cost_per_million numeric(10,4),
  
  -- Capabilities
  max_tokens integer,
  supports_vision boolean DEFAULT false,
  supports_function_calling boolean DEFAULT true,
  supports_streaming boolean DEFAULT true,
  
  -- Quality tiers
  quality_tier text,                        -- 'premium', 'standard', 'economy', 'batch'
  
  -- Use cases
  best_for text[],                          -- ['long_form', 'coding', 'analysis', 'chat']
  
  -- Limits
  requests_per_minute integer,
  tokens_per_minute integer,
  
  -- Status
  status text DEFAULT 'active',             -- 'active', 'deprecated', 'disabled'
  
  created_at timestamptz DEFAULT now(),
  updated_at timestamptz DEFAULT now(),
  
  -- A model is uniquely identified by (provider, model_id)
  UNIQUE(provider, model_id)
);

CREATE INDEX idx_model_config_provider ON model_config(provider);
CREATE INDEX idx_model_config_quality ON model_config(quality_tier);
CREATE INDEX idx_model_config_status ON model_config(status);

AI Usage Tracking

Per-request cost tracking.
-- Per-request AI usage ledger: one row per call (live or cached), with token
-- counts, cost, and attribution. Partitioned monthly by created_at.
-- BUGFIX: on a range-partitioned table every PRIMARY KEY / UNIQUE constraint
-- must include the partition key, so the original bare `id uuid PRIMARY KEY`
-- and `request_id text UNIQUE` would fail at CREATE time; both are now
-- composites with created_at (uniqueness is therefore per-partition-key,
-- which is the standard Postgres workaround).
CREATE TABLE ai_usage_tracking (
  id uuid DEFAULT gen_random_uuid(),
  
  -- Request identification
  request_id text,                          -- UUID for the request
  
  -- Model used
  provider text NOT NULL,
  model_id text NOT NULL,
  
  -- Token counts
  prompt_tokens integer NOT NULL,
  completion_tokens integer NOT NULL,
  total_tokens integer GENERATED ALWAYS AS (prompt_tokens + completion_tokens) STORED,
  
  -- Cost
  cost numeric(10,6) NOT NULL,
  
  -- Context
  use_case text,                            -- 'generation', 'analysis', 'embedding', 'chat'
  business_unit text,
  client_id uuid,
  content_id uuid,
  
  -- Request details
  prompt_hash text,                         -- For cache matching
  was_cached boolean DEFAULT false,
  
  -- Timing
  latency_ms integer,
  
  -- Metadata
  metadata jsonb DEFAULT '{}',
  
  -- NOT NULL required because created_at participates in the primary key
  created_at timestamptz NOT NULL DEFAULT now(),
  
  PRIMARY KEY (id, created_at),
  UNIQUE (request_id, created_at)
) PARTITION BY RANGE (created_at);

-- Create monthly partitions
CREATE TABLE ai_usage_tracking_2025_01 PARTITION OF ai_usage_tracking
  FOR VALUES FROM ('2025-01-01') TO ('2025-02-01');
-- Continue for each month...

CREATE INDEX idx_ai_usage_tracking_model ON ai_usage_tracking(provider, model_id);
CREATE INDEX idx_ai_usage_tracking_use_case ON ai_usage_tracking(use_case);
CREATE INDEX idx_ai_usage_tracking_business_unit ON ai_usage_tracking(business_unit);
CREATE INDEX idx_ai_usage_tracking_client ON ai_usage_tracking(client_id);
CREATE INDEX idx_ai_usage_tracking_created ON ai_usage_tracking(created_at DESC);
CREATE INDEX idx_ai_usage_tracking_prompt_hash ON ai_usage_tracking(prompt_hash);

Fact Cache

Cached AI responses for deterministic queries.
-- Cached AI responses, keyed by a sha256 hash of the normalized request.
-- checkCache() serves hits (cost 0) and bumps hit_count; cacheResponse()
-- upserts rows; cleanExpiredCache() prunes expired and never-hit rows.
CREATE TABLE fact_cache (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  
  -- Cache key
  cache_key text UNIQUE NOT NULL,           -- Hash of normalized prompt
  
  -- Request signature
  prompt_hash text NOT NULL,
  model_id text NOT NULL,
  
  -- Cached response
  response_text text NOT NULL,
  response_metadata jsonb DEFAULT '{}',
  
  -- Token counts (for cost estimation)
  prompt_tokens integer,
  completion_tokens integer,
  
  -- Cache management
  hit_count integer DEFAULT 0,
  last_hit_at timestamptz,
  
  -- TTL
  expires_at timestamptz,
  
  -- Source
  original_cost numeric(10,6),
  
  created_at timestamptz DEFAULT now(),
  updated_at timestamptz DEFAULT now()
);

-- No separate index on cache_key: the UNIQUE constraint above already
-- creates one, so the original idx_fact_cache_key was a redundant duplicate.
CREATE INDEX idx_fact_cache_expires ON fact_cache(expires_at);
CREATE INDEX idx_fact_cache_hits ON fact_cache(hit_count DESC);

Budget Allocations

Spending limits by business unit, client, or use case.
-- Spending caps per scope (global / business unit / client / use case) and
-- period. remaining_amount is derived; spent_amount is advanced by the
-- increment_budget_spend() RPC.
CREATE TABLE budget_allocations (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  
  -- Allocation scope
  scope_type text NOT NULL,                 -- 'global', 'business_unit', 'client', 'use_case'
  scope_id text,                            -- Specific ID (null for global)
  
  -- Budget period
  period_type text NOT NULL,                -- 'daily', 'weekly', 'monthly'
  period_start date NOT NULL,
  
  -- Amounts
  budget_amount numeric(10,2) NOT NULL,
  spent_amount numeric(10,2) DEFAULT 0,
  remaining_amount numeric(10,2) GENERATED ALWAYS AS (budget_amount - spent_amount) STORED,
  
  -- Alerts
  alert_threshold_percent integer DEFAULT 80,
  alert_sent boolean DEFAULT false,
  
  -- Actions at limit
  action_at_limit text DEFAULT 'warn',      -- 'warn', 'throttle', 'block'
  
  -- Status
  status text DEFAULT 'active',
  
  created_at timestamptz DEFAULT now(),
  updated_at timestamptz DEFAULT now(),
  
  -- NOTE(review): UNIQUE treats NULLs as distinct, so duplicate global rows
  -- (scope_id IS NULL) per period are possible; on Postgres 15+ consider
  -- UNIQUE NULLS NOT DISTINCT.
  UNIQUE(scope_type, scope_id, period_type, period_start)
);

CREATE INDEX idx_budget_allocations_scope ON budget_allocations(scope_type, scope_id);
CREATE INDEX idx_budget_allocations_period ON budget_allocations(period_start);
CREATE INDEX idx_budget_allocations_status ON budget_allocations(status);

Circuit Breakers

Error and cost spike protection.
-- One breaker row per protected entity (model/provider/use case), created
-- lazily on first failure by recordFailure(). Implements a classic
-- closed → open → half_open state machine with a rolling error window.
CREATE TABLE circuit_breakers (
  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
  
  -- Breaker scope
  breaker_type text NOT NULL,               -- 'model', 'provider', 'use_case'
  breaker_id text NOT NULL,
  
  -- State
  state text DEFAULT 'closed',              -- 'closed', 'open', 'half_open'
  
  -- Thresholds
  error_threshold integer DEFAULT 5,        -- Errors before opening
  cost_spike_threshold numeric DEFAULT 2.0, -- X times normal cost
  
  -- Current counts (rolling window)
  error_count integer DEFAULT 0,
  success_count integer DEFAULT 0,
  window_start timestamptz DEFAULT now(),
  window_duration interval DEFAULT '5 minutes',
  
  -- Recovery
  cooldown_until timestamptz,
  half_open_successes integer DEFAULT 0,
  half_open_required integer DEFAULT 3,     -- successes needed to re-close
  
  -- History
  last_failure_at timestamptz,
  last_failure_reason text,
  total_trips integer DEFAULT 0,            -- lifetime count of open transitions
  
  updated_at timestamptz DEFAULT now(),
  
  UNIQUE(breaker_type, breaker_id)
);

CREATE INDEX idx_circuit_breakers_state ON circuit_breakers(state);

Model Configuration Data

// Seed rows for the model_config table. Costs are per 1M tokens and must be
// kept in sync with the hard-coded rate map in calculateCost().
// NOTE(review): quality_tier 'embedding' is not among the tiers enumerated
// in the model_config schema comment — confirm it is a valid value.
const modelConfigs = [
  // Premium tier - complex reasoning
  {
    provider: 'anthropic',
    model_id: 'claude-sonnet-4-20250514',
    display_name: 'Claude Sonnet 4',
    input_cost_per_million: 3.00,
    output_cost_per_million: 15.00,
    max_tokens: 8192,
    supports_vision: true,
    quality_tier: 'premium',
    best_for: ['long_form', 'analysis', 'coding', 'complex_reasoning']
  },
  {
    provider: 'openai',
    model_id: 'gpt-4o',
    display_name: 'GPT-4o',
    input_cost_per_million: 2.50,
    output_cost_per_million: 10.00,
    max_tokens: 16384,
    supports_vision: true,
    quality_tier: 'premium',
    best_for: ['long_form', 'analysis', 'coding', 'vision']
  },
  
  // Standard tier - good balance
  {
    provider: 'openai',
    model_id: 'gpt-4o-mini',
    display_name: 'GPT-4o Mini',
    input_cost_per_million: 0.15,
    output_cost_per_million: 0.60,
    max_tokens: 16384,
    supports_vision: true,
    quality_tier: 'standard',
    best_for: ['chat', 'short_form', 'classification', 'extraction']
  },
  {
    provider: 'anthropic',
    model_id: 'claude-haiku-3-5-20241022',
    display_name: 'Claude 3.5 Haiku',
    input_cost_per_million: 0.80,
    output_cost_per_million: 4.00,
    max_tokens: 8192,
    supports_vision: true,
    quality_tier: 'standard',
    best_for: ['chat', 'short_form', 'summarization']
  },
  
  // Economy tier - high volume
  {
    provider: 'google',
    model_id: 'gemini-2.0-flash',
    display_name: 'Gemini 2.0 Flash',
    input_cost_per_million: 0.075,
    output_cost_per_million: 0.30,
    max_tokens: 8192,
    supports_vision: true,
    quality_tier: 'economy',
    best_for: ['batch', 'classification', 'extraction', 'scoring']
  },
  
  // Embedding models
  {
    provider: 'openai',
    model_id: 'text-embedding-3-small',
    display_name: 'Embedding Small',
    input_cost_per_million: 0.02,
    output_cost_per_million: 0,
    max_tokens: 8191,
    quality_tier: 'embedding',
    best_for: ['embedding', 'semantic_search']
  }
]

AI Client Wrapper

Main Entry Point

/**
 * Input to callAI(). Only `prompt` and `useCase` are required; `model`
 * overrides auto-selection, and caching is on unless `cacheable` is
 * explicitly false (see callAI's `!== false` check).
 */
interface AIRequest {
  prompt: string
  systemPrompt?: string
  model?: string                    // Override auto-selection
  maxTokens?: number
  temperature?: number
  useCase: string                   // 'generation', 'analysis', etc.
  businessUnit?: string             // Budget scope + usage attribution
  clientId?: string                 // Budget scope + usage attribution
  contentId?: string                // Usage attribution only
  cacheable?: boolean
  cacheTTL?: number                 // seconds
}

/**
 * Result of callAI(). For cache hits, usage.cost is 0 and latencyMs
 * measures the cache lookup rather than a provider round-trip.
 */
interface AIResponse {
  text: string
  model: string                     // Model that actually served the request
  usage: {
    promptTokens: number
    completionTokens: number
    totalTokens: number
    cost: number
  }
  cached: boolean
  latencyMs: number
}

/**
 * Main entry point for all AI calls. Pipeline:
 * cache check → budget check → model selection → circuit breaker →
 * provider call → usage tracking → budget update → cache store.
 *
 * @throws BudgetExceededError when an applicable allocation blocks spending
 * @throws CircuitOpenError when the breaker is open and no fallback exists
 */
async function callAI(request: AIRequest): Promise<AIResponse> {
  const requestId = crypto.randomUUID()
  const startTime = Date.now()
  
  // 1. Check cache (caching is opt-out: anything except explicit `false`)
  if (request.cacheable !== false) {
    const cached = await checkCache(request)
    if (cached) {
      // Record the hit for analytics; cached.usage carries cost 0
      await trackUsage({
        requestId,
        ...cached.usage,
        wasCached: true,
        useCase: request.useCase,
        businessUnit: request.businessUnit,
        clientId: request.clientId
      })
      return { ...cached, cached: true, latencyMs: Date.now() - startTime }
    }
  }
  
  // 2. Check budget before spending anything
  const budgetOk = await checkBudget(request.businessUnit, request.clientId)
  if (!budgetOk.allowed) {
    throw new BudgetExceededError(budgetOk.reason)
  }
  
  // 3. Select model (explicit override wins over auto-selection).
  // BUGFIX: was `const`, but the circuit-breaker fallback below reassigns it,
  // which is a compile error.
  let model = request.model || await selectModel(request)
  
  // 4. Check circuit breaker; fall back to a sibling model if open
  const breakerOk = await checkCircuitBreaker(model)
  if (!breakerOk) {
    // Try fallback model
    const fallback = getFallbackModel(model)
    if (fallback) {
      model = fallback
    } else {
      throw new CircuitOpenError(`Circuit breaker open for ${model}`)
    }
  }
  
  // 5. Make request
  try {
    const response = await executeAIRequest(model, request)
    
    // 6. Track usage
    const cost = calculateCost(model, response.usage)
    await trackUsage({
      requestId,
      provider: getProvider(model),
      modelId: model,
      promptTokens: response.usage.promptTokens,
      completionTokens: response.usage.completionTokens,
      cost,
      useCase: request.useCase,
      businessUnit: request.businessUnit,
      clientId: request.clientId,
      contentId: request.contentId,
      promptHash: hashPrompt(request.prompt),
      wasCached: false,
      latencyMs: Date.now() - startTime
    })
    
    // 7. Update budget
    await updateBudgetSpend(request.businessUnit, request.clientId, cost)
    
    // 8. Cache response if cacheable
    if (request.cacheable !== false && isCacheableResponse(response)) {
      await cacheResponse(request, response, request.cacheTTL)
    }
    
    // 9. Record success for circuit breaker
    await recordSuccess(model)
    
    return {
      text: response.text,
      model,
      usage: {
        ...response.usage,
        cost
      },
      cached: false,
      latencyMs: Date.now() - startTime
    }
    
  } catch (error) {
    // Record failure for circuit breaker, then re-throw to the caller
    await recordFailure(model, error.message)
    throw error
  }
}

Model Selection

/**
 * Auto-select a model for a request: filter active models by context
 * headroom and use-case tags, then return the highest-scoring candidate
 * (see scoreModel). Falls back to 'gpt-4o-mini' when nothing matches.
 */
async function selectModel(request: AIRequest): Promise<string> {
  // Get active models
  const { data: models } = await supabase
    .from('model_config')
    .select('*')
    .eq('status', 'active')
  
  // Estimate prompt length
  const estimatedTokens = estimateTokens(request.prompt + (request.systemPrompt || ''))
  
  // BUGFIX: `models` is null when the query errors; guard before filtering
  // so we fall through to the default model instead of throwing.
  const candidates = (models ?? []).filter(m => {
    // Must support required tokens (keep 20% headroom under the limit)
    if (m.max_tokens && estimatedTokens > m.max_tokens * 0.8) return false
    
    // Must match use case
    if (m.best_for && !m.best_for.includes(request.useCase)) return false
    
    return true
  })
  
  if (candidates.length === 0) {
    // Fallback to default
    return 'gpt-4o-mini'
  }
  
  // Score candidates
  const scored = candidates.map(m => ({
    model: m.model_id,
    score: scoreModel(m, request)
  }))
  
  // Sort by score descending — highest score wins
  scored.sort((a, b) => b.score - a.score)
  
  return scored[0].model
}

/**
 * Heuristic routing score: use-case match (+30), preferred-tier match
 * (+20), cost efficiency (up to +20, cheaper is better), and a rate-limit
 * bonus (+10). Higher is better.
 */
function scoreModel(model: ModelConfig, request: AIRequest): number {
  // Preferred quality tier for each use case
  const TIER_BY_USE_CASE: Record<string, string> = {
    generation: 'premium',
    analysis: 'premium',
    coding: 'premium',
    chat: 'standard',
    classification: 'economy',
    extraction: 'economy',
    batch: 'economy',
    embedding: 'embedding'
  }
  
  let total = 0
  
  // Reward explicit use-case fit
  if (model.best_for?.includes(request.useCase)) total += 30
  
  // Reward matching the preferred tier for this use case
  if (TIER_BY_USE_CASE[request.useCase] === model.quality_tier) total += 20
  
  // Cheaper models score higher (average of input/output cost per 1K tokens)
  const avgCostPerThousand =
    (model.input_cost_per_million + model.output_cost_per_million) / 2000
  total += Math.max(0, 20 - avgCostPerThousand * 10)
  
  // Small bonus for generous rate limits
  if (model.requests_per_minute > 100) total += 10
  
  return total
}

/**
 * Cross-provider fallback used when a model's circuit breaker is open.
 * Returns null when no fallback is configured for the model.
 */
function getFallbackModel(primaryModel: string): string | null {
  const FALLBACK_FOR: Record<string, string> = {
    'claude-sonnet-4-20250514': 'gpt-4o',
    'gpt-4o': 'claude-sonnet-4-20250514',
    'gpt-4o-mini': 'claude-haiku-3-5-20241022',
    'claude-haiku-3-5-20241022': 'gpt-4o-mini',
    'gemini-2.0-flash': 'gpt-4o-mini'
  }
  
  return FALLBACK_FOR[primaryModel] ?? null
}

Caching

Check Cache

/**
 * Look up a non-expired cached response for this request; on a hit, bump
 * hit stats and return a zero-cost AIResponse, otherwise return null.
 * NOTE(review): the .gt('expires_at', ...) filter means rows whose
 * expires_at is NULL can never be served — confirm that is intended.
 */
async function checkCache(request: AIRequest): Promise<AIResponse | null> {
  const cacheKey = generateCacheKey(request)
  
  const { data: cached } = await supabase
    .from('fact_cache')
    .select('*')
    .eq('cache_key', cacheKey)
    .gt('expires_at', new Date().toISOString())
    .single()
  
  if (!cached) return null
  
  // Update hit count
  // (read-modify-write: concurrent hits may lose increments)
  await supabase
    .from('fact_cache')
    .update({
      hit_count: cached.hit_count + 1,
      last_hit_at: new Date().toISOString()
    })
    .eq('id', cached.id)
  
  return {
    text: cached.response_text,
    model: cached.model_id,
    usage: {
      promptTokens: cached.prompt_tokens,
      completionTokens: cached.completion_tokens,
      totalTokens: cached.prompt_tokens + cached.completion_tokens,
      cost: 0 // No cost for cached response
    },
    cached: true,
    latencyMs: 0
  }
}

/**
 * Deterministic cache key: sha256 over the normalized request signature.
 * BUGFIX: the model override is now part of the key — previously two
 * requests differing only in `model` collided on the same cache_key
 * (which is UNIQUE), so one model's response could be served, or
 * overwritten, by another's. Note that lowercasing makes keys
 * case-insensitive with respect to prompt text.
 */
function generateCacheKey(request: AIRequest): string {
  // Normalize prompt for consistent hashing
  const normalized = {
    prompt: request.prompt.trim().toLowerCase(),
    systemPrompt: request.systemPrompt?.trim().toLowerCase(),
    temperature: request.temperature || 0,
    model: request.model || null
  }
  
  return crypto
    .createHash('sha256')
    .update(JSON.stringify(normalized))
    .digest('hex')
}

/**
 * Persist a response in fact_cache with a TTL.
 * BUGFIX: a TTL of 0 means "do not cache" (see getDefaultTTL's chat entry),
 * but the original still wrote a row that expired immediately; zero or
 * negative TTLs now skip the write. Also uses `??` so an explicit
 * ttlSeconds of 0 is honored instead of falling through to the default.
 */
async function cacheResponse(
  request: AIRequest,
  response: AIResponse,
  ttlSeconds?: number
) {
  const ttl = ttlSeconds ?? getDefaultTTL(request.useCase)
  if (ttl <= 0) return // use case opted out of caching
  
  const cacheKey = generateCacheKey(request)
  
  await supabase
    .from('fact_cache')
    .upsert({
      cache_key: cacheKey,
      prompt_hash: hashPrompt(request.prompt),
      model_id: response.model,
      response_text: response.text,
      prompt_tokens: response.usage.promptTokens,
      completion_tokens: response.usage.completionTokens,
      original_cost: response.usage.cost,
      expires_at: new Date(Date.now() + ttl * 1000).toISOString()
    })
}

/**
 * Default cache TTL in seconds for a use case; 0 disables caching.
 * Unknown use cases default to one hour.
 * BUGFIX: uses `??` instead of `||` — `||` treated chat's explicit 0 as
 * missing and returned the 3600s default, so chat responses were cached.
 */
function getDefaultTTL(useCase: string): number {
  const ttls: Record<string, number> = {
    classification: 86400 * 7,    // 1 week
    extraction: 86400 * 7,        // 1 week
    embedding: 86400 * 30,        // 1 month
    analysis: 86400,              // 1 day
    generation: 3600,             // 1 hour (content changes)
    chat: 0                       // Don't cache
  }
  
  return ttls[useCase] ?? 3600
}

/**
 * Heuristic gate for caching: reject very short responses (likely errors)
 * and responses containing relative-time wording that would go stale.
 */
function isCacheableResponse(response: AIResponse): boolean {
  const text = response.text
  
  // Too short — probably an error or refusal
  if (text.length < 50) return false
  
  // Time-relative language makes the answer wrong once time passes
  const timeSensitive = /today|yesterday|tomorrow|this week|currently/i
  return !timeSensitive.test(text)
}

Clean Expired Cache

// Cron: Daily
/**
 * Removes expired fact_cache rows, then prunes never-hit entries older than
 * 7 days (they cost storage but save nothing).
 * NOTE(review): supabase-js only returns `count` when the query requests it
 * (e.g. { count: 'exact' }) — verify the log line doesn't always print null.
 */
async function cleanExpiredCache() {
  const { count } = await supabase
    .from('fact_cache')
    .delete()
    .lt('expires_at', new Date().toISOString())
  
  console.log(`Cleaned ${count} expired cache entries`)
  
  // Also clean low-value entries (never hit, > 7 days old)
  await supabase
    .from('fact_cache')
    .delete()
    .eq('hit_count', 0)
    .lt('created_at', new Date(Date.now() - 7 * 86400000).toISOString())
}

Budget Management

Check Budget

/**
 * Decide whether a request may spend budget. Walks applicable allocations
 * from most to least specific (client → business unit → global). Only an
 * exhausted allocation with action_at_limit 'block' denies the request;
 * 'throttle' adds a 1s delay and 'warn' just logs, then checking continues.
 */
async function checkBudget(
  businessUnit?: string,
  clientId?: string
): Promise<{ allowed: boolean; reason?: string }> {
  // Check in order: client → business unit → global
  const checks = [
    clientId ? { scope_type: 'client', scope_id: clientId } : null,
    businessUnit ? { scope_type: 'business_unit', scope_id: businessUnit } : null,
    { scope_type: 'global', scope_id: null }
  ].filter(Boolean)
  
  for (const check of checks) {
    // NOTE(review): .eq('scope_id', null) does not match SQL NULL rows
    // (needs .is()), and .single() errors when several periods qualify —
    // verify the global scope and overlapping periods behave as intended.
    const { data: allocation } = await supabase
      .from('budget_allocations')
      .select('*')
      .eq('scope_type', check.scope_type)
      .eq('scope_id', check.scope_id)
      .eq('status', 'active')
      .gte('period_start', getCurrentPeriodStart())
      .single()
    
    if (!allocation) continue
    
    if (allocation.remaining_amount <= 0) {
      switch (allocation.action_at_limit) {
        case 'block':
          return {
            allowed: false,
            reason: `Budget exceeded for ${check.scope_type}: ${check.scope_id || 'global'}`
          }
        case 'throttle':
          // Allow but with delay
          await sleep(1000)
          break
        case 'warn':
          // Log warning but allow
          console.warn(`Budget warning: ${check.scope_type} ${check.scope_id} at limit`)
          break
      }
    }
    
    // Check alert threshold
    // NOTE(review): alert_sent is read but never written here — presumably
    // sendBudgetAlert() flips it; confirm, or alerts repeat on every request.
    const percentUsed = (allocation.spent_amount / allocation.budget_amount) * 100
    if (percentUsed >= allocation.alert_threshold_percent && !allocation.alert_sent) {
      await sendBudgetAlert(allocation)
    }
  }
  
  return { allowed: true }
}

/**
 * Adds `cost` to every allocation that applies to this request (client →
 * business unit → global) via the increment_budget_spend RPC.
 * BUGFIX: TypeScript forbids a required parameter after optional ones, so
 * the `?` markers are spelled as explicit `| undefined` unions — positional
 * callers are unaffected.
 */
async function updateBudgetSpend(
  businessUnit: string | undefined,
  clientId: string | undefined,
  cost: number
) {
  // Most specific scope first; the global allocation is always updated
  const updates = [
    clientId ? { scope_type: 'client', scope_id: clientId } : null,
    businessUnit ? { scope_type: 'business_unit', scope_id: businessUnit } : null,
    { scope_type: 'global', scope_id: null }
  ].filter(Boolean)
  
  for (const update of updates) {
    await supabase.rpc('increment_budget_spend', {
      p_scope_type: update.scope_type,
      p_scope_id: update.scope_id,
      p_amount: cost
    })
  }
}

// Supabase function
/*
CREATE OR REPLACE FUNCTION increment_budget_spend(
  p_scope_type text,
  p_scope_id text,
  p_amount numeric
)
RETURNS void
LANGUAGE sql
AS $$
  UPDATE budget_allocations
  SET 
    spent_amount = spent_amount + p_amount,
    updated_at = now()
  WHERE scope_type = p_scope_type
    AND (scope_id = p_scope_id OR (p_scope_id IS NULL AND scope_id IS NULL))
    AND status = 'active'
    AND period_start <= CURRENT_DATE
    AND period_start + 
      CASE period_type 
        WHEN 'daily' THEN interval '1 day'
        WHEN 'weekly' THEN interval '1 week'
        WHEN 'monthly' THEN interval '1 month'
      END > CURRENT_DATE;
$$;
*/

Create Budget Allocations

// Cron: Start of each period
/**
 * Idempotently creates the current period's budget_allocations rows from
 * hard-coded templates, skipping any (scope, period) that already exists.
 * BUGFIX: the insert previously spread the whole template, sending an
 * `amount` key that is not a column of budget_allocations; it now maps
 * template fields to real columns. Also removes an unused `today` variable.
 */
async function createBudgetAllocations() {
  // Budget templates — monthly caps per scope
  const budgetTemplates = [
    { scope_type: 'global', scope_id: null, period_type: 'monthly', amount: 5000 },
    { scope_type: 'business_unit', scope_id: 'publisher', period_type: 'monthly', amount: 2000 },
    { scope_type: 'business_unit', scope_id: 'platform', period_type: 'monthly', amount: 1000 },
    { scope_type: 'business_unit', scope_id: 'agency', period_type: 'monthly', amount: 1500 },
    { scope_type: 'business_unit', scope_id: 'store', period_type: 'monthly', amount: 500 }
  ]
  
  for (const template of budgetTemplates) {
    // Check if allocation exists for current period
    const periodStart = getPeriodStart(template.period_type)
    
    // NOTE(review): .eq('scope_id', null) does not match SQL NULL — the
    // global template may never be found as "existing"; verify with .is().
    const { data: existing } = await supabase
      .from('budget_allocations')
      .select('id')
      .eq('scope_type', template.scope_type)
      .eq('scope_id', template.scope_id)
      .eq('period_type', template.period_type)
      .eq('period_start', periodStart)
      .single()
    
    if (!existing) {
      await supabase
        .from('budget_allocations')
        .insert({
          scope_type: template.scope_type,
          scope_id: template.scope_id,
          period_type: template.period_type,
          period_start: periodStart,
          budget_amount: template.amount
        })
    }
  }
}

Circuit Breaker

Check Circuit Breaker

/**
 * Gate a model call on its circuit breaker.
 * closed → allow; open → deny until the cooldown elapses, then transition
 * to half_open and allow; half_open → allow probe requests.
 * Returns true when no breaker row exists for the model.
 */
async function checkCircuitBreaker(modelId: string): Promise<boolean> {
  const { data: breaker } = await supabase
    .from('circuit_breakers')
    .select('*')
    .eq('breaker_type', 'model')
    .eq('breaker_id', modelId)
    .single()
  
  if (!breaker) {
    // No breaker = allowed
    return true
  }
  
  switch (breaker.state) {
    case 'closed':
      return true
      
    case 'open':
      // Check if cooldown has passed
      if (breaker.cooldown_until && new Date(breaker.cooldown_until) < new Date()) {
        // Move to half-open
        await supabase
          .from('circuit_breakers')
          .update({
            state: 'half_open',
            half_open_successes: 0
          })
          .eq('id', breaker.id)
        return true
      }
      return false
      
    case 'half_open':
      // Allow limited requests to test recovery
      // NOTE(review): nothing here actually limits concurrent half-open
      // probes — every caller gets through; confirm that's acceptable.
      return true
  }
  
  // Unknown state — fail open
  return true
}

/**
 * Record a successful call against the model's breaker. In half_open,
 * successes accumulate toward half_open_required and re-close the circuit
 * (resetting counters) once reached; otherwise success_count is bumped.
 * No-op when the model has no breaker row.
 * NOTE(review): read-modify-write without locking — concurrent calls can
 * lose counts; consider an atomic increment RPC like the budget path uses.
 */
async function recordSuccess(modelId: string) {
  const { data: breaker } = await supabase
    .from('circuit_breakers')
    .select('*')
    .eq('breaker_type', 'model')
    .eq('breaker_id', modelId)
    .single()
  
  if (!breaker) return
  
  if (breaker.state === 'half_open') {
    const newSuccesses = breaker.half_open_successes + 1
    
    if (newSuccesses >= breaker.half_open_required) {
      // Close circuit — recovery confirmed; reset the rolling window
      await supabase
        .from('circuit_breakers')
        .update({
          state: 'closed',
          error_count: 0,
          success_count: 0,
          window_start: new Date().toISOString()
        })
        .eq('id', breaker.id)
    } else {
      // Still probing — count the success toward recovery
      await supabase
        .from('circuit_breakers')
        .update({ half_open_successes: newSuccesses })
        .eq('id', breaker.id)
    }
  } else {
    await supabase
      .from('circuit_breakers')
      .update({
        success_count: breaker.success_count + 1
      })
      .eq('id', breaker.id)
  }
}

/**
 * Record a failed call against the model's breaker, creating the breaker
 * row on first failure. Failures count within a rolling window
 * (window_duration, default 5 min): an expired window resets the count to
 * 1; otherwise reaching error_threshold opens the circuit with a 1-minute
 * cooldown.
 * NOTE(review): parseDuration() must convert the stored interval string to
 * milliseconds — verify its format handling. Same read-modify-write race
 * caveat as recordSuccess().
 */
async function recordFailure(modelId: string, reason: string) {
  // Get or create breaker
  let { data: breaker } = await supabase
    .from('circuit_breakers')
    .select('*')
    .eq('breaker_type', 'model')
    .eq('breaker_id', modelId)
    .single()
  
  if (!breaker) {
    // First failure for this model — lazily create the breaker row
    const { data: created } = await supabase
      .from('circuit_breakers')
      .insert({
        breaker_type: 'model',
        breaker_id: modelId
      })
      .select()
      .single()
    breaker = created
  }
  
  // Check if window has expired
  const windowExpired = new Date(breaker.window_start).getTime() + 
    parseDuration(breaker.window_duration) < Date.now()
  
  if (windowExpired) {
    // Reset window — this failure starts a fresh count
    await supabase
      .from('circuit_breakers')
      .update({
        error_count: 1,
        success_count: 0,
        window_start: new Date().toISOString(),
        last_failure_at: new Date().toISOString(),
        last_failure_reason: reason
      })
      .eq('id', breaker.id)
  } else {
    const newErrorCount = breaker.error_count + 1
    
    if (newErrorCount >= breaker.error_threshold) {
      // Open circuit
      await supabase
        .from('circuit_breakers')
        .update({
          state: 'open',
          error_count: newErrorCount,
          cooldown_until: new Date(Date.now() + 60000).toISOString(), // 1 min cooldown
          total_trips: breaker.total_trips + 1,
          last_failure_at: new Date().toISOString(),
          last_failure_reason: reason
        })
        .eq('id', breaker.id)
    } else {
      // Below threshold — just record the failure
      await supabase
        .from('circuit_breakers')
        .update({
          error_count: newErrorCount,
          last_failure_at: new Date().toISOString(),
          last_failure_reason: reason
        })
        .eq('id', breaker.id)
    }
  }
}

Cost Calculation

/**
 * Compute the cost of a call from its token usage. Rates are per 1M tokens
 * and mirror the modelConfigs seed data; unknown models fall back to a
 * conservative default rate.
 */
function calculateCost(modelId: string, usage: TokenUsage): number {
  // Per-1M-token rates keyed by model id (keep in sync with modelConfigs)
  const RATES: Record<string, { input: number; output: number }> = {
    // Premium
    'claude-sonnet-4-20250514': { input: 3.00, output: 15.00 },
    'gpt-4o': { input: 2.50, output: 10.00 },
    
    // Standard
    'gpt-4o-mini': { input: 0.15, output: 0.60 },
    'claude-haiku-3-5-20241022': { input: 0.80, output: 4.00 },
    
    // Economy
    'gemini-2.0-flash': { input: 0.075, output: 0.30 },
    
    // Embedding
    'text-embedding-3-small': { input: 0.02, output: 0 }
  }
  
  const rate = RATES[modelId] ?? { input: 1.00, output: 3.00 }
  
  return (
    (usage.promptTokens / 1_000_000) * rate.input +
    (usage.completionTokens / 1_000_000) * rate.output
  )
}

Queries

Daily Cost Summary

-- Daily cost summary for the past 7 days, broken down by model.
-- cache_hits counts rows served from fact_cache (recorded with cost 0).
-- Fixed: positional GROUP BY/ORDER BY replaced with explicit expressions,
-- and the conditional count uses the Postgres FILTER clause.
SELECT 
  DATE(created_at) AS date,
  provider,
  model_id,
  COUNT(*) AS requests,
  SUM(prompt_tokens) AS total_prompt_tokens,
  SUM(completion_tokens) AS total_completion_tokens,
  SUM(cost) AS total_cost,
  COUNT(*) FILTER (WHERE was_cached) AS cache_hits,
  ROUND(AVG(latency_ms), 0) AS avg_latency_ms
FROM ai_usage_tracking
WHERE created_at > now() - interval '7 days'
GROUP BY DATE(created_at), provider, model_id
ORDER BY date DESC, total_cost DESC;

Cost by Business Unit

-- 30-day spend and volume by business unit, biggest spender first.
-- Rows with a NULL business_unit aggregate into one unattributed group.
SELECT 
  business_unit,
  COUNT(*) as requests,
  SUM(cost) as total_cost,
  ROUND(AVG(cost), 4) as avg_cost_per_request,
  SUM(total_tokens) as total_tokens
FROM ai_usage_tracking
WHERE created_at > now() - interval '30 days'
GROUP BY business_unit
ORDER BY total_cost DESC;

Cache Effectiveness

-- Daily cache hit rate over the past 30 days.
-- Fixed: positional GROUP BY/ORDER BY replaced with explicit expressions;
-- conditional counts use the Postgres FILTER clause.
-- NOTE(review): callAI tracks cached requests with cost = 0, so
-- would_be_cost_without_cache only exceeds actual_cost if cached rows carry
-- the original cost — verify trackUsage before trusting the savings figure.
SELECT 
  DATE(created_at) AS date,
  COUNT(*) AS total_requests,
  COUNT(*) FILTER (WHERE was_cached) AS cache_hits,
  ROUND(
    100.0 * COUNT(*) FILTER (WHERE was_cached) / COUNT(*),
    1
  ) AS hit_rate_percent,
  SUM(CASE WHEN was_cached THEN 0 ELSE cost END) AS actual_cost,
  SUM(cost) AS would_be_cost_without_cache
FROM ai_usage_tracking
WHERE created_at > now() - interval '30 days'
GROUP BY DATE(created_at)
ORDER BY date DESC;

Budget Status

-- Budget consumption per active allocation, most exhausted first.
-- NOTE(review): only filters period_start <= today, so allocations from
-- past periods also appear; consider bounding by period end the way
-- increment_budget_spend() does.
SELECT 
  scope_type,
  scope_id,
  period_type,
  period_start,
  budget_amount,
  spent_amount,
  remaining_amount,
  ROUND(100.0 * spent_amount / budget_amount, 1) as percent_used,
  action_at_limit
FROM budget_allocations
WHERE status = 'active'
  AND period_start <= CURRENT_DATE
ORDER BY percent_used DESC;

Circuit Breaker Status

-- Breaker fleet overview: open breakers first, then half-open, then the
-- rest, with the most recent failures at the top within each state.
SELECT 
  breaker_type,
  breaker_id,
  state,
  error_count,
  success_count,
  last_failure_at,
  last_failure_reason,
  cooldown_until,
  total_trips
FROM circuit_breakers
ORDER BY 
  CASE state 
    WHEN 'open' THEN 1 
    WHEN 'half_open' THEN 2 
    ELSE 3 
  END,
  last_failure_at DESC;

Top Cache Hits

-- Most-reused cache entries with estimated savings.
-- NOTE(review): includes expired rows, and total_savings assumes every hit
-- would otherwise have re-paid original_cost.
SELECT 
  SUBSTRING(response_text, 1, 100) as response_preview,
  model_id,
  hit_count,
  original_cost,
  original_cost * hit_count as total_savings,
  created_at,
  last_hit_at
FROM fact_cache
ORDER BY hit_count DESC
LIMIT 20;

Related Documents

| Document | Purpose |
| --- | --- |
| generation-engine.md | Content generation |
| content-intelligence.md | Content analysis |
| SCHEMA.md | Full table definitions |