Cost Optimization
Parent: Shared Infrastructure OVERVIEW
Status: Active
Scope: Model Router → Token Tracking → Caching → Budget Control
Overview
Cost Optimization is the efficiency layer: it wraps every AI operation to route requests to the optimal model, track token usage and cost, cache responses, and enforce budgets.
┌─────────────────────────────────────────────────────────────────────────┐
│ COST OPTIMIZATION PIPELINE │
├─────────────────────────────────────────────────────────────────────────┤
│ │
│ AI REQUEST │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────────┐ │
│ │ CACHE CHECK │ │
│ │ Hash request → Check fact_cache → Return if hit │ │
│ └──────────────────────────┬──────────────────────────────────────┘ │
│ │ cache miss │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────────┐ │
│ │ BUDGET CHECK │ │
│ │ Check allocation → Verify not exceeded → Continue or reject │ │
│ └──────────────────────────┬──────────────────────────────────────┘ │
│ │ budget ok │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────────┐ │
│ │ MODEL ROUTER │ │
│ │ Analyze task → Select optimal model → Consider cost/quality │ │
│ └──────────────────────────┬──────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────────┐ │
│ │ CIRCUIT BREAKER │ │
│ │ Check error rate → Check cost spike → Allow or block │ │
│ └──────────────────────────┬──────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────────┐ │
│ │ AI PROVIDER │ │
│ │ OpenAI / Anthropic / Google / Local │ │
│ └──────────────────────────┬──────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────────┐ │
│ │ USAGE TRACKING │ │
│ │ Record tokens → Calculate cost → Update allocations │ │
│ └──────────────────────────┬──────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────────┐ │
│ │ CACHE STORE │ │
│ │ Store response if cacheable → Set TTL │ │
│ └─────────────────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────────┘
Schema
Model Configuration
Available models with cost and capability info.
CREATE TABLE model_config (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
-- Model identity
provider text NOT NULL, -- 'openai', 'anthropic', 'google', 'local'
model_id text NOT NULL, -- 'gpt-4o', 'claude-sonnet-4-20250514', etc.
display_name text NOT NULL,
-- Pricing (USD per 1M tokens; update regularly as provider prices change)
input_cost_per_million numeric(10,4),
output_cost_per_million numeric(10,4),
-- Capabilities
max_tokens integer,
supports_vision boolean DEFAULT false,
supports_function_calling boolean DEFAULT true,
supports_streaming boolean DEFAULT true,
-- Quality tiers
quality_tier text, -- 'premium', 'standard', 'economy', 'batch'
-- Use cases
best_for text[], -- ['long_form', 'coding', 'analysis', 'chat']
-- Limits
requests_per_minute integer,
tokens_per_minute integer,
-- Status
status text DEFAULT 'active', -- 'active', 'deprecated', 'disabled'
created_at timestamptz DEFAULT now(),
updated_at timestamptz DEFAULT now(),
UNIQUE(provider, model_id)
);
CREATE INDEX idx_model_config_provider ON model_config(provider);
CREATE INDEX idx_model_config_quality ON model_config(quality_tier);
CREATE INDEX idx_model_config_status ON model_config(status);
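The routing code later in this document types rows from this table as ModelConfig without defining it. A minimal client-side interface mirroring the columns above; the nullability choices are assumptions of this sketch.
// Sketch: client-side type for model_config rows, as used by scoreModel below.
// Nullable fields mirror columns without a NOT NULL constraint in the schema.
interface ModelConfig {
  provider: string
  model_id: string
  display_name: string
  input_cost_per_million: number
  output_cost_per_million: number
  max_tokens: number | null
  supports_vision: boolean
  supports_function_calling: boolean
  supports_streaming: boolean
  quality_tier: string | null     // 'premium' | 'standard' | 'economy' | 'batch' | 'embedding'
  best_for: string[] | null
  requests_per_minute: number | null
  tokens_per_minute: number | null
  status: string                  // 'active' | 'deprecated' | 'disabled'
}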
AI Usage Tracking
Per-request cost tracking.
CREATE TABLE ai_usage_tracking (
id uuid DEFAULT gen_random_uuid(),
-- Request identification
request_id text, -- UUID for the request (a plain UNIQUE constraint is not allowed on a partitioned table)
-- Model used
provider text NOT NULL,
model_id text NOT NULL,
-- Token counts
prompt_tokens integer NOT NULL,
completion_tokens integer NOT NULL,
total_tokens integer GENERATED ALWAYS AS (prompt_tokens + completion_tokens) STORED,
-- Cost
cost numeric(10,6) NOT NULL,
-- Context
use_case text, -- 'generation', 'analysis', 'embedding', 'chat'
business_unit text,
client_id uuid,
content_id uuid,
-- Request details
prompt_hash text, -- For cache matching
was_cached boolean DEFAULT false,
-- Timing
latency_ms integer,
-- Metadata
metadata jsonb DEFAULT '{}',
created_at timestamptz DEFAULT now(),
-- The primary key on a partitioned table must include the partition key
PRIMARY KEY (id, created_at)
) PARTITION BY RANGE (created_at);
-- Create monthly partitions
CREATE TABLE ai_usage_tracking_2025_01 PARTITION OF ai_usage_tracking
FOR VALUES FROM ('2025-01-01') TO ('2025-02-01');
-- Continue for each month...
CREATE INDEX idx_ai_usage_tracking_model ON ai_usage_tracking(provider, model_id);
CREATE INDEX idx_ai_usage_tracking_use_case ON ai_usage_tracking(use_case);
CREATE INDEX idx_ai_usage_tracking_business_unit ON ai_usage_tracking(business_unit);
CREATE INDEX idx_ai_usage_tracking_client ON ai_usage_tracking(client_id);
CREATE INDEX idx_ai_usage_tracking_created ON ai_usage_tracking(created_at DESC);
CREATE INDEX idx_ai_usage_tracking_prompt_hash ON ai_usage_tracking(prompt_hash);
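Rather than creating each monthly partition by hand, a small cron job can keep partitions ahead of the data. The sketch below assumes an execSql helper (a Postgres client or an RPC that runs DDL), since supabase-js does not execute raw SQL directly.
// Sketch: create next month's partition ahead of time (run monthly via cron).
// execSql is an assumed helper backed by a Postgres client or a DDL-running RPC.
async function ensureNextMonthPartition(execSql: (sql: string) => Promise<void>) {
  const now = new Date()
  const start = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth() + 1, 1))
  const end = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth() + 2, 1))
  const suffix = `${start.getUTCFullYear()}_${String(start.getUTCMonth() + 1).padStart(2, '0')}`
  const fmt = (d: Date) => d.toISOString().slice(0, 10)
  await execSql(`
    CREATE TABLE IF NOT EXISTS ai_usage_tracking_${suffix}
    PARTITION OF ai_usage_tracking
    FOR VALUES FROM ('${fmt(start)}') TO ('${fmt(end)}');
  `)
}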
Fact Cache
Cached AI responses for deterministic queries.
CREATE TABLE fact_cache (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
-- Cache key
cache_key text UNIQUE NOT NULL, -- Hash of normalized prompt
-- Request signature
prompt_hash text NOT NULL,
model_id text NOT NULL,
-- Cached response
response_text text NOT NULL,
response_metadata jsonb DEFAULT '{}',
-- Token counts (for cost estimation)
prompt_tokens integer,
completion_tokens integer,
-- Cache management
hit_count integer DEFAULT 0,
last_hit_at timestamptz,
-- TTL
expires_at timestamptz,
-- Source
original_cost numeric(10,6),
created_at timestamptz DEFAULT now(),
updated_at timestamptz DEFAULT now()
);
CREATE INDEX idx_fact_cache_key ON fact_cache(cache_key);
CREATE INDEX idx_fact_cache_expires ON fact_cache(expires_at);
CREATE INDEX idx_fact_cache_hits ON fact_cache(hit_count DESC);
Budget Allocations
Spending limits by business unit, client, or use case.
CREATE TABLE budget_allocations (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
-- Allocation scope
scope_type text NOT NULL, -- 'global', 'business_unit', 'client', 'use_case'
scope_id text, -- Specific ID (null for global)
-- Budget period
period_type text NOT NULL, -- 'daily', 'weekly', 'monthly'
period_start date NOT NULL,
-- Amounts
budget_amount numeric(10,2) NOT NULL,
spent_amount numeric(10,2) DEFAULT 0,
remaining_amount numeric(10,2) GENERATED ALWAYS AS (budget_amount - spent_amount) STORED,
-- Alerts
alert_threshold_percent integer DEFAULT 80,
alert_sent boolean DEFAULT false,
-- Actions at limit
action_at_limit text DEFAULT 'warn', -- 'warn', 'throttle', 'block'
-- Status
status text DEFAULT 'active',
created_at timestamptz DEFAULT now(),
updated_at timestamptz DEFAULT now(),
UNIQUE(scope_type, scope_id, period_type, period_start)
);
CREATE INDEX idx_budget_allocations_scope ON budget_allocations(scope_type, scope_id);
CREATE INDEX idx_budget_allocations_period ON budget_allocations(period_start);
CREATE INDEX idx_budget_allocations_status ON budget_allocations(status);
Circuit Breakers
Error and cost spike protection.
CREATE TABLE circuit_breakers (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
-- Breaker scope
breaker_type text NOT NULL, -- 'model', 'provider', 'use_case'
breaker_id text NOT NULL,
-- State
state text DEFAULT 'closed', -- 'closed', 'open', 'half_open'
-- Thresholds
error_threshold integer DEFAULT 5, -- Errors before opening
cost_spike_threshold numeric DEFAULT 2.0, -- X times normal cost
-- Current counts (rolling window)
error_count integer DEFAULT 0,
success_count integer DEFAULT 0,
window_start timestamptz DEFAULT now(),
window_duration interval DEFAULT '5 minutes',
-- Recovery
cooldown_until timestamptz,
half_open_successes integer DEFAULT 0,
half_open_required integer DEFAULT 3,
-- History
last_failure_at timestamptz,
last_failure_reason text,
total_trips integer DEFAULT 0,
updated_at timestamptz DEFAULT now(),
UNIQUE(breaker_type, breaker_id)
);
CREATE INDEX idx_circuit_breakers_state ON circuit_breakers(state);
Model Configuration Data
const modelConfigs = [
// Premium tier - complex reasoning
{
provider: 'anthropic',
model_id: 'claude-sonnet-4-20250514',
display_name: 'Claude Sonnet 4',
input_cost_per_million: 3.00,
output_cost_per_million: 15.00,
max_tokens: 8192,
supports_vision: true,
quality_tier: 'premium',
best_for: ['long_form', 'analysis', 'coding', 'complex_reasoning']
},
{
provider: 'openai',
model_id: 'gpt-4o',
display_name: 'GPT-4o',
input_cost_per_million: 2.50,
output_cost_per_million: 10.00,
max_tokens: 16384,
supports_vision: true,
quality_tier: 'premium',
best_for: ['long_form', 'analysis', 'coding', 'vision']
},
// Standard tier - good balance
{
provider: 'openai',
model_id: 'gpt-4o-mini',
display_name: 'GPT-4o Mini',
input_cost_per_million: 0.15,
output_cost_per_million: 0.60,
max_tokens: 16384,
supports_vision: true,
quality_tier: 'standard',
best_for: ['chat', 'short_form', 'classification', 'extraction']
},
{
provider: 'anthropic',
model_id: 'claude-3-5-haiku-20241022',
display_name: 'Claude 3.5 Haiku',
input_cost_per_million: 0.80,
output_cost_per_million: 4.00,
max_tokens: 8192,
supports_vision: true,
quality_tier: 'standard',
best_for: ['chat', 'short_form', 'summarization']
},
// Economy tier - high volume
{
provider: 'google',
model_id: 'gemini-2.0-flash',
display_name: 'Gemini 2.0 Flash',
input_cost_per_million: 0.075,
output_cost_per_million: 0.30,
max_tokens: 8192,
supports_vision: true,
quality_tier: 'economy',
best_for: ['batch', 'classification', 'extraction', 'scoring']
},
// Embedding models
{
provider: 'openai',
model_id: 'text-embedding-3-small',
display_name: 'Embedding Small',
input_cost_per_million: 0.02,
output_cost_per_million: 0,
max_tokens: 8191,
quality_tier: 'embedding',
best_for: ['embedding', 'semantic_search']
}
]
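These entries can be synced into model_config at deploy time. A minimal sketch using the supabase client assumed throughout this document; rows are upserted one at a time because the objects above don't all share the same keys, which PostgREST requires for bulk inserts.
// Sketch: seed/update model_config from the array above.
// onConflict matches the UNIQUE(provider, model_id) constraint.
async function seedModelConfigs() {
  for (const config of modelConfigs) {
    const { error } = await supabase
      .from('model_config')
      .upsert(config, { onConflict: 'provider,model_id' })
    if (error) throw error
  }
}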
AI Client Wrapper
Main Entry Point
interface AIRequest {
prompt: string
systemPrompt?: string
model?: string // Override auto-selection
maxTokens?: number
temperature?: number
useCase: string // 'generation', 'analysis', etc.
businessUnit?: string
clientId?: string
contentId?: string
cacheable?: boolean
cacheTTL?: number // seconds
}
interface AIResponse {
text: string
model: string
usage: {
promptTokens: number
completionTokens: number
totalTokens: number
cost: number
}
cached: boolean
latencyMs: number
}
async function callAI(request: AIRequest): Promise<AIResponse> {
const requestId = crypto.randomUUID()
const startTime = Date.now()
// 1. Check cache
if (request.cacheable !== false) {
const cached = await checkCache(request)
if (cached) {
await trackUsage({
requestId,
...cached.usage,
wasCached: true,
useCase: request.useCase,
businessUnit: request.businessUnit,
clientId: request.clientId
})
return { ...cached, cached: true, latencyMs: Date.now() - startTime }
}
}
// 2. Check budget
const budgetOk = await checkBudget(request.businessUnit, request.clientId)
if (!budgetOk.allowed) {
throw new BudgetExceededError(budgetOk.reason)
}
// 3. Select model
let model = request.model || await selectModel(request) // let: may be swapped for a fallback below
// 4. Check circuit breaker
const breakerOk = await checkCircuitBreaker(model)
if (!breakerOk) {
// Try fallback model
const fallback = getFallbackModel(model)
if (fallback) {
model = fallback
} else {
throw new CircuitOpenError(`Circuit breaker open for ${model}`)
}
}
// 5. Make request
try {
const response = await executeAIRequest(model, request)
// 6. Track usage
const cost = calculateCost(model, response.usage)
await trackUsage({
requestId,
provider: getProvider(model),
modelId: model,
promptTokens: response.usage.promptTokens,
completionTokens: response.usage.completionTokens,
cost,
useCase: request.useCase,
businessUnit: request.businessUnit,
clientId: request.clientId,
contentId: request.contentId,
promptHash: hashPrompt(request.prompt),
wasCached: false,
latencyMs: Date.now() - startTime
})
// 7. Update budget
await updateBudgetSpend(cost, request.businessUnit, request.clientId)
// 8. Cache response if cacheable
if (request.cacheable !== false && isCacheableResponse(response)) {
await cacheResponse(request, response, request.cacheTTL)
}
// 9. Record success for circuit breaker
await recordSuccess(model)
return {
text: response.text,
model,
usage: {
...response.usage,
cost
},
cached: false,
latencyMs: Date.now() - startTime
}
} catch (error) {
// Record failure for circuit breaker
await recordFailure(model, error.message)
throw error
}
}
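A typical call site looks like the sketch below; the prompt, use case, and TTL values are illustrative.
// Illustrative usage of the wrapper; field values are placeholders.
const result = await callAI({
  prompt: 'Summarize this brief into three bullet points: ...',
  useCase: 'analysis',
  businessUnit: 'publisher',
  cacheable: true,
  cacheTTL: 86400 // 1 day
})
console.log(result.model, result.usage.cost.toFixed(6), result.cached)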
Model Selection
async function selectModel(request: AIRequest): Promise<string> {
// Get active models
const { data: models } = await supabase
.from('model_config')
.select('*')
.eq('status', 'active')
// Estimate prompt length
const estimatedTokens = estimateTokens(request.prompt + (request.systemPrompt || ''))
// Filter by capability
let candidates = models.filter(m => {
// Must support required tokens
if (m.max_tokens && estimatedTokens > m.max_tokens * 0.8) return false
// Must match use case
if (m.best_for && !m.best_for.includes(request.useCase)) return false
return true
})
if (candidates.length === 0) {
// Fallback to default
return 'gpt-4o-mini'
}
// Score candidates
const scored = candidates.map(m => ({
model: m.model_id,
score: scoreModel(m, request)
}))
// Sort by score descending
scored.sort((a, b) => b.score - a.score)
return scored[0].model
}
function scoreModel(model: ModelConfig, request: AIRequest): number {
let score = 0
// Use case match
if (model.best_for?.includes(request.useCase)) {
score += 30
}
// Quality tier preference based on use case
const qualityPreference = {
generation: 'premium',
analysis: 'premium',
coding: 'premium',
chat: 'standard',
classification: 'economy',
extraction: 'economy',
batch: 'economy',
embedding: 'embedding'
}
if (model.quality_tier === qualityPreference[request.useCase]) {
score += 20
}
// Cost efficiency (inverse of cost)
const avgCostPer1K = (model.input_cost_per_million + model.output_cost_per_million) / 2000
score += Math.max(0, 20 - avgCostPer1K * 10)
// Prefer models with higher rate limits
if (model.requests_per_minute > 100) {
score += 10
}
return score
}
function getFallbackModel(primaryModel: string): string | null {
const fallbacks = {
'claude-sonnet-4-20250514': 'gpt-4o',
'gpt-4o': 'claude-sonnet-4-20250514',
'gpt-4o-mini': 'claude-3-5-haiku-20241022',
'claude-3-5-haiku-20241022': 'gpt-4o-mini',
'gemini-2.0-flash': 'gpt-4o-mini'
}
return fallbacks[primaryModel] || null
}
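callAI and selectModel reference a few helpers that are not defined in this document (estimateTokens, hashPrompt, getProvider, trackUsage). Minimal sketches follow; the token estimate is a rough characters-per-token heuristic, and the cached-request defaults in trackUsage are assumptions of this sketch, not part of the original design.
import { createHash } from 'node:crypto'

// Rough heuristic (~4 characters per token). Good enough for routing
// decisions, not for billing; use the provider's tokenizer for exact counts.
function estimateTokens(text: string): number {
  return Math.ceil(text.length / 4)
}

// Stable hash of the raw prompt, used for cache matching and usage tracking.
function hashPrompt(prompt: string): string {
  return createHash('sha256').update(prompt).digest('hex')
}

// Map a model id back to its provider. A lookup against model_config would be
// more robust; the prefix convention here is an assumption.
function getProvider(modelId: string): string {
  if (modelId.startsWith('gpt-') || modelId.startsWith('text-embedding-')) return 'openai'
  if (modelId.startsWith('claude-')) return 'anthropic'
  if (modelId.startsWith('gemini-')) return 'google'
  return 'local'
}

// Persist one usage row, mapping camelCase fields to ai_usage_tracking columns.
// provider/model_id are NOT NULL, so cached requests (which don't pass them)
// fall back to 'cache' here -- an assumption of this sketch.
async function trackUsage(u: {
  requestId: string
  provider?: string
  modelId?: string
  promptTokens: number
  completionTokens: number
  cost?: number
  useCase?: string
  businessUnit?: string
  clientId?: string
  contentId?: string
  promptHash?: string
  wasCached: boolean
  latencyMs?: number
}) {
  await supabase.from('ai_usage_tracking').insert({
    request_id: u.requestId,
    provider: u.provider ?? 'cache',
    model_id: u.modelId ?? 'cache',
    prompt_tokens: u.promptTokens,
    completion_tokens: u.completionTokens,
    cost: u.cost ?? 0,
    use_case: u.useCase,
    business_unit: u.businessUnit,
    client_id: u.clientId,
    content_id: u.contentId,
    prompt_hash: u.promptHash,
    was_cached: u.wasCached,
    latency_ms: u.latencyMs
  })
}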
Caching
Check Cache
async function checkCache(request: AIRequest): Promise<AIResponse | null> {
const cacheKey = generateCacheKey(request)
const { data: cached } = await supabase
.from('fact_cache')
.select('*')
.eq('cache_key', cacheKey)
.gt('expires_at', new Date().toISOString())
.single()
if (!cached) return null
// Update hit count
await supabase
.from('fact_cache')
.update({
hit_count: cached.hit_count + 1,
last_hit_at: new Date().toISOString()
})
.eq('id', cached.id)
return {
text: cached.response_text,
model: cached.model_id,
usage: {
promptTokens: cached.prompt_tokens,
completionTokens: cached.completion_tokens,
totalTokens: cached.prompt_tokens + cached.completion_tokens,
cost: 0 // No cost for cached response
},
cached: true,
latencyMs: 0
}
}
function generateCacheKey(request: AIRequest): string {
// Normalize prompt for consistent hashing
const normalized = {
prompt: request.prompt.trim().toLowerCase(),
systemPrompt: request.systemPrompt?.trim().toLowerCase(),
temperature: request.temperature || 0
}
// Uses Node's crypto module (import crypto from 'node:crypto')
return crypto
.createHash('sha256')
.update(JSON.stringify(normalized))
.digest('hex')
}
async function cacheResponse(
request: AIRequest,
response: AIResponse,
ttlSeconds?: number
) {
const cacheKey = generateCacheKey(request)
const ttl = ttlSeconds || getDefaultTTL(request.useCase)
if (ttl <= 0) return // a TTL of 0 (e.g. chat) means "don't cache"
await supabase
.from('fact_cache')
.upsert({
cache_key: cacheKey,
prompt_hash: hashPrompt(request.prompt),
model_id: response.model,
response_text: response.text,
prompt_tokens: response.usage.promptTokens,
completion_tokens: response.usage.completionTokens,
original_cost: response.usage.cost,
expires_at: new Date(Date.now() + ttl * 1000).toISOString()
})
}
function getDefaultTTL(useCase: string): number {
const ttls = {
classification: 86400 * 7, // 1 week
extraction: 86400 * 7, // 1 week
embedding: 86400 * 30, // 1 month
analysis: 86400, // 1 day
generation: 3600, // 1 hour (content changes)
chat: 0 // Don't cache
}
return ttls[useCase] ?? 3600 // ?? so an explicit 0 ("don't cache") isn't replaced by the default
}
function isCacheableResponse(response: AIResponse): boolean {
// Don't cache very short responses (might be errors)
if (response.text.length < 50) return false
// Don't cache responses that look like they reference time
if (response.text.match(/today|yesterday|tomorrow|this week|currently/i)) {
return false
}
return true
}
Clean Expired Cache
// Cron: Daily
async function cleanExpiredCache() {
const { count } = await supabase
  .from('fact_cache')
  .delete({ count: 'exact' }) // count option is required to get the deleted-row count back
  .lt('expires_at', new Date().toISOString())
console.log(`Cleaned ${count} expired cache entries`)
// Also clean low-value entries (never hit, > 7 days old)
await supabase
.from('fact_cache')
.delete()
.eq('hit_count', 0)
.lt('created_at', new Date(Date.now() - 7 * 86400000).toISOString())
}
Budget Management
Check Budget
async function checkBudget(
businessUnit?: string,
clientId?: string
): Promise<{ allowed: boolean; reason?: string }> {
// Check in order: client → business unit → global
const checks = [
clientId ? { scope_type: 'client', scope_id: clientId } : null,
businessUnit ? { scope_type: 'business_unit', scope_id: businessUnit } : null,
{ scope_type: 'global', scope_id: null }
].filter(Boolean)
for (const check of checks) {
// .eq() does not match NULL, so the global scope needs .is('scope_id', null)
let query = supabase
  .from('budget_allocations')
  .select('*')
  .eq('scope_type', check.scope_type)
  .eq('status', 'active')
  .gte('period_start', getCurrentPeriodStart())
query = check.scope_id
  ? query.eq('scope_id', check.scope_id)
  : query.is('scope_id', null)
const { data: allocation } = await query.single()
if (!allocation) continue
if (allocation.remaining_amount <= 0) {
switch (allocation.action_at_limit) {
case 'block':
return {
allowed: false,
reason: `Budget exceeded for ${check.scope_type}: ${check.scope_id || 'global'}`
}
case 'throttle':
// Allow but with delay
await sleep(1000)
break
case 'warn':
// Log warning but allow
console.warn(`Budget warning: ${check.scope_type} ${check.scope_id} at limit`)
break
}
}
// Check alert threshold
const percentUsed = (allocation.spent_amount / allocation.budget_amount) * 100
if (percentUsed >= allocation.alert_threshold_percent && !allocation.alert_sent) {
await sendBudgetAlert(allocation)
}
}
return { allowed: true }
}
async function updateBudgetSpend(
  cost: number, // required parameters cannot follow optional ones
  businessUnit?: string,
  clientId?: string
) {
const updates = [
clientId ? { scope_type: 'client', scope_id: clientId } : null,
businessUnit ? { scope_type: 'business_unit', scope_id: businessUnit } : null,
{ scope_type: 'global', scope_id: null }
].filter(Boolean)
for (const update of updates) {
await supabase.rpc('increment_budget_spend', {
p_scope_type: update.scope_type,
p_scope_id: update.scope_id,
p_amount: cost
})
}
}
// Supabase function
/*
CREATE OR REPLACE FUNCTION increment_budget_spend(
p_scope_type text,
p_scope_id text,
p_amount numeric
)
RETURNS void
LANGUAGE sql
AS $$
UPDATE budget_allocations
SET
spent_amount = spent_amount + p_amount,
updated_at = now()
WHERE scope_type = p_scope_type
AND (scope_id = p_scope_id OR (p_scope_id IS NULL AND scope_id IS NULL))
AND status = 'active'
AND period_start <= CURRENT_DATE
AND period_start +
CASE period_type
WHEN 'daily' THEN interval '1 day'
WHEN 'weekly' THEN interval '1 week'
WHEN 'monthly' THEN interval '1 month'
END > CURRENT_DATE;
$$;
*/
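checkBudget above calls sendBudgetAlert, which is not defined in this document. A minimal sketch that flags the allocation so the alert fires only once per period; the notification transport (here just a console warning) is a placeholder.
// Sketch: send a one-time alert for the period and mark the allocation.
// Replace the console.warn with the real notification channel (email, Slack, etc.).
async function sendBudgetAlert(allocation: {
  id: string
  scope_type: string
  scope_id: string | null
  spent_amount: number
  budget_amount: number
}) {
  const percent = Math.round((allocation.spent_amount / allocation.budget_amount) * 100)
  console.warn(
    `AI budget alert: ${allocation.scope_type} ${allocation.scope_id ?? 'global'} at ${percent}% of budget`
  )
  await supabase
    .from('budget_allocations')
    .update({ alert_sent: true })
    .eq('id', allocation.id)
}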
Create Budget Allocations
// Cron: Start of each period
async function createBudgetAllocations() {
// Get budget templates
const budgetTemplates = [
{ scope_type: 'global', scope_id: null, period_type: 'monthly', amount: 5000 },
{ scope_type: 'business_unit', scope_id: 'publisher', period_type: 'monthly', amount: 2000 },
{ scope_type: 'business_unit', scope_id: 'platform', period_type: 'monthly', amount: 1000 },
{ scope_type: 'business_unit', scope_id: 'agency', period_type: 'monthly', amount: 1500 },
{ scope_type: 'business_unit', scope_id: 'store', period_type: 'monthly', amount: 500 }
]
for (const template of budgetTemplates) {
// Check if allocation exists for current period
const periodStart = getPeriodStart(template.period_type)
// .eq() does not match NULL, so the global row needs .is('scope_id', null)
let query = supabase
  .from('budget_allocations')
  .select('id')
  .eq('scope_type', template.scope_type)
  .eq('period_type', template.period_type)
  .eq('period_start', periodStart)
query = template.scope_id
  ? query.eq('scope_id', template.scope_id)
  : query.is('scope_id', null)
const { data: existing } = await query.maybeSingle()
if (!existing) {
  await supabase
    .from('budget_allocations')
    .insert({
      scope_type: template.scope_type,
      scope_id: template.scope_id,
      period_type: template.period_type,
      period_start: periodStart,
      budget_amount: template.amount
    })
}
}
}
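checkBudget and createBudgetAllocations rely on getCurrentPeriodStart / getPeriodStart helpers that are not defined above. A minimal UTC-based sketch:
// Sketch: first day of the current period, as an ISO date string (UTC).
function getPeriodStart(periodType: string): string {
  const now = new Date()
  if (periodType === 'daily') {
    return now.toISOString().split('T')[0]
  }
  if (periodType === 'weekly') {
    const day = now.getUTCDay() // 0 = Sunday
    const monday = new Date(now)
    monday.setUTCDate(now.getUTCDate() - ((day + 6) % 7)) // back to Monday
    return monday.toISOString().split('T')[0]
  }
  // monthly
  return new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), 1))
    .toISOString()
    .split('T')[0]
}

// checkBudget compares against the monthly period in this sketch.
function getCurrentPeriodStart(): string {
  return getPeriodStart('monthly')
}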
Circuit Breaker
Check Circuit Breaker
async function checkCircuitBreaker(modelId: string): Promise<boolean> {
const { data: breaker } = await supabase
.from('circuit_breakers')
.select('*')
.eq('breaker_type', 'model')
.eq('breaker_id', modelId)
.single()
if (!breaker) {
// No breaker = allowed
return true
}
switch (breaker.state) {
case 'closed':
return true
case 'open':
// Check if cooldown has passed
if (breaker.cooldown_until && new Date(breaker.cooldown_until) < new Date()) {
// Move to half-open
await supabase
.from('circuit_breakers')
.update({
state: 'half_open',
half_open_successes: 0
})
.eq('id', breaker.id)
return true
}
return false
case 'half_open':
// Allow limited requests to test recovery
return true
}
return true
}
async function recordSuccess(modelId: string) {
const { data: breaker } = await supabase
.from('circuit_breakers')
.select('*')
.eq('breaker_type', 'model')
.eq('breaker_id', modelId)
.single()
if (!breaker) return
if (breaker.state === 'half_open') {
const newSuccesses = breaker.half_open_successes + 1
if (newSuccesses >= breaker.half_open_required) {
// Close circuit
await supabase
.from('circuit_breakers')
.update({
state: 'closed',
error_count: 0,
success_count: 0,
window_start: new Date().toISOString()
})
.eq('id', breaker.id)
} else {
await supabase
.from('circuit_breakers')
.update({ half_open_successes: newSuccesses })
.eq('id', breaker.id)
}
} else {
await supabase
.from('circuit_breakers')
.update({
success_count: breaker.success_count + 1
})
.eq('id', breaker.id)
}
}
async function recordFailure(modelId: string, reason: string) {
// Get or create breaker
let { data: breaker } = await supabase
.from('circuit_breakers')
.select('*')
.eq('breaker_type', 'model')
.eq('breaker_id', modelId)
.single()
if (!breaker) {
const { data: created } = await supabase
.from('circuit_breakers')
.insert({
breaker_type: 'model',
breaker_id: modelId
})
.select()
.single()
breaker = created
}
// Check if window has expired
const windowExpired = new Date(breaker.window_start).getTime() +
parseDuration(breaker.window_duration) < Date.now()
if (windowExpired) {
// Reset window
await supabase
.from('circuit_breakers')
.update({
error_count: 1,
success_count: 0,
window_start: new Date().toISOString(),
last_failure_at: new Date().toISOString(),
last_failure_reason: reason
})
.eq('id', breaker.id)
} else {
const newErrorCount = breaker.error_count + 1
if (newErrorCount >= breaker.error_threshold) {
// Open circuit
await supabase
.from('circuit_breakers')
.update({
state: 'open',
error_count: newErrorCount,
cooldown_until: new Date(Date.now() + 60000).toISOString(), // 1 min cooldown
total_trips: breaker.total_trips + 1,
last_failure_at: new Date().toISOString(),
last_failure_reason: reason
})
.eq('id', breaker.id)
} else {
await supabase
.from('circuit_breakers')
.update({
error_count: newErrorCount,
last_failure_at: new Date().toISOString(),
last_failure_reason: reason
})
.eq('id', breaker.id)
}
}
}
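recordFailure converts the Postgres interval stored in window_duration to milliseconds via parseDuration, which is not defined above. A minimal sketch that handles the 'HH:MM:SS' form PostgREST typically returns as well as verbose strings like '5 minutes':
// Sketch: parse simple Postgres interval strings into milliseconds.
function parseDuration(interval: string): number {
  // PostgREST may serialize intervals as 'HH:MM:SS'
  const clock = interval.match(/^(\d+):(\d+):(\d+)/)
  if (clock) {
    const [, h, m, s] = clock
    return ((+h * 60 + +m) * 60 + +s) * 1000
  }
  // ...or as verbose text like '5 minutes'
  const verbose = interval.match(/^(\d+)\s*(second|minute|hour|day)s?$/i)
  if (verbose) {
    const unitMs: Record<string, number> = {
      second: 1000, minute: 60_000, hour: 3_600_000, day: 86_400_000
    }
    return parseInt(verbose[1], 10) * unitMs[verbose[2].toLowerCase()]
  }
  return 5 * 60 * 1000 // fall back to the 5-minute default window
}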
Cost Calculation
function calculateCost(modelId: string, usage: TokenUsage): number {
const costs = {
// Premium
'claude-sonnet-4-20250514': { input: 3.00, output: 15.00 },
'gpt-4o': { input: 2.50, output: 10.00 },
// Standard
'gpt-4o-mini': { input: 0.15, output: 0.60 },
'claude-3-5-haiku-20241022': { input: 0.80, output: 4.00 },
// Economy
'gemini-2.0-flash': { input: 0.075, output: 0.30 },
// Embedding
'text-embedding-3-small': { input: 0.02, output: 0 }
}
const modelCost = costs[modelId] || { input: 1.00, output: 3.00 }
const inputCost = (usage.promptTokens / 1_000_000) * modelCost.input
const outputCost = (usage.completionTokens / 1_000_000) * modelCost.output
return inputCost + outputCost
}
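As a worked example, 1,200 prompt tokens and 800 completion tokens on gpt-4o-mini cost (1,200 / 1,000,000) × $0.15 + (800 / 1,000,000) × $0.60 ≈ $0.00066. Note that this table duplicates the model_config pricing seeded earlier; loading rates from that table at startup would keep the two from drifting. The TokenUsage type is not defined in this document and is assumed here to carry the prompt/completion counts.
// Worked example of the arithmetic above (gpt-4o-mini rates).
const exampleCost = calculateCost('gpt-4o-mini', {
  promptTokens: 1200,
  completionTokens: 800,
  totalTokens: 2000
})
// (1200 / 1_000_000) * 0.15 + (800 / 1_000_000) * 0.60 = 0.00066
console.log(exampleCost.toFixed(6)) // "0.000660"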
Queries
Daily Cost Summary
SELECT
DATE(created_at) as date,
provider,
model_id,
COUNT(*) as requests,
SUM(prompt_tokens) as total_prompt_tokens,
SUM(completion_tokens) as total_completion_tokens,
SUM(cost) as total_cost,
COUNT(CASE WHEN was_cached THEN 1 END) as cache_hits,
ROUND(AVG(latency_ms), 0) as avg_latency_ms
FROM ai_usage_tracking
WHERE created_at > now() - interval '7 days'
GROUP BY 1, 2, 3
ORDER BY date DESC, total_cost DESC;
Cost by Business Unit
SELECT
business_unit,
COUNT(*) as requests,
SUM(cost) as total_cost,
ROUND(AVG(cost), 4) as avg_cost_per_request,
SUM(total_tokens) as total_tokens
FROM ai_usage_tracking
WHERE created_at > now() - interval '30 days'
GROUP BY business_unit
ORDER BY total_cost DESC;
Cache Effectiveness
SELECT
DATE(created_at) as date,
COUNT(*) as total_requests,
COUNT(CASE WHEN was_cached THEN 1 END) as cache_hits,
ROUND(
100.0 * COUNT(CASE WHEN was_cached THEN 1 END) / COUNT(*),
1
) as hit_rate_percent,
SUM(cost) as actual_cost
-- Cached requests are recorded with cost = 0, so cost avoided is better
-- estimated from fact_cache (original_cost * hit_count; see Top Cache Hits)
FROM ai_usage_tracking
WHERE created_at > now() - interval '30 days'
GROUP BY 1
ORDER BY 1 DESC;
Budget Status
SELECT
scope_type,
scope_id,
period_type,
period_start,
budget_amount,
spent_amount,
remaining_amount,
ROUND(100.0 * spent_amount / budget_amount, 1) as percent_used,
action_at_limit
FROM budget_allocations
WHERE status = 'active'
AND period_start <= CURRENT_DATE
ORDER BY percent_used DESC;
Circuit Breaker Status
SELECT
breaker_type,
breaker_id,
state,
error_count,
success_count,
last_failure_at,
last_failure_reason,
cooldown_until,
total_trips
FROM circuit_breakers
ORDER BY
CASE state
WHEN 'open' THEN 1
WHEN 'half_open' THEN 2
ELSE 3
END,
last_failure_at DESC;
Top Cache Hits
SELECT
SUBSTRING(response_text, 1, 100) as response_preview,
model_id,
hit_count,
original_cost,
original_cost * hit_count as total_savings,
created_at,
last_hit_at
FROM fact_cache
ORDER BY hit_count DESC
LIMIT 20;
Related Documentation
| Document | Purpose |
|---|---|
| generation-engine.md | Content generation |
| content-intelligence.md | Content analysis |
| SCHEMA.md | Full table definitions |