feat(providers): hosted-key support for LLM providers (flag-gated, no rate limiting) by TheodoreSpeaks · Pull Request #5127 · simstudioai/sim · GitHub
Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 60 additions & 3 deletions apps/sim/lib/api-key/byok.ts
95 changes: 95 additions & 0 deletions apps/sim/lib/api-key/hosted-cost.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/**
* @vitest-environment node
*/
import { beforeEach, describe, expect, it, vi } from 'vitest'

/**
 * Mock functions standing in for the two `hostedKeyMetrics` methods used by the
 * module under test. They are created through `vi.hoisted` so that they already
 * exist when the `vi.mock` factory below is evaluated (Vitest hoists `vi.mock`
 * calls above the module's imports).
 */
const { mockRecordUsed, mockRecordCostCharged } = vi.hoisted(() => ({
mockRecordUsed: vi.fn(),
mockRecordCostCharged: vi.fn(),
}))

// Replace the metrics module so calls to `hostedKeyMetrics.recordUsed` and
// `hostedKeyMetrics.recordCostCharged` land on the mocks above and can be
// asserted on by the tests in this file.
vi.mock('@/lib/monitoring/metrics', () => ({
hostedKeyMetrics: {
recordUsed: mockRecordUsed,
recordCostCharged: mockRecordCostCharged,
},
}))

import {
calculateHostedCost,
classifyHostedKeyFailure,
emitHostedKeyUsage,
} from '@/lib/api-key/hosted-cost'

// Covers both tool pricing shapes: the flat `per_request` fee and the
// response-derived `custom` fee (numeric and structured results).
describe('calculateHostedCost (tool pricing)', () => {
  it('per_request returns the flat fee', () => {
    const flatFee = calculateHostedCost({ type: 'per_request', cost: 0.005 }, {}, {})

    expect(flatFee).toEqual({ cost: 0.005 })
  })

  it('custom returns a numeric getCost result', () => {
    const numericResult = calculateHostedCost({ type: 'custom', getCost: () => 0.42 }, {}, {})

    expect(numericResult).toEqual({ cost: 0.42 })
  })

  it('custom passes through a structured getCost result with metadata', () => {
    const structuredResult = calculateHostedCost(
      { type: 'custom', getCost: () => ({ cost: 1.5, metadata: { units: 3 } }) },
      {},
      {}
    )

    expect(structuredResult).toEqual({ cost: 1.5, metadata: { units: 3 } })
  })

  it('forwards params and response to custom getCost', () => {
    const costSpy = vi.fn(() => 1)
    const callParams = { a: 1 }
    const callResponse = { b: 2 }

    calculateHostedCost({ type: 'custom', getCost: costSpy }, callParams, callResponse)

    // The pricing callback must see exactly the objects handed to the calculator.
    expect(costSpy).toHaveBeenCalledWith(callParams, callResponse)
  })
})

// Table-driven checks: each row pairs a thrown value with the failure reason
// the classifier is expected to report for it.
describe('classifyHostedKeyFailure', () => {
  it('classifies structured SDK errors by status', () => {
    const structuredCases: Array<[{ status: number; message?: string }, string]> = [
      [{ status: 429 }, 'rate_limited'],
      [{ status: 503 }, 'rate_limited'],
      [{ status: 401 }, 'auth'],
      [{ status: 403, message: 'quota exceeded' }, 'rate_limited'],
      [{ status: 500 }, 'other'],
    ]

    for (const [sdkError, expectedReason] of structuredCases) {
      expect(classifyHostedKeyFailure(sdkError)).toBe(expectedReason)
    }
  })

  it('classifies message-embedded status (provider errors with no .status)', () => {
    // Regression: the previous `\bunauthor\b` regex never matched "Unauthorized".
    const messageCases: Array<[string, string]> = [
      ['Unauthorized', 'auth'],
      ['OpenAI API error (401): bad key', 'auth'],
      ['Forbidden', 'auth'],
      ['Invalid API key provided', 'auth'],
      ['API error (429): rate limit', 'rate_limited'],
      ['Internal Server Error (500)', 'other'],
    ]

    for (const [errorText, expectedReason] of messageCases) {
      expect(classifyHostedKeyFailure(new Error(errorText))).toBe(expectedReason)
    }
  })
})

// Verifies which labels reach each metrics call; the metrics module itself is
// mocked, so only the forwarded arguments are checked.
describe('emitHostedKeyUsage', () => {
  beforeEach(() => {
    // Drop call history left behind by earlier tests.
    vi.clearAllMocks()
  })

  it('records both usage and cost with the provider/tool/key labels', () => {
    const provider = 'openai'
    const tool = 'gpt-4o'
    const key = 'OPENAI_API_KEY_2'

    emitHostedKeyUsage({ provider, tool, key, costTotal: 0.03 })

    // Usage carries all three labels; the cost metric omits the key.
    expect(mockRecordUsed).toHaveBeenCalledWith({ provider, tool, key })
    expect(mockRecordCostCharged).toHaveBeenCalledWith(0.03, { provider, tool })
  })
})
93 changes: 93 additions & 0 deletions apps/sim/lib/api-key/hosted-cost.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import { hostedKeyMetrics } from '@/lib/monitoring/metrics'
import type { ToolHostingPricing } from '@/tools/types'

/**
 * Outcome of pricing a single hosted-key tool call.
 */
export interface HostedCostResult {
  /** Amount to bill for the call, expressed in dollars. */
  cost: number
  /**
   * Extra details describing how the cost was derived, when the pricing
   * supplies them (e.g. a provider breakdown from `custom` pricing).
   */
  metadata?: Record<string, unknown>
}

/**
 * Price a hosted-key **tool** call from the tool's declared pricing config.
 *
 * Two pricing shapes are supported: `per_request` (a flat fee) and `custom`
 * (a fee derived from the call's params and response). LLM providers do NOT go
 * through this function: their cost is token-based and computed directly via
 * `calculateCost` from `@/providers/utils`.
 *
 * @param pricing - Pricing declaration of the tool being billed.
 * @param params - Parameters the tool was called with; forwarded to `getCost`.
 * @param response - Response the tool produced; forwarded to `getCost`.
 * @returns The billable cost, plus any metadata a `custom` pricing returned.
 * @throws Error when `pricing.type` is not one of the known pricing kinds.
 */
export function calculateHostedCost(
  pricing: ToolHostingPricing,
  params: Record<string, unknown>,
  response: Record<string, unknown>
): HostedCostResult {
  if (pricing.type === 'per_request') {
    return { cost: pricing.cost }
  }

  if (pricing.type === 'custom') {
    const computed = pricing.getCost(params, response)
    // A bare number is wrapped; a structured result is handed back untouched.
    if (typeof computed === 'number') {
      return { cost: computed }
    }
    return computed
  }

  // Compile-time exhaustiveness guard: adding a pricing kind without handling
  // it above makes this assignment fail to type-check.
  const unhandled: never = pricing
  throw new Error(`Unknown pricing type: ${(unhandled as ToolHostingPricing).type}`)
}

/**
 * Classify a thrown value into a hosted-key failure reason for metrics.
 *
 * Handles structured SDK errors (numeric `.status`), provider errors that embed
 * the status in the message string (e.g. `API error (401): ...`), and plain
 * thrown strings. Some providers signal quota/rate-limit via 401/403 plus a
 * descriptive message, so those count as `rate_limited`, not `auth`.
 *
 * The input is whatever a `catch` clause received, so nothing about its shape
 * is trusted: a `.message` that is not a string is ignored rather than
 * lower-cased, and a `.status` that is not a number (e.g. `null`) is treated as
 * absent so the message-based fallback still runs. This function never throws.
 *
 * @param error - The caught value; may be an `Error`, a plain object, a string,
 *   `null`, or `undefined`.
 * @returns `'rate_limited'`, `'auth'`, or `'other'` when nothing matches.
 */
export function classifyHostedKeyFailure(error: unknown): 'rate_limited' | 'auth' | 'other' {
  const candidate = error as { status?: unknown; message?: unknown } | null | undefined

  // Only a numeric status is meaningful; anything else falls back to the message.
  const rawStatus = candidate?.status
  const status = typeof rawStatus === 'number' ? rawStatus : undefined

  // A thrown string is its own message; otherwise use `.message` when it is text.
  const rawMessage = typeof error === 'string' ? error : candidate?.message
  const message = typeof rawMessage === 'string' ? rawMessage.toLowerCase() : ''

  const mentionsLimit = message.includes('quota') || message.includes('rate limit')

  if (status === 429 || status === 503) return 'rate_limited'
  if (status === 401 || status === 403) {
    // Quota exhaustion reported through an auth status is still a rate limit.
    return mentionsLimit ? 'rate_limited' : 'auth'
  }

  // No structured status (e.g. provider errors that embed it in the message).
  if (status === undefined) {
    if (mentionsLimit || /\b(429|503)\b/.test(message)) {
      return 'rate_limited'
    }
    if (
      /\b(401|403)\b/.test(message) ||
      message.includes('unauthor') ||
      message.includes('forbidden') ||
      message.includes('invalid api key')
    ) {
      return 'auth'
    }
  }
  return 'other'
}

/**
 * Report hosted-key usage telemetry once a call has completed. CloudWatch
 * only — this is never a billing write.
 *
 * Two metrics are emitted: a usage record labelled with provider, tool and key,
 * and a cost record labelled with provider and tool. `recordCostCharged`
 * self-guards on `costTotal > 0`, so no check is made here.
 *
 * @param labels - Telemetry labels. `tool` carries the tool id for tools, or
 *   the model id for LLM calls; `costTotal` is the cost to report for the call.
 */
export function emitHostedKeyUsage(labels: {
  provider: string
  tool: string
  key: string
  costTotal: number
}): void {
  const { provider, tool, key, costTotal } = labels

  hostedKeyMetrics.recordUsed({ provider, tool, key })
  // The cost metric is deliberately not broken down by key.
  hostedKeyMetrics.recordCostCharged(costTotal, { provider, tool })
}
1 change: 1 addition & 0 deletions apps/sim/lib/core/config/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,7 @@ export const env = createEnv({
DISABLE_INVITATIONS: z.boolean().optional(), // Disable workspace invitations globally (for self-hosted deployments)
DISABLE_PUBLIC_API: z.boolean().optional(), // Disable public API access globally (for self-hosted deployments)
MOTHERSHIP_BETA_FEATURES: z.boolean().optional(), // Enable beta Mothership planning/changelog artifact surfaces
HOSTED_KEY_LLM: z.boolean().optional(), // Route hosted LLM calls through the hosted-key framework (acquire + centralized cost + metrics), no rate limiting

// Development Tools
REACT_GRAB_ENABLED: z.boolean().optional(), // Enable React Grab for UI element debugging in Cursor/AI agents (dev only)
Expand Down
8 changes: 8 additions & 0 deletions apps/sim/lib/core/config/feature-flags.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ const FEATURE_FLAGS = {
'user context — use enabled:true for global rollout rather than per-user targeting.',
fallback: 'MOTHERSHIP_BETA_FEATURES',
},
'hosted-key-llm': {
description:
'Route hosted LLM provider calls through the hosted-key framework (acquire + centralized ' +
'cost + metrics), with no rate limiting. Off = legacy getRotatingApiKey path. Evaluated ' +
'server-side with userId only (no orgId in the provider request), so roll out globally or ' +
'per-userId.',
fallback: 'HOSTED_KEY_LLM',
},
'table-snapshot-cache': {
description:
'Mount Sim tables into code sandboxes by reference via a version-keyed CSV snapshot in ' +
Expand Down
Loading
Loading