The OpenLIT SDK provides server-side evaluations via openlit.eval(), available in both the Python and JS/TS SDKs. Evaluations use the same engine, rules, contexts, and custom eval types configured in the OpenLIT dashboard, so they work identically in the development (offline) and production (online) stages.
Quick Start
Run your first offline evaluation in 3 lines of code.
Offline evaluations run on the OpenLIT server using the same evaluation engine as online/auto evaluations. The SDK sends your prompt and response to the server, which runs LLM-as-judge evaluation and returns structured results.
# Quick start (Python): evaluate a single prompt/response pair on the OpenLIT server.
import openlit

# Option 1: Configure once via init()
openlit.init(
    openlit_url="http://localhost:3000",
    openlit_api_key="openlit-xxxxx",
)

# Run evaluation
# The response contradicts the supplied context (Lyon vs. Paris), so this
# example is expected to be flagged rather than pass.
result = openlit.eval(
    prompt="What is the capital of France?",
    response="The capital of France is Lyon.",
    contexts=["Paris is the capital and largest city of France."],
)

# Use in assertions
# The failed evaluations are included in the assertion message for debugging.
assert result.passed, f"Evaluation failed: {result.failed_evals}"
// Quick start (JS/TS): evaluate a single prompt/response pair on the OpenLIT server.
import openlit, { isPassed, getFailedEvals } from 'openlit';

// Option 1: Configure once via init()
openlit.init({
  openlitUrl: 'http://localhost:3000',
  openlitApiKey: 'openlit-xxxxx',
});

// Run evaluation
// The response contradicts the supplied context (Lyon vs. Paris).
const result = await openlit.eval({
  prompt: 'What is the capital of France?',
  response: 'The capital of France is Lyon.',
  contexts: ['Paris is the capital and largest city of France.'],
});

// Use in assertions
// NOTE(review): `success` is true while `isPassed` is false here; presumably
// `success` reports that the request completed, not the verdict — confirm
// against the SDK reference.
console.log(result.success); // true
console.log(isPassed(result)); // false — hallucination detected
console.log(getFailedEvals(result)); // [{ type: 'hallucination', ... }]
# Batch evaluation (Python): evaluate several prompt/response pairs in one call.
import openlit

dataset = [
    # Prompt, response, and supporting contexts.
    {
        "prompt": "What is 2+2?",
        "response": "2+2 equals 4.",
        "contexts": ["Basic arithmetic."],
    },
    # No contexts supplied; the response is factually wrong.
    {
        "prompt": "Who wrote Hamlet?",
        "response": "Hamlet was written by Charles Dickens.",
    },
    # Item-level eval_types.
    # NOTE(review): presumably overrides the batch-level list for this item —
    # confirm against the SDK reference.
    {
        "prompt": "Describe gravity",
        "response": "Gravity is the force of attraction between masses.",
        "eval_types": ["hallucination"],
    },
]

batch_result = openlit.eval_batch(
    dataset=dataset,
    eval_types=["hallucination", "toxicity"],
    max_concurrent=5,
)

# pass_rate is formatted as a whole-number percentage.
print(f"Pass rate: {batch_result.pass_rate:.0%}")
assert batch_result.all_passed
// Batch evaluation (JS/TS): evaluate several prompt/response pairs in one call.
import openlit, { isAllPassed, getPassRate } from 'openlit';

const batchResult = await openlit.evalBatch({
  dataset: [
    // Prompt, response, and supporting contexts.
    {
      prompt: 'What is 2+2?',
      response: '2+2 equals 4.',
      contexts: ['Basic arithmetic.'],
    },
    // No contexts supplied; the response is factually wrong.
    {
      prompt: 'Who wrote Hamlet?',
      response: 'Hamlet was written by Charles Dickens.',
    },
    // Item-level evalTypes.
    // NOTE(review): presumably overrides the batch-level list for this item —
    // confirm against the SDK reference.
    {
      prompt: 'Describe gravity',
      response: 'Gravity is the force of attraction between masses.',
      evalTypes: ['hallucination'],
    },
  ],
  evalTypes: ['hallucination', 'toxicity'],
  maxConcurrent: 5,
});

// getPassRate() is scaled by 100 here to print a whole-number percentage.
console.log(`Pass rate: ${(getPassRate(batchResult) * 100).toFixed(0)}%`);
console.log(`All passed: ${isAllPassed(batchResult)}`);
The SDK automatically resolves trace attributes for rule engine matching, enabling context-aware evaluations without extra configuration. The resolution order (last wins):
Use offline evaluations in your test suite or CI pipeline:
Python
TypeScript / JavaScript
# pytest example: use offline evaluations as quality gates in a test suite.
import openlit
import pytest


def test_no_hallucination():
    """Fail the test if the hallucination evaluation does not pass."""
    result = openlit.eval(
        prompt="What year did WW2 end?",
        response="World War 2 ended in 1945.",
        eval_types=["hallucination"],
        print_results=False,
    )
    assert result.passed, f"Hallucination detected: {result.failed_evals}"


def test_batch_quality():
    """Fail the test if fewer than 95% of the dataset items pass evaluation."""
    dataset = load_test_cases()  # your test data
    result = openlit.eval_batch(
        dataset=dataset,
        print_results=False,
    )
    assert result.pass_rate >= 0.95, f"Pass rate too low: {result.pass_rate:.0%}"
// Test-runner example: use offline evaluations as quality gates in a test suite.
import openlit, { isPassed, getFailedEvals, isAllPassed, getPassRate } from 'openlit';
import { describe, test, expect } from 'vitest'; // or jest

describe('LLM quality', () => {
  // Fails if the hallucination evaluation does not pass.
  test('no hallucination', async () => {
    const result = await openlit.eval({
      prompt: 'What year did WW2 end?',
      response: 'World War 2 ended in 1945.',
      evalTypes: ['hallucination'],
      printResults: false,
    });
    expect(isPassed(result)).toBe(true);
  });

  // Fails if fewer than 95% of the dataset items pass evaluation.
  test('batch quality', async () => {
    const result = await openlit.evalBatch({
      // loadTestCases is not defined in this snippet; supply your own test data.
      dataset: loadTestCases(),
      printResults: false,
    });
    expect(getPassRate(result)).toBeGreaterThanOrEqual(0.95);
  });
});