Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

testCases.ts 11 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
  1. import * as path from 'path';
  2. import * as fs from 'fs';
  3. import yaml from 'js-yaml';
  4. import { parse as parsePath } from 'path';
  5. import { parse as parseCsv } from 'csv-parse/sync';
  6. import { globSync } from 'glob';
  7. import logger from './logger';
  8. import { OpenAiChatCompletionProvider } from './providers/openai';
  9. import { testCaseFromCsvRow } from './csv';
  10. import { fetchCsvFromGoogleSheet } from './googleSheets';
  11. import type {
  12. CsvRow,
  13. ProviderOptions,
  14. TestCase,
  15. TestSuite,
  16. TestSuiteConfig,
  17. VarMapping,
  18. } from './types';
  19. import { loadApiProvider } from './providers';
  // Model name handed to OpenAiChatCompletionProvider when `synthesize` generates test cases.
  20. const SYNTHESIZE_DEFAULT_PROVIDER = 'gpt-4-0125-preview';
  /**
   * Parses a JSON document without throwing.
   *
   * @param json - Raw JSON text.
   * @returns The parsed value, or `undefined` when `json` is not valid JSON.
   */
  function parseJson(json: string): any | undefined {
    try {
      return JSON.parse(json);
    } catch {
      // Best-effort by design: malformed input yields `undefined` rather than an exception.
      return undefined;
    }
  }
  /**
   * Reads every YAML file matched by the given path(s)/glob(s) and merges their
   * contents into one variables object.
   *
   * Files are merged with `Object.assign` in match order, so a key defined in a
   * later file replaces the same key from an earlier one.
   *
   * @param pathOrGlobs - A single path/glob or a list of them.
   * @param basePath - Directory that each path/glob is resolved against.
   * @returns The merged variables.
   */
  export async function readVarsFiles(
    pathOrGlobs: string | string[],
    basePath: string = '',
  ): Promise<Record<string, string | string[] | object>> {
    const patterns = typeof pathOrGlobs === 'string' ? [pathOrGlobs] : pathOrGlobs;
    const merged: Record<string, string | string[] | object> = {};

    for (const pattern of patterns) {
      const matchedFiles = globSync(path.resolve(basePath, pattern), {
        windowsPathsNoEscape: true,
      });
      for (const file of matchedFiles) {
        const contents = fs.readFileSync(file, 'utf-8');
        Object.assign(merged, yaml.load(contents));
      }
    }

    return merged;
  }
  /**
   * Reads a standalone file of test rows and converts each row into a test case.
   *
   * Supported sources: a Google Sheets URL, or a local `.csv`, `.json`, `.yaml`
   * or `.yml` file. Any other extension produces an empty list.
   *
   * @param varsPath - Google Sheets URL or path to the tests file.
   * @param basePath - Directory that a local `varsPath` is resolved against.
   * @returns One test case per row, described as `Row #<n>` (1-based).
   * @throws Error if the source does not contain a list of rows (for example
   *   invalid JSON, or a YAML document that is empty or not a sequence).
   */
  export async function readStandaloneTestsFile(
    varsPath: string,
    basePath: string = '',
  ): Promise<TestCase[]> {
    // This function is confusingly named - it reads a CSV, JSON, or YAML file of
    // TESTS or test equivalents.
    const resolvedVarsPath = path.resolve(basePath, varsPath);
    const fileExtension = parsePath(varsPath).ext.slice(1);

    // Kept as `unknown` until validated: the JSON and YAML loaders can return anything.
    let loaded: unknown = [];
    if (varsPath.startsWith('https://docs.google.com/spreadsheets/')) {
      loaded = await fetchCsvFromGoogleSheet(varsPath);
    } else if (fileExtension === 'csv') {
      loaded = parseCsv(fs.readFileSync(resolvedVarsPath, 'utf-8'), { columns: true });
    } else if (fileExtension === 'json') {
      // parseJson returns undefined for malformed JSON; that is caught by the check below.
      loaded = parseJson(fs.readFileSync(resolvedVarsPath, 'utf-8'));
    } else if (fileExtension === 'yaml' || fileExtension === 'yml') {
      loaded = yaml.load(fs.readFileSync(resolvedVarsPath, 'utf-8'));
    }

    if (!Array.isArray(loaded)) {
      const actual = loaded === null ? 'null' : typeof loaded;
      throw new Error(
        `Could not read a list of test rows from ${varsPath}: expected an array but got ${actual}.`,
      );
    }

    const rows: CsvRow[] = loaded;
    return rows.map((row, idx) => {
      const test = testCaseFromCsvRow(row);
      test.description = `Row #${idx + 1}`;
      return test;
    });
  }
  // A test case whose `vars` is either an inline mapping or a string / string array.
  // readTest hands the string forms to readVarsFiles as path(s) or glob(s) of vars files.
  72. type TestCaseWithVarsFile = TestCase<
  73. Record<string, string | string[] | object> | string | string[]
  74. >;
  /**
   * Loads a single test case, resolving external vars files and the provider.
   *
   * @param test - Either a path to a YAML file holding one test case, or the
   *   test case object itself.
   * @param basePath - Directory that a test file path is resolved against. For
   *   an inline test case it is also the base for its vars files.
   * @returns The test case with `vars` resolved to a mapping and, where a
   *   provider was given as a string or as an object with a string `id`, the
   *   provider replaced by the result of `loadApiProvider`.
   * @throws Error if the test case has none of `assert`, `vars` or `options`.
   */
  export async function readTest(
    test: string | TestCaseWithVarsFile,
    basePath: string = '',
  ): Promise<TestCase> {
    // Find the raw test case and the directory its vars files are relative to.
    let rawTest: TestCaseWithVarsFile;
    let varsBasePath: string;
    if (typeof test === 'string') {
      const testFilePath = path.resolve(basePath, test);
      rawTest = yaml.load(fs.readFileSync(testFilePath, 'utf-8')) as TestCaseWithVarsFile;
      // Vars files referenced from a test file are relative to that file.
      varsBasePath = path.dirname(testFilePath);
    } else {
      rawTest = test;
      varsBasePath = basePath;
    }

    // String / array vars name files to load; an inline mapping is passed
    // through untouched (no `file://` expansion happens here).
    const rawVars = rawTest.vars;
    const resolvedVars =
      typeof rawVars === 'string' || Array.isArray(rawVars)
        ? await readVarsFiles(rawVars, varsBasePath)
        : rawVars;
    const testCase: TestCase = { ...rawTest, vars: resolvedVars };

    // Load provider, unless it is absent or already a function.
    const declaredProvider = testCase.provider;
    if (declaredProvider && typeof declaredProvider !== 'function') {
      if (typeof declaredProvider === 'string') {
        testCase.provider = await loadApiProvider(declaredProvider);
      } else if (typeof declaredProvider.id === 'string') {
        testCase.provider = await loadApiProvider(declaredProvider.id, {
          options: declaredProvider as ProviderOptions,
          basePath,
        });
      }
    }

    // Validation of the shape of test
    const hasContent = Boolean(testCase.assert || testCase.vars || testCase.options);
    if (!hasContent) {
      const dump = JSON.stringify(testCase, null, 2);
      throw new Error(
        `Test case must have either assert, vars, or options property. Instead got ${dump}`,
      );
    }

    return testCase;
  }
  /**
   * Loads all test cases described by a test suite's `tests` setting.
   *
   * `tests` may be:
   * - a string ending in `yaml`/`yml`: a path or glob of files that each hold a
   *   list of test cases;
   * - any other string: a standalone tests file (CSV etc.) or Google Sheet,
   *   read by `readStandaloneTestsFile`;
   * - an array mixing glob strings and inline test case objects.
   * Any other value yields an empty list.
   *
   * @param tests - The `tests` value from the suite configuration.
   * @param basePath - Directory that paths and globs are resolved against.
   * @returns The loaded test cases, in the order they were listed.
   * @throws Error if a matched file has an unsupported extension or does not
   *   contain a list of test cases.
   */
  export async function readTests(
    tests: TestSuiteConfig['tests'],
    basePath: string = '',
  ): Promise<TestCase[]> {
    const ret: TestCase[] = [];

    const loadTestsFromGlob = async (loadTestsGlob: string): Promise<TestCase[]> => {
      const resolvedPath = path.resolve(basePath, loadTestsGlob);
      const testFiles = globSync(resolvedPath, {
        windowsPathsNoEscape: true,
      });
      const loaded: TestCase[] = [];
      for (const testFile of testFiles) {
        let testCases: TestCase[] | undefined;
        if (testFile.endsWith('.csv')) {
          testCases = await readStandaloneTestsFile(testFile, basePath);
        } else if (testFile.endsWith('.yaml') || testFile.endsWith('.yml')) {
          testCases = yaml.load(fs.readFileSync(testFile, 'utf-8')) as TestCase[];
        } else if (testFile.endsWith('.json')) {
          // Read from disk like the other formats instead of `require()`, which caches
          // the result and is unavailable in ES modules. Strip a leading BOM, as
          // `require()` did, so such files keep loading.
          const jsonText = fs.readFileSync(testFile, 'utf-8').replace(/^\uFEFF/, '');
          testCases = JSON.parse(jsonText) as TestCase[];
        } else {
          throw new Error(`Unsupported file type for test file: ${testFile}`);
        }
        if (testCases) {
          if (!Array.isArray(testCases)) {
            throw new Error(`Expected test file ${testFile} to contain a list of test cases.`);
          }
          for (const testCase of testCases) {
            // Vars files referenced by a test are relative to the file that declared it.
            loaded.push(await readTest(testCase, path.dirname(testFile)));
          }
        }
      }
      return loaded;
    };

    if (typeof tests === 'string') {
      if (tests.endsWith('yaml') || tests.endsWith('yml')) {
        // Points to a tests file with multiple test cases
        return loadTestsFromGlob(tests);
      } else {
        // Points to a tests.csv or Google Sheet
        return readStandaloneTestsFile(tests, basePath);
      }
    } else if (Array.isArray(tests)) {
      for (const globOrTest of tests) {
        if (typeof globOrTest === 'string') {
          // Resolve globs
          ret.push(...(await loadTestsFromGlob(globOrTest)));
        } else {
          // It's just a TestCase
          ret.push(await readTest(globOrTest, basePath));
        }
      }
    }
    return ret;
  }
  /** Options accepted by `synthesize`. */
  191. interface SynthesizeOptions {
  // Prompt texts to synthesize tests for; `synthesize` throws when this list is empty.
  192. prompts: string[];
  // Optional extra text appended to each per-persona generation prompt.
  193. instructions?: string;
  // Existing test cases; the `vars` of up to 100 of them are embedded in the generation prompt.
  194. tests: TestCase[];
  // Maximum number of personas requested from the model; `synthesize` uses 5 when falsy.
  195. numPersonas?: number;
  // Number of variable sets requested per persona; `synthesize` uses 3 when falsy.
  196. numTestCasesPerPersona?: number;
  197. }
  /**
   * Runs `synthesize` for a test suite, using the suite's raw prompt texts and
   * its existing tests (an empty list when the suite has none).
   *
   * @param testSuite - Suite providing `prompts` and, optionally, `tests`.
   * @param options - Extra synthesis options; any `prompts` or `tests` given
   *   here are replaced by the values taken from `testSuite`.
   * @returns Whatever `synthesize` resolves to.
   */
  export async function synthesizeFromTestSuite(
    testSuite: TestSuite,
    options: Partial<SynthesizeOptions>,
  ) {
    const rawPrompts = testSuite.prompts.map(({ raw }) => raw);
    const existingTests = testSuite.tests || [];
    return synthesize({ ...options, prompts: rawPrompts, tests: existingTests });
  }
  /**
   * Generates new sets of test variables for the given prompts by querying an
   * OpenAI chat model in two stages: first it asks for user personas, then, for
   * each persona, it asks for variable values that persona would plausibly send.
   *
   * Unless `LOG_LEVEL` is `debug`, a CLI progress bar is shown. It is always
   * stopped, even when a provider call or response parsing fails.
   *
   * @param prompts - Prompt texts; `{{name}}` placeholders define the variables.
   * @param instructions - Optional extra text appended to each persona request.
   * @param tests - Existing tests; vars of up to 100 are shown to the model.
   * @param numPersonas - Maximum personas to request (5 when falsy).
   * @param numTestCasesPerPersona - Variable sets requested per persona (3 when falsy).
   * @returns The generated variable sets with exact duplicates removed.
   * @throws Error if `prompts` is empty, or if a provider response is missing,
   *   is not valid JSON, or lacks the expected `personas` / `vars` list.
   */
  export async function synthesize({
    prompts,
    instructions,
    tests,
    numPersonas,
    numTestCasesPerPersona,
  }: SynthesizeOptions): Promise<VarMapping[]> {
    if (prompts.length < 1) {
      throw new Error('Dataset synthesis requires at least one prompt.');
    }
    numPersonas = numPersonas || 5;
    numTestCasesPerPersona = numTestCasesPerPersona || 3;

    // Parses a provider response, failing with context instead of a bare SyntaxError.
    const parseJsonOutput = (output: unknown, what: string): unknown => {
      if (typeof output !== 'string') {
        throw new Error(
          `Dataset synthesis failed: provider returned no text output for ${what}.`,
        );
      }
      try {
        return JSON.parse(output);
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(
          `Dataset synthesis failed: could not parse ${what} response as JSON: ${reason}`,
        );
      }
    };

    let progressBar;
    if (process.env.LOG_LEVEL !== 'debug') {
      const cliProgress = await import('cli-progress');
      progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
      // One step for persona generation plus one per expected test case.
      const totalProgressSteps = 1 + numPersonas * numTestCasesPerPersona;
      progressBar.start(totalProgressSteps, 0);
    }

    // For each user persona, we will generate a map of variable names to values
    const testCaseVars: VarMapping[] = [];

    try {
      logger.debug(
        `Starting dataset synthesis. We'll begin by generating up to ${numPersonas} personas. Each persona will be used to generate ${numTestCasesPerPersona} test cases.`,
      );

      // Consider the following prompt for an LLM application: {{prompt}}. List up to 5 user personas that would send this prompt.
      logger.debug(`\nGenerating user personas from ${prompts.length} prompts...`);

      const provider = new OpenAiChatCompletionProvider(SYNTHESIZE_DEFAULT_PROVIDER, {
        config: {
          temperature: 1.0,
          response_format: {
            type: 'json_object',
          },
        },
      });

      const promptNoun = prompts.length > 1 ? 'these prompts' : 'this prompt';
      const promptsString = `<Prompts>
${prompts.map((prompt) => `<Prompt>\n${prompt}\n</Prompt>`).join('\n')}
</Prompts>`;

      const resp = await provider.callApi(
        `Consider the following prompt${prompts.length > 1 ? 's' : ''} for an LLM application:
${promptsString}
List up to ${numPersonas} user personas that would send ${promptNoun}. Your response should be JSON of the form {personas: string[]}`,
      );

      const personaPayload = parseJsonOutput(resp.output, 'persona list') as {
        personas?: unknown;
      } | null;
      const personaList = personaPayload?.personas;
      if (!Array.isArray(personaList)) {
        throw new Error(
          'Dataset synthesis failed: persona list response did not contain a "personas" array.',
        );
      }
      const personas: string[] = personaList;
      logger.debug(
        `\nGenerated ${personas.length} personas:\n${personas.map((p) => ` - ${p}`).join('\n')}`,
      );

      if (progressBar) {
        progressBar.increment();
      }

      // Extract variable names from the nunjucks template in the prompts
      const variableRegex = /{{\s*(\w+)\s*}}/g;
      const variables = new Set<string>();
      for (const prompt of prompts) {
        let match;
        // exec() resets lastIndex to 0 when it returns null, so the regex is safe to reuse.
        while ((match = variableRegex.exec(prompt)) !== null) {
          const variableName = match[1];
          if (variableName !== undefined) {
            variables.add(variableName);
          }
        }
      }
      logger.debug(
        `\nExtracted ${variables.size} variables from prompts:\n${Array.from(variables)
          .map((v) => ` - ${v}`)
          .join('\n')}`,
      );

      // Only tests that define vars are shown to the model, capped at 100.
      const existingTests =
        `Here are some existing tests:` +
        tests
          .filter((test) => Boolean(test.vars))
          .map(
            (test) => `<Test>
${JSON.stringify(test.vars, null, 2)}
</Test>
`,
          )
          .slice(0, 100)
          .join('\n');

      const variableSchema = Array.from(variables)
        .map((varName) => `${varName}: string`)
        .join(', ');

      for (let i = 0; i < personas.length; i++) {
        const persona = personas[i];
        logger.debug(`\nGenerating test cases for persona ${i + 1}...`);

        // Construct the prompt for the LLM to generate variable values
        const personaPrompt = `Consider ${promptNoun}, which contains some {{variables}}:
${promptsString}
This is your persona:
<Persona>
${persona}
</Persona>
${existingTests}
Fully embody this persona and determine a value for each variable, such that the prompt would be sent by this persona.
You are a tester, so try to think of ${numTestCasesPerPersona} sets of values that would be interesting or unusual to test. ${
          instructions || ''
        }
Your response should contain a JSON map of variable names to values, of the form {vars: {${variableSchema}}[]}`;

        // Call the LLM API with the constructed prompt
        const personaResponse = await provider.callApi(personaPrompt);
        const varsPayload = parseJsonOutput(
          personaResponse.output,
          `persona ${i + 1} test cases`,
        ) as { vars?: unknown } | null;
        const generatedVars = varsPayload?.vars;
        if (!Array.isArray(generatedVars)) {
          throw new Error(
            `Dataset synthesis failed: response for persona ${i + 1} did not contain a "vars" array.`,
          );
        }

        for (const vars of generatedVars as VarMapping[]) {
          logger.debug(`${JSON.stringify(vars, null, 2)}`);
          testCaseVars.push(vars);
          if (progressBar) {
            progressBar.increment();
          }
        }
      }
    } finally {
      // Always release the terminal, including when a call above throws.
      if (progressBar) {
        progressBar.stop();
      }
    }

    // Dedup test case vars
    const uniqueTestCaseStrings = new Set(
      testCaseVars.map((testCase) => JSON.stringify(testCase)),
    );
    const dedupedTestCaseVars: VarMapping[] = Array.from(uniqueTestCaseStrings).map(
      (testCase) => JSON.parse(testCase),
    );
    return dedupedTestCaseVars;
  }
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...