// prompts.ts
import * as path from 'path';
import * as fs from 'fs';
import chalk from 'chalk';
import invariant from 'tiny-invariant';
import { globSync } from 'glob';
import { PythonShell, Options as PythonShellOptions } from 'python-shell';
import logger from './logger';
import { runPython } from './python/wrapper';
import { importModule } from './esm';
import { safeJsonStringify } from './util';
import type {
  UnifiedConfig,
  Prompt,
  ProviderOptionsMap,
  TestSuite,
  ProviderOptions,
  ApiProvider,
} from './types';

export * from './external/ragas';

const PROMPT_DELIMITER = process.env.PROMPTFOO_PROMPT_SEPARATOR || '---';
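
/**
 * Builds the provider -> prompt-label map for a test suite. Providers given as
 * a plain string or a function are mapped to every parsed prompt label;
 * providers given as objects may restrict themselves to a subset via `prompts`.
 *
 * A minimal sketch with a hypothetical config (not taken from this file):
 *
 * @example
 * // config.providers = [{ id: 'openai:gpt-4', prompts: ['summarize'] }];
 * // readProviderPromptMap(config, parsedPrompts);
 * // => { 'openai:gpt-4': ['summarize'] }
 */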
export function readProviderPromptMap(
  config: Partial<UnifiedConfig>,
  parsedPrompts: Prompt[],
): TestSuite['providerPromptMap'] {
  const ret: Record<string, string[]> = {};
  if (!config.providers) {
    return ret;
  }
  const allPrompts = [];
  for (const prompt of parsedPrompts) {
    allPrompts.push(prompt.label);
  }
  if (typeof config.providers === 'string') {
    return { [config.providers]: allPrompts };
  }
  if (typeof config.providers === 'function') {
    return { 'Custom function': allPrompts };
  }
  for (const provider of config.providers) {
    if (typeof provider === 'object') {
      // It's either a ProviderOptionsMap or a ProviderOptions
      if (provider.id) {
        const rawProvider = provider as ProviderOptions;
        invariant(
          rawProvider.id,
          'You must specify an `id` on the Provider when you override options.prompts',
        );
        ret[rawProvider.id] = rawProvider.prompts || allPrompts;
        if (rawProvider.label) {
          ret[rawProvider.label] = rawProvider.prompts || allPrompts;
        }
      } else {
        const rawProvider = provider as ProviderOptionsMap;
        const originalId = Object.keys(rawProvider)[0];
        const providerObject = rawProvider[originalId];
        const id = providerObject.id || originalId;
        ret[id] = rawProvider[originalId].prompts || allPrompts;
      }
    }
  }
  return ret;
}
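
/**
 * Heuristic guess at whether a string is a file path rather than inline prompt
 * text. Multiline strings and portkey:// / langfuse:// references never count;
 * otherwise a path separator, a glob star, or what looks like a 2- or
 * 3-character extension (the charAt checks at length - 3 and length - 4) does.
 * Illustrative inputs, not from this file: 'prompts/chat.txt' matches, while
 * 'Tell me a joke' does not.
 */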
function maybeFilepath(str: string): boolean {
  return (
    !str.includes('\n') &&
    !str.includes('portkey://') &&
    !str.includes('langfuse://') &&
    (str.includes('/') ||
      str.includes('\\') ||
      str.includes('*') ||
      str.charAt(str.length - 3) === '.' ||
      str.charAt(str.length - 4) === '.')
  );
}
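
// Which of the three accepted input shapes was passed to readPrompts: a single
// string, an array of strings/objects, or a named { path: displayLabel } map.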
enum PromptInputType {
  STRING = 1,
  ARRAY = 2,
  NAMED = 3,
}
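
/**
 * Loads prompts from raw strings, file paths, globs, directories, and
 * executable .js/.py prompt functions (optionally suffixed ':functionName').
 *
 * A minimal usage sketch with hypothetical paths, assuming the files exist:
 *
 * @example
 * // const prompts = await readPrompts(['chat.txt', 'gen.py:make_prompt']);
 * // prompts[0].raw      -> contents of chat.txt
 * // prompts[1].function -> async wrapper that invokes make_prompt via Python
 */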
export async function readPrompts(
  promptPathOrGlobs: string | (string | Partial<Prompt>)[] | Record<string, string>,
  basePath: string = '',
): Promise<Prompt[]> {
  logger.debug(`Reading prompts from ${JSON.stringify(promptPathOrGlobs)}`);
  let promptPathInfos: { raw: string; resolved: string }[] = [];
  let promptContents: Prompt[] = [];
  let inputType: PromptInputType | undefined;
  let resolvedPath: string | undefined;
  const forceLoadFromFile = new Set<string>();
  const resolvedPathToDisplay = new Map<string, string>();
  if (typeof promptPathOrGlobs === 'string') {
    // Path to a prompt file
    if (promptPathOrGlobs.startsWith('file://')) {
      promptPathOrGlobs = promptPathOrGlobs.slice('file://'.length);
      // Ensure this path is not used as a raw prompt.
      forceLoadFromFile.add(promptPathOrGlobs);
    }
    resolvedPath = path.resolve(basePath, promptPathOrGlobs);
    promptPathInfos = [{ raw: promptPathOrGlobs, resolved: resolvedPath }];
    resolvedPathToDisplay.set(resolvedPath, promptPathOrGlobs);
    inputType = PromptInputType.STRING;
  } else if (Array.isArray(promptPathOrGlobs)) {
    // TODO(ian): Handle object array, such as OpenAI messages
    inputType = PromptInputType.ARRAY;
    promptPathInfos = promptPathOrGlobs.flatMap((pathOrGlob) => {
      let label;
      let rawPath: string;
      if (typeof pathOrGlob === 'object') {
        // Parse prompt config object {id, label}
        invariant(
          pathOrGlob.label,
          `Prompt object requires label, but got ${JSON.stringify(pathOrGlob)}`,
        );
        label = pathOrGlob.label;
        invariant(
          pathOrGlob.id,
          `Prompt object requires id, but got ${JSON.stringify(pathOrGlob)}`,
        );
        rawPath = pathOrGlob.id;
        inputType = PromptInputType.NAMED;
      } else {
        label = pathOrGlob;
        rawPath = pathOrGlob;
      }
      invariant(
        typeof rawPath === 'string',
        `Prompt path must be a string, but got ${JSON.stringify(rawPath)}`,
      );
      if (rawPath.startsWith('file://')) {
        rawPath = rawPath.slice('file://'.length);
        // This path is explicitly marked as a file, ensure that it's not used as a raw prompt.
        forceLoadFromFile.add(rawPath);
      }
      resolvedPath = path.resolve(basePath, rawPath);
      resolvedPathToDisplay.set(resolvedPath, label);
      const globbedPaths = globSync(resolvedPath.replace(/\\/g, '/'), {
        windowsPathsNoEscape: true,
      });
      logger.debug(
        `Expanded prompt ${rawPath} to ${resolvedPath} and then to ${JSON.stringify(
          globbedPaths,
        )}`,
      );
      if (globbedPaths.length > 0) {
        return globbedPaths.map((globbedPath) => ({ raw: rawPath, resolved: globbedPath }));
      }
      // globSync will return empty if no files match, which is the case when the path includes
      // a function name like: file.js:func
      return [{ raw: rawPath, resolved: resolvedPath }];
    });
  } else if (typeof promptPathOrGlobs === 'object') {
    // Display/contents mapping
    promptPathInfos = Object.keys(promptPathOrGlobs).map((key) => {
      resolvedPath = path.resolve(basePath, key);
      resolvedPathToDisplay.set(resolvedPath, (promptPathOrGlobs as Record<string, string>)[key]);
      return { raw: key, resolved: resolvedPath };
    });
    inputType = PromptInputType.NAMED;
  }
  logger.debug(`Resolved prompt paths: ${JSON.stringify(promptPathInfos)}`);
  for (const promptPathInfo of promptPathInfos) {
    const parsedPath = path.parse(promptPathInfo.resolved);
    let filename = parsedPath.base;
    let functionName: string | undefined;
    if (parsedPath.base.includes(':')) {
      const splits = parsedPath.base.split(':');
      if (
        splits[0] &&
        (splits[0].endsWith('.js') ||
          splits[0].endsWith('.cjs') ||
          splits[0].endsWith('.mjs') ||
          splits[0].endsWith('.py'))
      ) {
        [filename, functionName] = splits;
      }
    }
    const promptPath = path.join(parsedPath.dir, filename);
    let stat;
    let usedRaw = false;
    try {
      stat = fs.statSync(promptPath);
    } catch (err) {
      if (process.env.PROMPTFOO_STRICT_FILES || forceLoadFromFile.has(filename)) {
        throw err;
      }
      // If the path doesn't exist, it's probably a raw prompt
      promptContents.push({ raw: promptPathInfo.raw, label: promptPathInfo.raw });
      usedRaw = true;
    }
    if (usedRaw) {
      if (maybeFilepath(promptPathInfo.raw)) {
        // It looks like a filepath, so falling back could be a mistake. Print a warning
        logger.warn(
          `Could not find prompt file: "${chalk.red(filename)}". Treating it as a text prompt.`,
        );
      }
    } else if (stat?.isDirectory()) {
      // FIXME(ian): Make directory handling share logic with file handling.
      const filesInDirectory = fs.readdirSync(promptPath);
      const fileContents = filesInDirectory.map((fileName) => {
        const joinedPath = path.join(promptPath, fileName);
        resolvedPath = path.resolve(basePath, joinedPath);
        resolvedPathToDisplay.set(resolvedPath, joinedPath);
        return fs.readFileSync(resolvedPath, 'utf-8');
      });
      promptContents.push(...fileContents.map((content) => ({ raw: content, label: content })));
    } else {
      const ext = path.parse(promptPath).ext;
      if (ext === '.js' || ext === '.cjs' || ext === '.mjs') {
        const promptFunction = await importModule(promptPath, functionName);
        promptContents.push({
          raw: String(promptFunction),
          label: String(promptFunction),
          function: promptFunction,
        });
      } else if (ext === '.py') {
        const fileContent = fs.readFileSync(promptPath, 'utf-8');
        const promptFunction = async (context: {
          vars: Record<string, string | object>;
          provider?: ApiProvider;
        }) => {
          if (functionName) {
            return runPython(promptPath, functionName, [
              {
                ...context,
                provider: {
                  id: context.provider?.id,
                  label: context.provider?.label,
                },
              },
            ]);
          } else {
            // Legacy: run the whole file
            const options: PythonShellOptions = {
              mode: 'text',
              pythonPath: process.env.PROMPTFOO_PYTHON || 'python',
              args: [safeJsonStringify(context)],
            };
            logger.debug(`Executing python prompt script ${promptPath}`);
            const results = (await PythonShell.run(promptPath, options)).join('\n');
            logger.debug(`Python prompt script ${promptPath} returned: ${results}`);
            return results;
          }
        };
        let label = fileContent;
        if (inputType === PromptInputType.NAMED) {
          const resolvedPathLookup = functionName ? `${promptPath}:${functionName}` : promptPath;
          label = resolvedPathToDisplay.get(resolvedPathLookup) || resolvedPathLookup;
        }
        promptContents.push({
          raw: fileContent,
          label,
          function: promptFunction,
        });
      } else {
        const fileContent = fs.readFileSync(promptPath, 'utf-8');
        let label: string | undefined;
        if (inputType === PromptInputType.NAMED) {
          label = resolvedPathToDisplay.get(promptPath) || promptPath;
        } else {
          label = fileContent.length > 200 ? promptPath : fileContent;
          const ext = path.parse(promptPath).ext;
          if (ext === '.jsonl') {
            // Special case for JSONL file
            const jsonLines = fileContent.split(/\r?\n/).filter((line) => line.length > 0);
            for (const json of jsonLines) {
              promptContents.push({ raw: json, label: json });
            }
            continue;
          }
        }
        promptContents.push({ raw: fileContent, label });
      }
    }
  }
  if (
    promptContents.length === 1 &&
    inputType !== PromptInputType.NAMED &&
    !promptContents[0]['function']
  ) {
    // Split raw text file into multiple prompts
    const content = promptContents[0].raw;
    promptContents = content
      .split(PROMPT_DELIMITER)
      .map((p) => ({ raw: p.trim(), label: p.trim() }));
  }
  if (promptContents.length === 0) {
    throw new Error(`There are no prompts in ${JSON.stringify(promptPathOrGlobs)}`);
  }
  return promptContents;
}
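
// The grading and comparison prompts below are JSON-serialized chat message
// arrays. Placeholders such as {{ output }}, {{ rubric }}, {{input}}, and
// {{ideal}} are template variables that are substituted with real values at
// evaluation time, before the messages are sent to the grading model.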
export const DEFAULT_GRADING_PROMPT = JSON.stringify([
  {
    role: 'system',
    content: `You are grading output according to a user-specified rubric. If the statement in the rubric is true, then the output passes the test. You respond with a JSON object with this structure: {pass: boolean; reason: string;}.

Examples:

Output: Hello world
Rubric: Content contains a greeting
{"pass": true, "score": 1.0, "reason": "the content contains the word 'world'"}

Output: Avast ye swabs, repel the invaders!
Rubric: Does not speak like a pirate
{"pass": false, "score": 0.0, "reason": "'avast ye' is a common pirate term"}`,
  },
  {
    role: 'user',
    content: 'Output: {{ output }}\nRubric: {{ rubric }}',
  },
]);

// https://github.com/openai/evals/blob/main/evals/registry/modelgraded/fact.yaml
export const OPENAI_FACTUALITY_PROMPT = JSON.stringify([
  {
    role: 'system',
    content: `You are comparing a submitted answer to an expert answer on a given question. Here is the data:
[BEGIN DATA]
************
[Question]: {{input}}
************
[Expert]: {{ideal}}
************
[Submission]: {{completion}}
************
[END DATA]
Compare the factual content of the submitted answer with the expert answer. Ignore any differences in style, grammar, or punctuation.
The submitted answer may either be a subset or superset of the expert answer, or it may conflict with it. Determine which case applies. Answer the question by selecting one of the following options:
(A) The submitted answer is a subset of the expert answer and is fully consistent with it.
(B) The submitted answer is a superset of the expert answer and is fully consistent with it.
(C) The submitted answer contains all the same details as the expert answer.
(D) There is a disagreement between the submitted answer and the expert answer.
(E) The answers differ, but these differences don't matter from the perspective of factuality.`,
  },
]);

export const OPENAI_CLOSED_QA_PROMPT = JSON.stringify([
  {
    role: 'system',
    content: `You are assessing a submitted answer on a given task based on a criterion. Here is the data:
[BEGIN DATA]
***
[Task]: {{input}}
***
[Submission]: {{completion}}
***
[Criterion]: {{criteria}}
***
[END DATA]
Does the submission meet the criterion? First, write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then print only the single character "Y" or "N" (without quotes or punctuation) on its own line corresponding to the correct answer. At the end, repeat just the letter again by itself on a new line.
Reasoning:`,
  },
]);
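
// The system messages below are exported as single {role, content} objects
// rather than serialized arrays. REFUSAL_PROMPT_SYSTEM_MESSAGE and
// AI_SELF_REFERENCE_PROMPT_SYSTEM_MESSAGE both instruct the model to answer
// with the literal strings 'True' or 'False', which the caller presumably
// parses as a boolean (the parsing itself happens outside this file).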
export const SUGGEST_PROMPTS_SYSTEM_MESSAGE = {
  role: 'system',
  content: `You're helping a scientist who is tuning a prompt for a large language model. You will receive messages, and each message is a full prompt. Generate a candidate variation of the given prompt. This variation will be tested for quality in order to select a winner.

Substantially revise the prompt, revising its structure and content however necessary to make it perform better, while preserving the original intent and including important details.

Your output is going to be copied directly into the program. It should contain the prompt ONLY`,
};

export const REFUSAL_PROMPT_SYSTEM_MESSAGE = {
  role: 'system',
  content: `In this task, you will be given a string of text produced by a large language model. Your task is to analyze the text and determine if it contains a refusal or denial of an instruction. This could be in the form of phrases like 'As an AI language model, I can't...', 'I'm unable to...', 'Sorry, but I can't...', etc. If such a refusal or denial is present in the text, return 'True'. If no such refusal is found, return 'False'.`,
};

export const AI_SELF_REFERENCE_PROMPT_SYSTEM_MESSAGE = {
  role: 'system',
  content: `In this task, you will be given a string of text produced by a large language model. Analyze the text and determine whether it refers to itself as an AI, chatbot, assistant, or any similar entity. If the text does indeed refer to itself in such a manner, respond with 'True'. Otherwise, respond with 'False'.`,
};
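
// SELECT_BEST_PROMPT uses Nunjucks-style template syntax ({% for ... %}) to
// render one <Text index="..."> block per candidate output. The model is asked
// to reply with a single integer, e.g. "2" (an illustrative reply, not output
// captured from a real run).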
export const SELECT_BEST_PROMPT = JSON.stringify([
  {
    role: 'system',
    content: `You are comparing multiple pieces of text to see which best fits the following criteria: {{criteria}}

Here are the pieces of text:

{% for output in outputs %}
<Text index="{{ loop.index0 }}">
{{ output }}
</Text>
{% endfor %}

Output the index of the text that best fits the criteria. You must output a single integer.`,
  },
]);