Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

index.ts 3.7 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
  1. import invariant from 'tiny-invariant';
  2. import assertions from './assertions';
  3. import providers, { loadApiProvider } from './providers';
  4. import telemetry from './telemetry';
  5. import { disableCache } from './cache';
  6. import { evaluate as doEvaluate } from './evaluator';
  7. import { loadApiProviders } from './providers';
  8. import { readTests } from './testCases';
  9. import {
  10. readFilters,
  11. writeResultsToDatabase,
  12. writeMultipleOutputs,
  13. writeOutput,
  14. migrateResultsFromFileSystemToDatabase,
  15. } from './util';
  16. import type {
  17. EvaluateOptions,
  18. TestSuite,
  19. EvaluateTestSuite,
  20. ProviderOptions,
  21. PromptFunction,
  22. } from './types';
  23. import { readPrompts } from './prompts';
  24. export * from './types';
  25. export { generateTable } from './table';
  /**
   * Runs an evaluation for a test suite supplied through the library API.
   *
   * The incoming suite is expanded into a `TestSuite`: providers are loaded with
   * `loadApiProviders`, tests with `readTests`, nunjucks filters with
   * `readFilters`, and every prompt entry is normalised into `{ raw, label }`
   * objects. Provider references found on individual tests and assertions are
   * then replaced in place with the result of `loadApiProvider` before the
   * suite is handed to the evaluator.
   *
   * @param testSuite - Suite description. Each `prompts` entry may be a function
   *   (kept as-is, labelled with its source text), a string (passed to
   *   `readPrompts`), or any other value (serialised with `JSON.stringify`).
   * @param options - Options forwarded to the evaluator. The cache is disabled
   *   when `cache` is `false` or `repeat` is greater than 1.
   * @returns The value resolved by the evaluator.
   * @throws Error with message `Invalid provider type` when an assertion
   *   provider is truthy but is not a function, an object, or a string. Provider
   *   objects are also checked for an `id` via `invariant`.
   */
  async function evaluate(testSuite: EvaluateTestSuite, options: EvaluateOptions = {}) {
    // Copy the caller's fields up front; the loaded values below override them.
    const suiteFields = { ...testSuite };

    // Load each part in sequence: providers, then tests, then filters.
    const loadedProviders = await loadApiProviders(testSuite.providers, {
      env: testSuite.env,
    });
    const loadedTests = await readTests(testSuite.tests);
    const loadedFilters = await readFilters(testSuite.nunjucksFilters || {});

    // Full prompts expected (not filepaths)
    const promptGroups = await Promise.all(
      testSuite.prompts.map(async (promptInput) => {
        if (typeof promptInput === 'function') {
          return {
            raw: promptInput.toString(),
            label: promptInput.toString(),
            function: promptInput as PromptFunction,
          };
        }
        if (typeof promptInput === 'string') {
          // A single string may expand to several prompts; flattened below.
          const loadedPrompts = await readPrompts(promptInput);
          return loadedPrompts.map(({ raw, label }) => ({ raw, label }));
        }
        return {
          raw: JSON.stringify(promptInput),
          label: JSON.stringify(promptInput),
        };
      }),
    );

    const constructedTestSuite: TestSuite = {
      ...suiteFields,
      providers: loadedProviders,
      tests: loadedTests,
      nunjucksFilters: loadedFilters,
      prompts: promptGroups.flat(),
    };

    // Resolve nested providers
    for (const test of constructedTestSuite.tests || []) {
      // NOTE(review): this branch only fires for function-typed providers, while
      // the other loadApiProvider calls below pass string ids. Confirm intended.
      if (test.options?.provider && typeof test.options.provider === 'function') {
        test.options.provider = await loadApiProvider(test.options.provider);
      }

      for (const assertion of test.assert || []) {
        // Assert-sets are left untouched here.
        if (assertion.type === 'assert-set') {
          continue;
        }

        const providerSpec = assertion.provider;
        // Nothing to resolve when no provider is set or it is already a function.
        if (typeof providerSpec === 'function' || !providerSpec) {
          continue;
        }

        if (typeof providerSpec === 'string') {
          assertion.provider = await loadApiProvider(providerSpec);
        } else if (typeof providerSpec === 'object') {
          const casted = providerSpec as ProviderOptions;
          invariant(casted.id, 'Provider object must have an id');
          assertion.provider = await loadApiProvider(casted.id, { options: casted });
        } else {
          throw new Error('Invalid provider type');
        }
      }
    }

    // Other settings
    const repeatsRequested = Boolean(options.repeat && options.repeat > 1);
    if (options.cache === false || repeatsRequested) {
      disableCache();
    }

    telemetry.maybeShowNotice();

    // Run the eval!
    const ret = await doEvaluate(constructedTestSuite, {
      eventSource: 'library',
      ...options,
    });

    // Write results to the requested output path(s); an empty string counts as unset.
    const { outputPath } = testSuite;
    if (typeof outputPath === 'string') {
      if (outputPath) {
        await writeOutput(outputPath, ret, testSuite, null);
      }
    } else if (Array.isArray(outputPath)) {
      await writeMultipleOutputs(outputPath, ret, testSuite, null);
    }

    if (testSuite.writeLatestResults) {
      // The migration runs before the new results are written to the database.
      await migrateResultsFromFileSystemToDatabase();
      await writeResultsToDatabase(ret, testSuite);
    }

    await telemetry.send();
    return ret;
  }
  108. export { evaluate, assertions, providers };
  109. export default {
  110. evaluate,
  111. assertions,
  112. providers,
  113. };
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...