Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

promptfooconfig.yaml 5.7 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
  # yaml-language-server: $schema=https://promptfoo.dev/config-schema.json
  #
  # promptfoo evaluation config: sends customer-support queries to a chat model
  # through the Adaline gateway provider, requests JSON output constrained by
  # the schema below, and scores each response with named metrics.
  description: 'Adaline Gateway Structured Output Example'

  prompts:
    - 'Analyze the following customer support query: "{{query}}"'

  providers:
    - id: adaline:openai:chat:gpt-4.1-mini
      config:
        seed: 322431
        responseFormat: json_schema
        responseSchema:
          name: customer_support_analysis
          strict: true
          description: 'output schema for analysis of a customer support query'
          schema:
            type: object
            properties:
              query_summary:
                type: string
                description: "A brief summary of the customer's query"
              category:
                type: string
                enum:
                  - 'billing'
                  - 'technical_issue'
                  - 'product_inquiry'
                  - 'complaint'
                  - 'feature_request'
                  - 'other'
                description: "The main category of the customer's query"
              sentiment:
                type: string
                enum: ['positive', 'neutral', 'negative']
                description: "The overall sentiment of the customer's query"
              # Urgency is a string enum, so the numeric comparisons in the
              # tests below go through parseInt.
              urgency:
                type: string
                enum: ['1', '2', '3', '4', '5']
                description: 'The urgency level of the query, where 1 is lowest and 5 is highest'
              suggested_actions:
                type: array
                items:
                  type: object
                  properties:
                    action:
                      type: string
                      description: 'A specific action to be taken'
                    priority:
                      type: string
                      enum: ['low', 'medium', 'high']
                  required: ['action', 'priority']
                  additionalProperties: false
              estimated_resolution_time:
                type: string
                description: "Estimated time to resolve the query (e.g., '2 hours', '1 day')"
            required:
              - 'query_summary'
              - 'category'
              - 'sentiment'
              - 'urgency'
              - 'suggested_actions'
              - 'estimated_resolution_time'
            additionalProperties: false

  # Every test tags its assertions with the same six metric names (ValidJSON,
  # CategoryAccuracy, SentimentAccuracy, UrgencyAccuracy, ActionRelevance,
  # SummaryAccuracy) so that derivedMetrics can combine them by name.
  tests:
    # Billing issue: duplicate charge, refund requested.
    - vars:
        query: "I've been charged twice for my subscription this month. Can you please refund the extra charge?"
      assert:
        - type: is-json
          metric: ValidJSON
        - type: javascript
          value: output.category === 'billing'
          metric: CategoryAccuracy
        - type: javascript
          value: output.sentiment === 'negative'
          metric: SentimentAccuracy
        - type: javascript
          value: parseInt(output.urgency, 10) >= 3
          metric: UrgencyAccuracy
        - type: javascript
          value: output.suggested_actions.length > 0 && output.suggested_actions.some(action => action.action.toLowerCase().includes('refund'))
          metric: ActionRelevance
        - type: llm-rubric
          value: "Does the query summary accurately reflect the customer's issue about being charged twice?"
          metric: SummaryAccuracy

    # Account how-to question: password change.
    - vars:
        query: "How do I change my password? I can't find the option in my account settings."
      assert:
        - type: is-json
          metric: ValidJSON
        - type: javascript
          value: output.category === 'technical_issue'
          metric: CategoryAccuracy
        - type: javascript
          value: output.sentiment === 'neutral'
          metric: SentimentAccuracy
        - type: javascript
          value: parseInt(output.urgency, 10) <= 3
          metric: UrgencyAccuracy
        - type: javascript
          value: output.suggested_actions.some(action => action.action.toLowerCase().includes('password'))
          metric: ActionRelevance
        - type: llm-rubric
          value: "Does the query summary accurately reflect the customer's issue about changing their password?"
          metric: SummaryAccuracy

    # Positive feedback with a question about future plans.
    - vars:
        query: "I love your new feature! It's made my work so much easier. Any plans to expand on it?"
      assert:
        - type: is-json
          metric: ValidJSON
        - type: javascript
          value: output.category === 'feature_request'
          metric: CategoryAccuracy
        - type: javascript
          value: output.sentiment === 'positive'
          metric: SentimentAccuracy
        - type: javascript
          value: parseInt(output.urgency, 10) <= 2
          metric: UrgencyAccuracy
        - type: javascript
          value: output.suggested_actions.some(action => action.action.toLowerCase().includes('feedback'))
          metric: ActionRelevance
        - type: llm-rubric
          value: "Does the query summary accurately reflect the customer's positive feedback and interest in feature expansion?"
          metric: SummaryAccuracy

    # Severe complaint: refund demand and cancellation.
    - vars:
        query: "Your product is terrible and never works! I want a full refund and I'm cancelling my account!"
      assert:
        - type: is-json
          metric: ValidJSON
        - type: javascript
          value: output.category === 'complaint'
          metric: CategoryAccuracy
        - type: javascript
          value: output.sentiment === 'negative'
          metric: SentimentAccuracy
        # Kept as a single-line expression like the other javascript
        # assertions. A `|` block scalar would append a trailing newline.
        # NOTE(review): promptfoo appears to treat values containing a newline
        # as a multi-line function body that needs an explicit `return` --
        # confirm against the promptfoo JavaScript assertion docs.
        - type: javascript
          value: output.urgency === '5'
          metric: UrgencyAccuracy
        - type: javascript
          value: output.suggested_actions.some(action => action.priority === 'high')
          metric: ActionRelevance
        - type: llm-rubric
          value: "Does the query summary accurately reflect the customer's severe complaint and refund request?"
          metric: SummaryAccuracy

  # Formulas reference the metric names assigned by the assertions above.
  # ResponseQuality additionally uses OverallAccuracy, which is defined just
  # before it in this list.
  derivedMetrics:
    - name: 'OverallAccuracy'
      value: '(CategoryAccuracy + SentimentAccuracy + UrgencyAccuracy + ActionRelevance + SummaryAccuracy) / 5'
    - name: 'ResponseQuality'
      value: '(ValidJSON + OverallAccuracy) / 2'
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...