Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

promptfooconfig.yaml 5.8 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
  # yaml-language-server: $schema=https://promptfoo.dev/config-schema.json

  # Human-readable label for this evaluation.
  description: OpenAI Structured Output Example

  # Single prompt template. The {{query}} placeholder is supplied by the
  # `vars.query` entry of each test case in this file.
  prompts:
    - 'Analyze the following customer support query: "{{query}}"'
  # Two OpenAI chat models evaluated against the same structured-output
  # configuration. The config is written once on the first provider
  # (anchor `structured_output_config`) and reused by alias on the second.
  providers:
    - id: openai:chat:gpt-4o-2024-11-20
      config: &structured_output_config
        response_format:
          type: json_schema
          json_schema:
            name: customer_support_analysis
            strict: true
            # JSON Schema for the analysis object. Every property is listed
            # in `required` and `additionalProperties` is false at both
            # object levels.
            schema:
              type: object
              properties:
                query_summary:
                  type: string
                  description: "A brief summary of the customer's query"
                category:
                  type: string
                  enum:
                    - billing
                    - technical_issue
                    - product_inquiry
                    - complaint
                    - feature_request
                    - other
                  description: "The main category of the customer's query"
                sentiment:
                  type: string
                  enum:
                    - positive
                    - neutral
                    - negative
                  description: "The overall sentiment of the customer's query"
                urgency:
                  type: string
                  # Quoted on purpose: the enum members are the strings
                  # '1'..'5', not integers.
                  enum:
                    - '1'
                    - '2'
                    - '3'
                    - '4'
                    - '5'
                  description: 'The urgency level of the query, where 1 is lowest and 5 is highest'
                suggested_actions:
                  type: array
                  items:
                    type: object
                    properties:
                      action:
                        type: string
                        description: 'A specific action to be taken'
                      priority:
                        type: string
                        enum:
                          - low
                          - medium
                          - high
                    required:
                      - action
                      - priority
                    additionalProperties: false
                estimated_resolution_time:
                  type: string
                  description: "Estimated time to resolve the query (e.g., '2 hours', '1 day')"
              required:
                - query_summary
                - category
                - sentiment
                - urgency
                - suggested_actions
                - estimated_resolution_time
              additionalProperties: false
    - id: openai:chat:gpt-4o-mini
      config: *structured_output_config
  # Test cases. Each case supplies `vars.query` for the prompt template and
  # a list of assertions; every assertion is tagged with a `metric` name, and
  # those names are the ones combined under `derivedMetrics`.
  #
  # The javascript assertions read fields directly off `output`
  # (e.g. `output.category`).
  # NOTE(review): this assumes promptfoo passes the already-parsed JSON object
  # to javascript assertions for json_schema responses - confirm.
  #
  # Every javascript `value` is deliberately a single-line scalar. A `|` block
  # scalar would append a trailing newline to a one-line expression.
  # NOTE(review): promptfoo appears to treat values containing a newline as a
  # function body that needs an explicit `return` - confirm against the
  # promptfoo javascript-assertion docs.
  #
  # `urgency` is a string enum ('1'..'5') in the schema, so numeric
  # comparisons parse it first, with an explicit radix of 10.
  tests:
    # Billing: duplicate charge, refund requested.
    - vars:
        query: "I've been charged twice for my subscription this month. Can you please refund the extra charge?"
      assert:
        - type: is-json
          metric: ValidJSON
        - type: javascript
          value: output.category === 'billing'
          metric: CategoryAccuracy
        - type: javascript
          value: output.sentiment === 'negative'
          metric: SentimentAccuracy
        - type: javascript
          value: parseInt(output.urgency, 10) >= 3
          metric: UrgencyAccuracy
        - type: javascript
          value: output.suggested_actions.length > 0 && output.suggested_actions.some(action => action.action.toLowerCase().includes('refund'))
          metric: ActionRelevance
        - type: llm-rubric
          value: "Does the query summary accurately reflect the customer's issue about being charged twice?"
          metric: SummaryAccuracy

    # Technical issue: cannot find the password-change option.
    - vars:
        query: "How do I change my password? I can't find the option in my account settings."
      assert:
        - type: is-json
          metric: ValidJSON
        - type: javascript
          value: output.category === 'technical_issue'
          metric: CategoryAccuracy
        - type: javascript
          value: output.sentiment === 'neutral'
          metric: SentimentAccuracy
        - type: javascript
          value: parseInt(output.urgency, 10) <= 3
          metric: UrgencyAccuracy
        - type: javascript
          value: output.suggested_actions.some(action => action.action.toLowerCase().includes('password'))
          metric: ActionRelevance
        - type: llm-rubric
          value: "Does the query summary accurately reflect the customer's issue about changing their password?"
          metric: SummaryAccuracy

    # Feature request: positive feedback plus interest in expansion.
    - vars:
        query: "I love your new feature! It's made my work so much easier. Any plans to expand on it?"
      assert:
        - type: is-json
          metric: ValidJSON
        - type: javascript
          value: output.category === 'feature_request'
          metric: CategoryAccuracy
        - type: javascript
          value: output.sentiment === 'positive'
          metric: SentimentAccuracy
        - type: javascript
          value: parseInt(output.urgency, 10) <= 2
          metric: UrgencyAccuracy
        - type: javascript
          value: output.suggested_actions.some(action => action.action.toLowerCase().includes('feedback'))
          metric: ActionRelevance
        - type: llm-rubric
          value: "Does the query summary accurately reflect the customer's positive feedback and interest in feature expansion?"
          metric: SummaryAccuracy

    # Complaint: angry customer demanding a refund and cancelling.
    - vars:
        query: "Your product is terrible and never works! I want a full refund and I'm cancelling my account!"
      assert:
        - type: is-json
          metric: ValidJSON
        - type: javascript
          value: output.category === 'complaint'
          metric: CategoryAccuracy
        - type: javascript
          value: output.sentiment === 'negative'
          metric: SentimentAccuracy
        - type: javascript
          # String comparison: the schema defines urgency as the string '5'.
          value: output.urgency === '5'
          metric: UrgencyAccuracy
        - type: javascript
          value: output.suggested_actions.some(action => action.priority === 'high')
          metric: ActionRelevance
        - type: llm-rubric
          value: "Does the query summary accurately reflect the customer's severe complaint and refund request?"
          metric: SummaryAccuracy
  # Metrics computed from the named assertion metrics used in `tests`.
  derivedMetrics:
    # Mean of the five accuracy/relevance metrics. The folded scalar below
    # yields one line with single spaces and no trailing newline.
    - name: OverallAccuracy
      value: >-
        (CategoryAccuracy + SentimentAccuracy + UrgencyAccuracy
        + ActionRelevance + SummaryAccuracy) / 5
    # Combines JSON validity with OverallAccuracy, which is declared just
    # above this entry.
    - name: ResponseQuality
      value: (ValidJSON + OverallAccuracy) / 2
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...