Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

promptfooconfig.yaml 5.6 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
  # Prompt template sent to the provider; {{query}} is filled from each test
  # case's `vars.query`.
  prompts:
  - >-
    Analyze the following customer support query: "{{query}}"
  # One provider entry; its config requests JSON-schema structured output.
  providers:
  - id: 'adaline:openai:chat:gpt-4o-mini'
    config:
      # Fixed seed value passed to the provider.
      seed: 322431
      responseFormat: json_schema
      responseSchema:
        name: customer_support_analysis
        strict: true
        description: 'output schema for analysis of a customer support query'
        # JSON Schema for the analysis object: six properties, all listed under
        # `required`, with no additional properties allowed.
        schema:
          type: object
          properties:
            query_summary:
              type: string
              description: "A brief summary of the customer's query"
            category:
              type: string
              enum:
              - 'billing'
              - 'technical_issue'
              - 'product_inquiry'
              - 'complaint'
              - 'feature_request'
              - 'other'
              description: "The main category of the customer's query"
            sentiment:
              type: string
              enum: ['positive', 'neutral', 'negative']
              description: "The overall sentiment of the customer's query"
            urgency:
              type: string
              # Levels are strings, not integers; the test assertions compare
              # or parse them accordingly.
              enum: ['1', '2', '3', '4', '5']
              description: 'The urgency level of the query, where 1 is lowest and 5 is highest'
            suggested_actions:
              type: array
              items:
                type: object
                properties:
                  action:
                    type: string
                    description: 'A specific action to be taken'
                  priority:
                    type: string
                    enum: ['low', 'medium', 'high']
                required: ['action', 'priority']
                additionalProperties: false
            estimated_resolution_time:
              type: string
              description: "Estimated time to resolve the query (e.g., '2 hours', '1 day')"
          required:
          - 'query_summary'
          - 'category'
          - 'sentiment'
          - 'urgency'
          - 'suggested_actions'
          - 'estimated_resolution_time'
          additionalProperties: false
  # Test cases. Each assertion carries a `metric` name so that scores can be
  # combined under `derivedMetrics`.
  tests:
  # Case 1: duplicate subscription charge with a refund request.
  - vars:
      query: "I've been charged twice for my subscription this month. Can you please refund the extra charge?"
    assert:
    - type: is-json
      metric: ValidJSON
    - type: javascript
      value: "output.category === 'billing'"
      metric: CategoryAccuracy
    - type: javascript
      value: "output.sentiment === 'negative'"
      metric: SentimentAccuracy
    # urgency is a string in the schema, hence the parseInt.
    - type: javascript
      value: "parseInt(output.urgency) >= 3"
      metric: UrgencyAccuracy
    # At least one suggested action must mention a refund.
    - type: javascript
      value: "output.suggested_actions.length > 0 && output.suggested_actions.some(action => action.action.toLowerCase().includes('refund'))"
      metric: ActionRelevance
    - type: llm-rubric
      value: "Does the query summary accurately reflect the customer's issue about being charged twice?"
      metric: SummaryAccuracy
  # Case 2: how-to question about changing a password.
  - vars:
      query: "How do I change my password? I can't find the option in my account settings."
    assert:
    - type: is-json
      metric: ValidJSON
    - type: javascript
      value: "output.category === 'technical_issue'"
      metric: CategoryAccuracy
    - type: javascript
      value: "output.sentiment === 'neutral'"
      metric: SentimentAccuracy
    # urgency is a string in the schema, hence the parseInt.
    - type: javascript
      value: "parseInt(output.urgency) <= 3"
      metric: UrgencyAccuracy
    # At least one suggested action must mention the password.
    - type: javascript
      value: "output.suggested_actions.some(action => action.action.toLowerCase().includes('password'))"
      metric: ActionRelevance
    - type: llm-rubric
      value: "Does the query summary accurately reflect the customer's issue about changing their password?"
      metric: SummaryAccuracy
  # Case 3: positive feedback on a feature, asking about future expansion.
  - vars:
      query: "I love your new feature! It's made my work so much easier. Any plans to expand on it?"
    assert:
    - type: is-json
      metric: ValidJSON
    - type: javascript
      value: "output.category === 'feature_request'"
      metric: CategoryAccuracy
    - type: javascript
      value: "output.sentiment === 'positive'"
      metric: SentimentAccuracy
    # urgency is a string in the schema, hence the parseInt.
    - type: javascript
      value: "parseInt(output.urgency) <= 2"
      metric: UrgencyAccuracy
    # At least one suggested action must mention feedback.
    - type: javascript
      value: "output.suggested_actions.some(action => action.action.toLowerCase().includes('feedback'))"
      metric: ActionRelevance
    - type: llm-rubric
      value: "Does the query summary accurately reflect the customer's positive feedback and interest in feature expansion?"
      metric: SummaryAccuracy
  # Case 4: angry complaint demanding a full refund and cancellation.
  - vars:
      query: "Your product is terrible and never works! I want a full refund and I'm cancelling my account!"
    assert:
    - type: is-json
      metric: ValidJSON
    - type: javascript
      value: "output.category === 'complaint'"
      metric: CategoryAccuracy
    - type: javascript
      value: "output.sentiment === 'negative'"
      metric: SentimentAccuracy
    # Kept as a single-line expression like every other javascript check in
    # this file. The previous literal block scalar (`|`) appended a trailing
    # newline to the expression.
    # NOTE(review): promptfoo appears to treat values containing a newline as
    # function bodies that need an explicit `return` - confirm against the docs.
    - type: javascript
      value: "output.urgency === '5'"
      metric: UrgencyAccuracy
    # At least one suggested action must be marked high priority.
    - type: javascript
      value: "output.suggested_actions.some(action => action.priority === 'high')"
      metric: ActionRelevance
    - type: llm-rubric
      value: "Does the query summary accurately reflect the customer's severe complaint and refund request?"
      metric: SummaryAccuracy
  # Scores computed from the named assertion metrics used in the test cases.
  derivedMetrics:
  # Sum of the five non-JSON metrics divided by five.
  - name: OverallAccuracy
    value: "(CategoryAccuracy + SentimentAccuracy + UrgencyAccuracy + ActionRelevance + SummaryAccuracy) / 5"
  # Average of ValidJSON and OverallAccuracy; refers to the entry defined just
  # above, so the order of these two entries should be kept.
  - name: ResponseQuality
    value: "(ValidJSON + OverallAccuracy) / 2"
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...