Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

promptfooconfig.yaml 2.6 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
  1. # yaml-language-server: $schema=https://promptfoo.dev/config-schema.json
  2. description: IMDB Review Sentiment Analysis
  3. providers:
  4. - openai:gpt-4o-mini
  5. prompts:
  6. - label: Sentiment Analysis
  7. raw: |
  8. Analyze the sentiment of the following movie review. Classify it as either positive or negative.
  9. Review: "{{text}}"
  10. Respond with a JSON object in the following format:
  11. {
  12. "sentiment": "positive" or "negative",
  13. "confidence": number between 1-10,
  14. "reasoning": "brief explanation"
  15. }
  16. config:
  17. response_format:
  18. type: json_schema
  19. json_schema:
  20. name: MovieReviewSentiment
  21. schema:
  22. type: object
  23. properties:
  24. sentiment:
  25. type: string
  26. enum: ['positive', 'negative']
  27. confidence:
  28. type: integer
  29. minimum: 1
  30. maximum: 10
  31. reasoning:
  32. type: string
  33. required: ['sentiment', 'confidence', 'reasoning']
  34. defaultTest:
  35. assert:
  36. # Basic JSON validation
  37. - type: is-json
  38. # Track binary classification metrics
  39. - type: javascript
  40. value: 'output.sentiment === context.vars.sentiment'
  41. metric: accuracy
  42. # For F1 score components (treating 'positive' as the positive class)
  43. - type: javascript
  44. value: "output.sentiment === 'positive' && context.vars.sentiment === 'positive' ? 1 : 0"
  45. metric: true_positives
  46. weight: 0
  47. - type: javascript
  48. value: "output.sentiment === 'positive' && context.vars.sentiment === 'negative' ? 1 : 0"
  49. metric: false_positives
  50. weight: 0
  51. - type: javascript
  52. value: "output.sentiment === 'negative' && context.vars.sentiment === 'positive' ? 1 : 0"
  53. metric: false_negatives
  54. weight: 0
  55. - type: javascript
  56. value: "output.sentiment === 'negative' && context.vars.sentiment === 'negative' ? 1 : 0"
  57. metric: true_negatives
  58. weight: 0
  59. derivedMetrics:
  60. # Precision = TP / (TP + FP)
  61. - name: precision
  62. value: true_positives / (true_positives + false_positives)
  63. # Recall = TP / (TP + FN)
  64. - name: recall
  65. value: true_positives / (true_positives + false_negatives)
  66. # F1 Score = 2 * (precision * recall) / (precision + recall)
  67. - name: f1_score
  68. value: 2 * true_positives / (2 * true_positives + false_positives + false_negatives)
  69. # Accuracy = (TP + TN) / (TP + TN + FP + FN)
  70. - name: accuracy_score
  71. value: (true_positives + true_negatives) / (true_positives + true_negatives + false_positives + false_negatives)
  72. tests: file://imdb_eval_sample.csv
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...