# Source: nirbarazida/promptfoo
# (mirror of https://github.com/promptfoo/promptfoo)
# yaml-language-server: $schema=https://promptfoo.dev/config-schema.json
# Free-text description of this evaluation.
description: Google AI Studio Gemini model evaluation

# A single prompt template. The {{puzzle}} placeholder matches the `puzzle`
# variable defined under `tests[].vars`.
prompts:
  - |
    Given this math puzzle: {{puzzle}}
    Please solve this step by step, showing your reasoning process.

providers:
  # Gemini 2.5 Pro with an explicit thinking budget (2048 of the 4096 output
  # tokens). The label keeps this entry distinguishable from the other
  # google:gemini-2.5-pro entries in this list, which share the same id.
  - id: google:gemini-2.5-pro
    label: gemini-with-thinking-budget
    config:
      generationConfig:
        temperature: 0.7
        maxOutputTokens: 4096
        thinkingConfig:
          thinkingBudget: 2048 # Enhanced thinking for complex reasoning

  # Gemini 2.5 Flash: same temperature as the entry above, with half the output
  # token limit (2048) and half the thinking budget (1024).
  - id: google:gemini-2.5-flash
    config:
      generationConfig:
        temperature: 0.7
        maxOutputTokens: 2048
        thinkingConfig:
          thinkingBudget: 1024 # Enhanced reasoning and thinking capabilities

  # Gemini 2.5 Flash-Lite: the smallest limits of the 2.5 entries in this file
  # (1024 output tokens, 512 thinking budget).
  - id: google:gemini-2.5-flash-lite
    config:
      generationConfig:
        temperature: 0.7
        maxOutputTokens: 1024
        thinkingConfig:
          thinkingBudget: 512 # Most cost-efficient and fastest 2.5 model

  # Shorthand entries: provider id only, with no `config` overrides.
  # NOTE(review): both ids are experimental ("-exp") models — confirm they are
  # still served before relying on this example.
  - google:gemini-2.0-flash-exp

  - google:gemini-2.0-flash-thinking-exp

  # Gemini 2.5 Pro with sampling parameters set directly under `config`
  # (no generationConfig wrapper). Labelled so it is not confused with the
  # other google:gemini-2.5-pro entries, which share the same id.
  # NOTE(review): no thinkingBudget is set here; if thinking tokens count
  # toward maxOutputTokens, 1024 may leave little room for the step-by-step
  # answer — confirm.
  - id: google:gemini-2.5-pro
    label: gemini-with-sampling-params
    config:
      temperature: 0.7
      maxOutputTokens: 1024
      topP: 0.9
      topK: 40

  # System instruction from file example: `systemInstruction` is given as a
  # file:// reference to system-instruction.txt rather than inline text
  # (path presumably resolved relative to this config file — confirm).
  - id: google:gemini-2.5-pro
    label: gemini-with-system-instruction-file
    config:
      temperature: 0.3
      maxOutputTokens: 1024
      systemInstruction: file://system-instruction.txt

  # Structured output example: requests a JSON response constrained by
  # `response_schema` (an object with three required string fields).
  # Temperature is 0 for this entry. The label distinguishes it from the other
  # google:gemini-2.5-pro entries, which share the same id.
  - id: google:gemini-2.5-pro
    label: gemini-with-structured-output
    config:
      generationConfig:
        temperature: 0
        maxOutputTokens: 1024
        response_mime_type: 'application/json'
        response_schema:
          type: 'object'
          properties:
            answer:
              type: 'string'
              description: 'The numerical or text answer'
            confidence:
              type: 'string'
              enum: ['high', 'medium', 'low']
            method_used:
              type: 'string'
              description: 'One-word description of solving method'
          required: ['answer', 'confidence', 'method_used']

  # Function calling example: declares one function, `verify_answer`, whose
  # only required argument is `answer`; `explanation` is optional. The label
  # distinguishes this entry from the other google:gemini-2.5-pro entries,
  # which share the same id.
  # NOTE(review): the Gemini API documents `tools` as a list of Tool objects;
  # confirm the single-mapping form used here is accepted by the provider.
  - id: google:gemini-2.5-pro
    label: gemini-with-function-calling
    config:
      tools:
        function_declarations:
          - name: 'verify_answer'
            description: 'Verify if the answer is correct'
            parameters:
              type: 'object'
              properties:
                answer:
                  type: 'string'
                  description: 'The proposed answer'
                explanation:
                  type: 'string'
                  description: 'Brief explanation'
              required: ['answer']
      tool_config:
        function_calling_config:
          mode: 'auto'
# Options applied to every test case. The provider override below names a text
# model and an embedding model; presumably these back the model-graded
# assertions under `tests` (llm-rubric and similar) rather than being models
# under evaluation — confirm against promptfoo's grading docs.
defaultTest:
  options:
    provider:
      text:
        id: google:gemini-2.5-flash
      embedding:
        id: google:embedding:text-embedding-004
# Test cases. Each entry supplies `vars` for the prompt template and a list of
# assertions checked against the output.
# Expected answer: (60 * 2.5 + 40 * 1.5) miles / (2.5 + 1.5) hours
#                = 210 / 4 = 52.5 mph.
tests:
  - vars:
      puzzle: 'If a train travels at 60 mph for 2.5 hours, then at 40 mph for 1.5 hours, what is the average speed for the entire journey?'
    assert:
      # Literal substring check. Quoted so the expected text stays a string
      # instead of being parsed as a YAML float.
      - type: contains
        value: '52.5'
      # Rubric-based check of the final answer.
      - type: llm-rubric
        value: |
          The answer should be 52.5 mph.
      # Similarity check against the expected answer text.
      - type: similar
        value: 52.5 mph