Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

hooks.py 4.4 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
  1. """Extension hooks for promptfoo.
  2. This module provides functionality for handling extension hooks in promptfoo.
  3. It allows for executing custom actions before and after test suites and
  4. individual evaluations, as well as running setup and teardown commands.
  5. """
  6. import logging
  7. import os
  8. from datetime import datetime
  9. from typing import Optional
  10. # Set up logging only if it hasn't been set up already
  11. if not logging.getLogger().handlers:
  12. current_dir = os.path.dirname(os.path.abspath(__file__))
  13. log_filename = f"promptfoo_run_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
  14. log_path = os.path.join(current_dir, log_filename)
  15. logging.basicConfig(
  16. filename=log_path,
  17. level=logging.INFO,
  18. format="%(asctime)s - %(message)s",
  19. datefmt="%Y-%m-%d %H:%M:%S",
  20. )
  21. counter = 0
  22. def extension_hook(hook_name: str, context: dict) -> Optional[dict]:
  23. """Handles different extension hooks for promptfoo.
  24. This function is called at various points during the test execution process.
  25. It logs information about the test suite and individual tests.
  26. Args:
  27. hook_name (str): The name of the hook being called. Can be one of
  28. "beforeAll", "beforeEach", "afterEach", or "afterAll".
  29. context (dict): A dictionary containing contextual information for the hook.
  30. The contents of this dictionary vary depending on the hook being called.
  31. Returns:
  32. context (Optional[dict]): The "beforeAll" and "beforeEach" hooks should return the context object,
  33. while the "afterAll" and "afterEach" hooks should not return anything.
  34. Global Variables:
  35. counter (int): Keeps track of the number of tests completed.
  36. Logs:
  37. Information about the test suite and individual tests, including setup,
  38. completion, results, and token usage.
  39. """
  40. global counter
  41. if hook_name == "beforeAll":
  42. suite = context.get("suite", {})
  43. logging.info(
  44. f"Setting up test suite: {suite.get('description') or 'Unnamed suite'}"
  45. )
  46. logging.info(f"Total prompts: {len(suite.get('prompts', []))}")
  47. logging.info(f"Total providers: {len(suite.get('providers', []))}")
  48. logging.info(f"Total tests: {len(suite.get('tests', []))}")
  49. # Add an additional test case to the suite:
  50. context["suite"]["tests"].append(
  51. {
  52. "vars": {
  53. "body": "It's a beautiful day",
  54. "language": "Spanish",
  55. },
  56. "assert": [{"type": "contains", "value": "Es un día hermoso."}],
  57. }
  58. )
  59. # Add an additional default assertion to the suite:
  60. context["suite"]["defaultTest"]["assert"].append({"type": "is-json"})
  61. return context
  62. elif hook_name == "beforeEach":
  63. logging.info("Preparing test")
  64. # All languages are now pirate:
  65. context["test"]["vars"]["language"] = (
  66. f"Pirate {context['test']['vars']['language']}"
  67. )
  68. return context
  69. elif hook_name == "afterEach":
  70. result = context.get("result", {})
  71. result_str = ""
  72. if result:
  73. success = "Pass" if result.get("success") else "Fail"
  74. score = result.get("score", 0)
  75. result_str = f", Result: {success}, Score: {score}"
  76. logging.info(f"Completed test {counter}{result_str}")
  77. # Access sessionId if available (from multi-turn conversations or stateful tests)
  78. session_id = result.get("metadata", {}).get("sessionId")
  79. if session_id:
  80. logging.info(f"Session ID: {session_id}")
  81. session_ids = result.get("metadata", {}).get("sessionIds")
  82. if session_ids:
  83. logging.info(f"Session IDs: {session_ids}")
  84. counter += 1
  85. elif hook_name == "afterAll":
  86. results = context.get("results", [])
  87. logging.info("Test suite completed")
  88. logging.info(f"Total tests run: {len(results)}")
  89. successes = sum(1 for r in results if r.get("success"))
  90. failures = sum(1 for r in results if not r.get("success"))
  91. logging.info(f"Successes: {successes}")
  92. logging.info(f"Failures: {failures}")
  93. total_token_usage = sum(
  94. r.get("response", {}).get("tokenUsage", {}).get("total", 0) for r in results
  95. )
  96. logging.info(f"Total token usage: {total_token_usage}")
  97. logging.info("") # Add a blank line for readability between hooks
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...