Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

validate_and_run_code.py 1.8 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
  1. import re
  2. import epicbox
  3. # Replace with your preferred Docker image
  4. DOCKER_IMAGE = "python:3.9-alpine"
  5. def get_assert(output, context):
  6. # Extract the Python function from the LLM output
  7. function_match = re.search(r"```python\s*\n(def\s+.*?)\n```", output, re.DOTALL)
  8. if not function_match:
  9. return {"pass": False, "score": 0, "reason": "No function definition found"}
  10. function_code = function_match.group(1)
  11. # Configure epicbox
  12. epicbox.configure(profiles=[epicbox.Profile("python", DOCKER_IMAGE)])
  13. # Get the function name, test input, and expected output from the context
  14. function_name = context["vars"]["function_name"]
  15. test_input = context["vars"]["test_input"]
  16. expected_output = context["vars"]["expected_output"]
  17. # Prepare the code to run in the sandbox
  18. test_code = f"""
  19. {function_code}
  20. # Test the function
  21. result = {function_name}({test_input})
  22. print(result)
  23. """
  24. files = [{"name": "main.py", "content": test_code.encode("utf-8")}]
  25. limits = {"cputime": 1, "memory": 64}
  26. # Run the code in the sandbox
  27. result = epicbox.run("python", "python main.py", files=files, limits=limits)
  28. if result["exit_code"] != 0:
  29. return {
  30. "pass": False,
  31. "score": 0,
  32. "reason": f"Execution error: {result['stderr'].decode('utf-8')}",
  33. }
  34. # Compare the output with the expected result
  35. actual_output = result["stdout"].decode("utf-8").strip()
  36. if actual_output == str(expected_output):
  37. return {
  38. "pass": True,
  39. "score": 1,
  40. "reason": f"Correct output: got {expected_output}",
  41. }
  42. else:
  43. return {
  44. "pass": False,
  45. "score": 0,
  46. "reason": f"Incorrect output. Expected: {expected_output}, Got: {actual_output}",
  47. }
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...