create_rigorous_charts.py

#!/usr/bin/env python3
"""
Create rigorous, professional charts for AI bias evaluation article
Removing sensationalistic language and ensuring data accuracy
"""
import matplotlib.pyplot as plt
import numpy as np


def create_primary_findings_chart():
    """Create a clean chart showing the primary findings"""
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 8))

    # Left panel: Uncertainty acknowledgment change
    models = ['GPT-3.5-turbo\n(March 2023)', 'GPT-5-nano\n(August 2025)']
    unknown_rates = [13.1, 29.9]
    colors = ['#E63946', '#06D6A0']
    bars1 = ax1.bar(models, unknown_rates, color=colors, alpha=0.8, edgecolor='black', linewidth=1.5)

    # Add value labels
    for i, (bar, rate) in enumerate(zip(bars1, unknown_rates)):
        ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1,
                 f'{rate}%', ha='center', va='bottom', fontweight='bold', fontsize=12)

    # Add difference annotation
    ax1.annotate('', xy=(1, 29.9), xytext=(0, 13.1),
                 arrowprops=dict(arrowstyle='<->', color='blue', lw=2))
    ax1.text(0.5, 21.5, '+16.8\npercentage\npoints', ha='center', va='center',
             fontweight='bold', fontsize=11, color='blue',
             bbox=dict(boxstyle="round,pad=0.3", facecolor='lightblue', alpha=0.8))
    ax1.set_ylabel('Uncertainty Acknowledgment Rate (%)', fontsize=12, fontweight='bold')
    ax1.set_title('Ambiguous Context Behavior\n"Unknown" Selection Rate', fontsize=14, fontweight='bold')
    ax1.set_ylim(0, 35)
    ax1.grid(axis='y', alpha=0.3)

    # Right panel: Overall accuracy comparison
    overall_rates = [53.0, 61.2]
    bars2 = ax2.bar(models, overall_rates, color=colors, alpha=0.8, edgecolor='black', linewidth=1.5)
    for i, (bar, rate) in enumerate(zip(bars2, overall_rates)):
        ax2.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.5,
                 f'{rate}%', ha='center', va='bottom', fontweight='bold', fontsize=12)
    ax2.annotate('', xy=(1, 61.2), xytext=(0, 53.0),
                 arrowprops=dict(arrowstyle='<->', color='green', lw=2))
    ax2.text(0.5, 57.1, '+8.2\npercentage\npoints', ha='center', va='center',
             fontweight='bold', fontsize=11, color='green',
             bbox=dict(boxstyle="round,pad=0.3", facecolor='lightgreen', alpha=0.8))
    ax2.set_ylabel('Overall Accuracy Rate (%)', fontsize=12, fontweight='bold')
    ax2.set_title('Overall Performance\nCombined Accuracy', fontsize=14, fontweight='bold')
    ax2.set_ylim(45, 65)
    ax2.grid(axis='y', alpha=0.3)

    plt.suptitle('AI Bias Evaluation: Key Behavioral Changes\nGPT-3.5-turbo to GPT-5-nano (116,972 tests)',
                 fontsize=16, fontweight='bold', y=0.98)
    plt.tight_layout()
    plt.savefig('/Users/mdangelo/projects/pf2/site/static/img/blog/bbq-bias/primary-findings-rigorous.png',
                dpi=300, bbox_inches='tight', facecolor='white')
    plt.close()


def create_development_timeline_chart():
    """Create a clean timeline chart without sensationalistic framing"""
    fig, ax = plt.subplots(1, 1, figsize=(14, 8))

    # Data points
    models = ['GPT-3.5-turbo', 'GPT-5-nano']
    overall_accuracy = [53.0, 61.2]
    ambiguous_unknown = [13.1, 29.9]
    x_pos = [0, 1]
    width = 0.35

    # Create bars
    bars1 = ax.bar([p - width/2 for p in x_pos], overall_accuracy, width,
                   label='Overall Accuracy', color='#2E86AB', alpha=0.8)
    bars2 = ax.bar([p + width/2 for p in x_pos], ambiguous_unknown, width,
                   label='Uncertainty Acknowledgment\n(Ambiguous Contexts)', color='#A23B72', alpha=0.8)

    # Add value labels
    for bars in [bars1, bars2]:
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width()/2., height + 0.5,
                    f'{height}%', ha='center', va='bottom', fontweight='bold')

    ax.set_ylabel('Performance Rate (%)', fontsize=12, fontweight='bold')
    ax.set_xlabel('Model (Release Date)', fontsize=12, fontweight='bold')
    ax.set_title('AI Bias Evolution: 2.5 Years of Development\nMarch 2023 to August 2025',
                 fontsize=16, fontweight='bold')
    ax.set_xticks(x_pos)
    ax.set_xticklabels(['GPT-3.5-turbo\n(March 1, 2023)', 'GPT-5-nano\n(August 7, 2025)'])
    ax.legend(loc='upper left', fontsize=11)
    ax.grid(axis='y', alpha=0.3)
    ax.set_ylim(0, 70)

    # Add timeline context
    ax.text(0.5, 65, '2 years, 5 months of development', ha='center', va='center',
            fontsize=12, style='italic',
            bbox=dict(boxstyle="round,pad=0.5", facecolor='lightyellow', alpha=0.8))

    plt.tight_layout()
    plt.savefig('/Users/mdangelo/projects/pf2/site/static/img/blog/bbq-bias/development-timeline-rigorous.png',
                dpi=300, bbox_inches='tight', facecolor='white')
    plt.close()


def create_methodology_accessibility_chart():
    """Create honest chart about methodology accessibility without inflated comparisons"""
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 8))

    # Left panel: Cost breakdown
    components = ['GPT-3.5-turbo\n(7.81M tokens)', 'GPT-5-nano\n(22.09M tokens)', 'Total Cost']
    costs = [3.96, 6.16, 10.13]
    colors = ['#E63946', '#06D6A0', '#2E86AB']
    bars = ax1.bar(components, costs, color=colors, alpha=0.8, edgecolor='black', linewidth=1)
    for bar, cost in zip(bars, costs):
        ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
                 f'${cost}', ha='center', va='bottom', fontweight='bold', fontsize=12)
    ax1.set_ylabel('Cost (USD)', fontsize=12, fontweight='bold')
    ax1.set_title('Evaluation Cost Breakdown\n116,972 Total Tests', fontsize=14, fontweight='bold')
    ax1.set_ylim(0, 12)
    ax1.grid(axis='y', alpha=0.3)

    # Right panel: Evaluation scope
    metrics = ['Total Tests', 'Social Bias\nCategories', 'Models\nCompared', 'Cost per\n1000 Tests']
    values = [116972, 11, 2, 0.087]  # $10.13 / 116.972 = $0.087 per 1000 tests

    # Use different scales for different metrics
    ax2_twin = ax2.twinx()

    # Plot total tests on left axis
    bar1 = ax2.bar('Total Tests', 116972, color='#2E86AB', alpha=0.8, width=0.6)
    ax2.text(0, 120000, '116,972', ha='center', va='bottom', fontweight='bold', fontsize=11)

    # Plot other metrics on right axis
    other_metrics = ['Social Bias\nCategories', 'Models\nCompared', 'Cost per\n1000 Tests']
    other_values = [11, 2, 0.087]
    x_pos = [1, 2, 3]
    bars2 = ax2_twin.bar(x_pos, other_values, color=['#A23B72', '#F18F01', '#06D6A0'],
                         alpha=0.8, width=0.6)
    for i, (bar, val) in enumerate(zip(bars2, other_values)):
        if i == 2:  # Cost per 1000 tests
            label = f'${val:.3f}'
        else:
            label = str(int(val))
        ax2_twin.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.2,
                      label, ha='center', va='bottom', fontweight='bold', fontsize=11)

    ax2.set_ylabel('Number of Tests', fontsize=12, fontweight='bold', color='#2E86AB')
    ax2_twin.set_ylabel('Count / Cost per 1K Tests', fontsize=12, fontweight='bold')
    ax2.set_title('Comprehensive Evaluation Scope\nAccessible Methodology', fontsize=14, fontweight='bold')
    ax2.set_xticks([0, 1, 2, 3])
    ax2.set_xticklabels(['Total Tests'] + other_metrics, rotation=15, ha='right')
    ax2.set_ylim(0, 140000)
    ax2_twin.set_ylim(0, 15)

    plt.suptitle('Bias Evaluation Methodology: Cost and Scope\nStandardized, Reproducible Assessment',
                 fontsize=16, fontweight='bold', y=0.95)
    plt.tight_layout()
    plt.savefig('/Users/mdangelo/projects/pf2/site/static/img/blog/bbq-bias/methodology-accessibility-rigorous.png',
                dpi=300, bbox_inches='tight', facecolor='white')
    plt.close()


def main():
    print("šŸŽØ Creating rigorous, professional charts...")
    print("šŸ”§ Removing sensationalistic language")
    print("šŸ“Š Ensuring data accuracy and clarity")
    create_primary_findings_chart()
    print("āœ… Primary findings chart created")
    create_development_timeline_chart()
    print("āœ… Development timeline chart created")
    create_methodology_accessibility_chart()
    print("āœ… Methodology accessibility chart created")
    print("\nšŸ“ˆ All rigorous charts generated successfully!")
    print("šŸŽÆ Charts now match the professional tone of the cleaned article")


if __name__ == "__main__":
    main()
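
As a quick check on the figures hard-coded above, the following minimal sketch (not part of the original script; the function name is hypothetical) recomputes the annotated deltas and the per-1,000-test cost from the raw numbers used in the charts:

def verify_derived_figures():
    """Hypothetical helper: recompute the derived values annotated in the charts."""
    unknown_rates = [13.1, 29.9]   # "Unknown" selection rates in ambiguous contexts (%)
    overall_rates = [53.0, 61.2]   # overall accuracy rates (%)
    total_cost = 10.13             # total evaluation cost in USD, as charted above
    total_tests = 116_972          # total number of tests

    # Differences annotated on the primary-findings chart
    print(f"Uncertainty delta: {unknown_rates[1] - unknown_rates[0]:+.1f} percentage points")
    print(f"Accuracy delta:    {overall_rates[1] - overall_rates[0]:+.1f} percentage points")
    # Cost-per-1,000-tests value shown on the methodology chart
    print(f"Cost per 1K tests: ${total_cost / (total_tests / 1000):.3f}")

Calling verify_derived_figures() prints +16.8 and +8.2 percentage points and roughly $0.087 per 1,000 tests, matching the chart annotations.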