Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

evaluate_utilities.py 1.6 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
  1. import pandas as pd
  2. import numpy as np
  3. import yaml
  4. import os
  5. from sklearn.metrics import accuracy_score, classification_report, f1_score
  6. def make_prediction(clf_result):
  7. # Get the index of the maximum probability score
  8. max_index = np.argmax(clf_result["scores"])
  9. predicted_label = clf_result["labels"][max_index]
  10. return predicted_label
  11. def run_batch_prediction(original_data, my_classifier, label_column='label', text_column = 'text'):
  12. # Make a copy of the data
  13. data_copy = original_data.copy()
  14. # The list that will contain the models predictions
  15. final_list_labels = []
  16. for index in range(len(original_data)):
  17. # Run classification
  18. sequences = original_data.iloc[index][text_column]
  19. candidate_labels = list(original_data[label_column].unique())
  20. result = my_classifier(sequences, candidate_labels, multi_class = True)
  21. # Make prediction
  22. final_list_labels.append(make_prediction(result))
  23. # Create the new column for the predictions
  24. data_copy["model_labels"] = final_list_labels
  25. return data_copy
  26. def get_data_sample(df, language, sample_size = 100):
  27. lang_df = df[df['language']==language].sample(sample_size)
  28. return lang_df
  29. def get_performance(df):
  30. performance = {}
  31. performance["accuracy"] = accuracy_score(df["label"], df["model_labels"])
  32. report = classification_report(df["label"], df["model_labels"], output_dict=True)
  33. performance["f1_score"] = report['macro avg']["f1-score"]
  34. return performance
  35. def predictions_evaluation(data, my_classifier):
  36. predictions = run_batch_prediction(data, my_classifier)
  37. return get_performance(predictions)
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...