Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

main.py 2.2 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
  1. from urllib.parse import urlparse
  2. import warnings
  3. import numpy as np
  4. import pandas as pd
  5. from sklearn.ensemble import RandomForestClassifier
  6. from sklearn.metrics import classification_report
  7. from sklearn.model_selection import train_test_split
  8. from sklearn import datasets
  9. import mlflow
  10. from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
  11. import os
  12. import mlflow.sklearn
  # Fetch the wine dataset from sklearn.datasets and pull out the feature
  # matrix and the class labels in one step.
  wine = datasets.load_wine()
  X, y = wine.data, wine.target
  # Build a single DataFrame: one column per feature, plus the label
  # stored under the 'target' column.
  df = pd.DataFrame(X, columns=wine.feature_names).assign(target=y)
  20. # evaluate metrics function
  def evaluate_metrics(actual, predicted):
      """Return the classification report for a set of predictions.

      Args:
          actual: Ground-truth labels.
          predicted: Labels predicted by the model, aligned with ``actual``.

      Returns:
          Whatever ``sklearn.metrics.classification_report`` returns for the
          two label sequences (called with its default options).
      """
      return classification_report(actual, predicted)
  if __name__ == "__main__":
      warnings.filterwarnings("ignore")
      # Seed NumPy's global RNG. train_test_split below is called without a
      # random_state, so (per the scikit-learn docs) it draws from this
      # global state, which is what makes the split repeatable.
      np.random.seed(40)

      # Exactly one tracking URI should be active: keep either the local or
      # the remote setting and leave the other commented out.
      # Local tracking: uncomment the next line to log MLflow runs to ./mlruns.
      # mlflow.set_tracking_uri('file://' + os.path.abspath('./mlruns'))
      # Remote tracking: log runs to the MLflow endpoint hosted on DagsHub.
      remote_server_uri = "https://dagshub.com/bende.tymer/MLOps-Basics.mlflow"
      mlflow.set_tracking_uri(remote_server_uri)

      # Hold out 20% of the rows for testing, then separate features from
      # the 'target' label column.
      train, test = train_test_split(df, test_size=0.2)
      train_x = train.drop('target', axis=1)
      train_y = train['target']
      test_x = test.drop('target', axis=1)
      test_y = test['target']

      # Fit a random forest (fixed random_state) and predict on the held-out rows.
      rc = RandomForestClassifier(random_state=42)
      rc.fit(train_x, train_y)
      predicted_ = rc.predict(test_x)

      # Human-readable report for the console.
      report = evaluate_metrics(test_y, predicted_)
      print("The report for prediction", report)

      # Scalar metrics for the MLflow UI; average='weighted' is passed so each
      # of precision/recall/F1 is reduced to a single number.
      accuracy = accuracy_score(test_y, predicted_)
      precision = precision_score(test_y, predicted_, average='weighted')
      recall = recall_score(test_y, predicted_, average='weighted')
      f1 = f1_score(test_y, predicted_, average='weighted')
      metrics = {
          "accuracy": accuracy,
          "precision": precision,
          "recall": recall,
          "f1": f1,
      }

      # Log inside an explicit run instead of relying on MLflow's implicit
      # auto-started run: the context manager always ends the run, and marks
      # it as failed if logging raises part-way through.
      with mlflow.start_run():
          mlflow.log_metrics(metrics)
          mlflow.sklearn.log_model(rc, "model")
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...