Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

train_evaluate.py 3.1 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
  1. from genericpath import exists
  2. import numpy as np
  3. from sklearn.metrics import mean_absolute_error,r2_score,mean_squared_error
  4. from sklearn.ensemble import GradientBoostingRegressor
  5. from urllib.parse import urlparse
  6. import pandas as pd
  7. import argparse
  8. from sklearn.model_selection import learning_curve
  9. from get_data import read_params
  10. import joblib
  11. import json
  12. import os
  13. def eval_metrics(act,pred):
  14. r2score=r2_score(act,pred)
  15. rmse=np.sqrt(mean_squared_error(act,pred))
  16. mse=mean_squared_error(act,pred)
  17. mae=mean_absolute_error(act,pred)
  18. return r2score,rmse,mse,mae
  19. def model_eval(config_path):
  20. config=read_params(config_path)
  21. test_data=config["split_data"]["test_path"]
  22. train_data=config["split_data"]["train_path"]
  23. model_dir=config["model_dirs"]
  24. target_col=config["base"]["target_data"]
  25. train=pd.read_csv(train_data,sep=",")
  26. test=pd.read_csv(test_data,sep=",")
  27. learning_rate=config["estimators"]["GradientBoostingRegressor"]["params"]["learning_rate"]
  28. n_estimators=config["estimators"]["GradientBoostingRegressor"]["params"]["n_estimators"]
  29. alpha=config["estimators"]["GradientBoostingRegressor"]["params"]["alpha"]
  30. verbose=config["estimators"]["GradientBoostingRegressor"]["params"]["verbose"]
  31. val_factor=config["estimators"]["GradientBoostingRegressor"]["params"]["validation_fraction"]
  32. tol=config["estimators"]["GradientBoostingRegressor"]["params"]["tol"]
  33. ccp_alpha=config["estimators"]["GradientBoostingRegressor"]["params"]["ccp_alpha"]
  34. x_train,x_test=train.drop(target_col,axis=1),test.drop(target_col,axis=1)
  35. y_train,y_test=train[target_col],test[target_col]
  36. GB=GradientBoostingRegressor(learning_rate=learning_rate,n_estimators=n_estimators,alpha=alpha,verbose=verbose,validation_fraction=val_factor,tol=tol,ccp_alpha=ccp_alpha)
  37. GB.fit(x_train,y_train)
  38. y_pred=GB.predict(x_test)
  39. (r2,rmse,mae,mse)=eval_metrics(y_test,y_pred)
  40. print(r2*100,rmse,mae,mse)
  41. normalized_rmse=rmse/(63770.43-1121)
  42. print(f"normalized rmse::{normalized_rmse}")
  43. os.makedirs(model_dir,exist_ok=True)
  44. model_path=os.path.join(model_dir,"model.pkl")
  45. joblib.dump(GB,model_path)
  46. #################reports logging###############
  47. scores_file=config["reports"]["scores"]
  48. params_file=config["reports"]["params"]
  49. with open(scores_file,"w") as f:
  50. scores={
  51. "rmse":rmse,
  52. "mse":mse,
  53. "r2 score":r2,
  54. "rmse":rmse,
  55. "normalized rmse":normalized_rmse
  56. }
  57. json.dump(scores,f,indent=4)
  58. with open(params_file,"w") as f:
  59. params={
  60. "learning_rate":learning_rate,
  61. "n_estimators":n_estimators,
  62. "verbose":verbose,
  63. "validation_fraction":val_factor,
  64. "tol":tol,
  65. "ccp":ccp_alpha
  66. }
  67. json.dump(params,f,indent=4)
  68. if __name__ == "__main__":
  69. args = argparse.ArgumentParser()
  70. args.add_argument("--config", default="params.yaml")
  71. parsed_args = args.parse_args()
  72. model_eval(config_path=parsed_args.config)
Tip!

Press p or ← to see the previous file, or n or → to see the next file.

Comments

Loading...