Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

numerical_transformers.py 1.8 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
  1. from typing import List
  2. import numpy as np
  3. import pandas as pd
  4. from feature_engine.discretisation import EqualFrequencyDiscretiser
  5. from feature_engine.transformation import LogTransformer, PowerTransformer, BoxCoxTransformer, YeoJohnsonTransformer
  # --------------------------------------
  # Convert continuous values into equal-frequency categories
  # --------------------------------------
  def equal_freq_discretiser(
      df: pd.DataFrame,
      num_col_names: List,
      q: int = 10,
  ) -> pd.DataFrame:
      """Append discretised copies of the given numeric columns to ``df``.

      An ``EqualFrequencyDiscretiser`` is fitted on ``df[num_col_names]`` and
      the transformed columns are appended to the original frame with a
      ``_disc`` suffix. The input frame itself is not modified.

      Args:
          df: Source data containing every column named in ``num_col_names``.
          num_col_names: Names of the columns to discretise.
          q: Value passed to ``EqualFrequencyDiscretiser`` as ``q``. Defaults
              to 10, the value this function previously hard-coded.

      Returns:
          A new DataFrame holding all original columns followed by one
          ``<name>_disc`` column per entry in ``num_col_names``.
      """
      discretiser = EqualFrequencyDiscretiser(q=q, variables=num_col_names)

      # Fit and transform only the selected columns so that unrelated
      # columns never reach the discretiser.
      binned = discretiser.fit_transform(df[num_col_names])
      binned = binned.add_suffix("_disc")

      # Column-wise concat keeps the raw values next to their binned versions.
      return pd.concat([df, binned], axis=1)
  # --------------------------------------
  # Transform continuous values toward a normal distribution
  # --------------------------------------
  def variable_transformer(
      df: pd.DataFrame,
      num_col_names: List,
      variable_type: str
  ) -> pd.DataFrame:
      """Append transformed copies of the given numeric columns to ``df``.

      The transformer selected by ``variable_type`` is fitted on
      ``df[num_col_names]`` and the transformed columns are appended to the
      original frame with a ``_<variable_type>`` suffix. The input frame
      itself is not modified.

      Args:
          df: Source data containing every column named in ``num_col_names``.
          num_col_names: Names of the columns to transform.
          variable_type: One of ``"log_transformer"``, ``"power_transformer"``,
              ``"boxcox_transformer"`` or ``"yeojohnson_transformer"``.

      Returns:
          A new DataFrame holding all original columns followed by one
          ``<name>_<variable_type>`` column per entry in ``num_col_names``.

      Raises:
          ValueError: If ``variable_type`` is not one of the supported names.
      """
      # The branches stay lazy on purpose: only the selected transformer
      # class is looked up and instantiated.
      if variable_type == "log_transformer":
          # Must match the imported class name exactly (LogTransformer).
          trans = LogTransformer(variables=num_col_names)
      elif variable_type == "power_transformer":
          trans = PowerTransformer(variables=num_col_names)
      elif variable_type == "boxcox_transformer":
          trans = BoxCoxTransformer(variables=num_col_names)
      elif variable_type == "yeojohnson_transformer":
          trans = YeoJohnsonTransformer(variables=num_col_names)
      else:
          # The message lists exactly the keys accepted above.
          raise ValueError(
              'variable_type must be either "log_transformer", '
              '"power_transformer", "boxcox_transformer", '
              '"yeojohnson_transformer"'
          )

      # Fit and transform only the selected columns, then tag the outputs
      # with the transformer name so several variants can coexist.
      transformed = trans.fit_transform(df[num_col_names])
      transformed = transformed.add_suffix(f"_{variable_type}")

      return pd.concat([df, transformed], axis=1)
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...