Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

feature_selection.py 2.0 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
  1. from typing import List
  2. import numpy as np
  3. import pandas as pd
  4. from feature_engine.selection import (
  5. DropFeatures,
  6. DropConstantFeatures,
  7. DropDuplicateFeatures,
  8. SmartCorrelatedSelection,
  9. DropHighPSIFeatures
  10. )
  11. # --------------------------------------
  12. # 特徴量の削除
  13. # --------------------------------------
  14. def drop_features(
  15. df: pd.DataFrame,
  16. drop_cols: List
  17. ) -> pd.DataFrame:
  18. transformer = DropFeatures(
  19. features_to_drop=drop_cols
  20. )
  21. df = transformer.fit_transform(df)
  22. return df
  23. # --------------------------------------
  24. # 変化の乏しい特徴量の削除
  25. # --------------------------------------
  26. def drop_constant_features(
  27. df: pd.DataFrame,
  28. ) -> pd.DataFrame:
  29. transformer = DropConstantFeatures(
  30. tol=0.8,
  31. missing_values='ignore'
  32. )
  33. transformer.fit(df)
  34. df = transformer.transform(df)
  35. return df
  36. # --------------------------------------
  37. # 不安定な特徴量の削除
  38. # --------------------------------------
  39. def drop_high_psi_features(
  40. df: pd.DataFrame,
  41. drop_cols: List
  42. ) -> pd.DataFrame:
  43. transformer = DropHighPSIFeatures(
  44. split_frac=0.6
  45. )
  46. df = transformer.fit_transform(df)
  47. return df
  48. # --------------------------------------
  49. # 同一特徴量の削除
  50. # --------------------------------------
  51. def drop_duplicate_features(
  52. df: pd.DataFrame,
  53. ) -> pd.DataFrame:
  54. transformer = DropDuplicateFeatures()
  55. df = transformer.fit_transform(df)
  56. return df
  57. # --------------------------------------
  58. # 相関している特徴量の削除
  59. # --------------------------------------
  60. def drop_smartcorr_features(
  61. df: pd.DataFrame,
  62. ) -> pd.DataFrame:
  63. transformer = SmartCorrelatedSelection(
  64. variables=None,
  65. method="pearson",
  66. threshold=0.8,
  67. missing_values="raise",
  68. selection_method="variable",
  69. estimator=None
  70. )
  71. df = transformer.fit_transform(df)
  72. return df
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...