Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

pipeline.py 1.4 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
  1. from typing import Dict
  2. import numpy as np
  3. import pandas as pd
  4. from sklearn.base import BaseEstimator, TransformerMixin
  5. from feature_engine.imputation import (
  6. CategoricalImputer,
  7. AddMissingIndicator,
  8. MeanMedianImputer)
  9. from feature_engine.encoding import (
  10. OrdinalEncoder,
  11. OneHotEncoder,
  12. CountFrequencyEncoder,
  13. RareLabelEncoder
  14. )
  15. from sklearn.pipeline import Pipeline
  16. import lightgbm as lgbm
  17. # --------------------------------------
  18. # 全体処理のパイプライン
  19. # --------------------------------------
  20. def create_pipeline(
  21. config: Dict,
  22. df: pd.DataFrame
  23. ) -> pd.DataFrame:
  24. mypipeline = Pipeline([
  25. # Inputation
  26. ('categorical_imputation'. CategoricalImputer(
  27. imputation_method='missing',
  28. varibales=config["cat_col_names"])
  29. ),
  30. # add missing indicator to numerical variables
  31. ('missing_indicator', AddMissingIndicator(
  32. variables=config["cat_col_names"])
  33. ),
  34. # categorical encoder
  35. ('rare_label_encoder', RareLabelEncoder(
  36. tol=0.05,
  37. variables=config["cat_col_names"])
  38. ),
  39. ('categorical_encoder', OneHotEncoder(
  40. top_categories=10,
  41. drop_last=True,
  42. variables=config["cat_col_names"])
  43. ),
  44. ('classifier', lgbm.LGBMClassifier()),
  45. ])
  46. return mypipeline
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...