levin
/
nl2ml
mirror of https://gitlab.com/lambda-hse/nl2ml


  
1

	
2

	
3

	
4

	
5

	
6

	
7

	
8

	
9

	
10

	
11

	
12

	
13

	
14

	
15

	
16

	
17

	
18

	
19

	
20

	
21

	
22

	
23

	
24

	
25

	
26

	
27

	
28

	
29

	
30

	
31

	
32

	
33

	
34

	
35

	
36

	
37

	
38

	
39

	
40

	
41

	
42

	
43

	
44

	
45

	
46

	
47

	
48

	
49

	
50

	
51

	
52

	
53

	
54

	
55

	
56

	
57

	
58

	
59

	
60

	
61

	
62

	
            {	
	"Hypothesis.statistical_test": ["shapiro(", "normaltest(", "anderson(", "pearsonr(", "spearmanr(", "kendalltau(", "chi2_contingency(", "adfuller(", "kpss(", "ttest_ind"],

	"Environment.import_modules":  ["import \\w+ as \\w+", "import \\w+", "from \\w+ import \\w+", "from \\w+ import \\w+ as \\w+"],
	"Environment.set_options": ["os.environ", " pd.set_option"], 
	"Environment.get_options": ["import"],
 
	"Data_Extraction.load_from_url":["read_html"],
	"Data_Extraction.load_from_sql": ["read_sql"],
	"Data_Extraction.load_from_disk": ["read_fwf", "read_json", "read_clipboard", "read_excel", "read_hdf", "read_feather", "read_parquet", "read_orc", "read_msgpack", "read_stata", "read_sas", "read_spss", "read_pickle", "read_gbq"],
	"Data_Extraction.load_from_csv": ["read_csv"],

	"EDA.show_table": [".head", ".tail"],
	"EDA.show_table_attributes": [".columns", ".index"],
	"EDA.count_missing_values": [".isnull().sum"],
	"EDA.count_duplicates": ["len( \\w+)[  ]{0,1}-[  ]{0,1}len( \\w+).drop_duplicates"],
	"EDA.count_data_types": [".dtypes.value_counts("],

	"Data_Transform.create_dataframe": ["DataFrame"],
	"Data_Transform.remove_duplicates": ["drop_duplicates"],
	"Data_Transform.correct_missing_values": ["fillna", "SimpleImputer(missing_values="],
	"Data_Transform.normalization": ["normalize(", "StandartScaler(", "RobustScaler(", "MinMaxScaler(", "mean(\\w+)[ ]{0, 1}/[ ]{0, 1}std", "mean(\\w+))[ ]{0, 1}/[ ]{0, 1}np.std", ".mean()/[ ]{0, 1}[ ]{0, 1}/[ ]{0, 1}\\w+.std", "mean())/[ ]{0, 1}/[ ]{0, 1}\\w+.std"],
	"Data_Transform.data_type_conversions": ["to_numeric", "astype", "to_datetime", "to_timedelta", "infer_objects", "convert_dtypes"],
	"Data_Transform.randomize_order": [".random.shuffle"],
	"Data_Transform.split": [".split", "KFold("],
	"Data_Transform.filter": [".loc[\\w[ ]{0, 1}==]", ".loc[\\w[ ]{0, 1}>]", ".loc[\\w[ ]{0, 1}<]", ".iloc[\\w[ ]{0, 1}==]", ".iloc[\\w[ ]{0, 1}>]", ".iloc[\\w[ ]{0, 1}<]"],
	"Data_Transform.concatenate": ["concatenate"],
	"Data_Transform.drop_column": ["drop_column"],
	"Data_Transform.sort_values": ["sort_values"],
	"Data_Transform.feature_engineering": ["OneHotEncoder(", "Binarizer(", "FunctionTransformer(", "KBinsDiscretizer(",  "KernelCenterer(", "LabelBinarizer(", "LabelEncoder(", "MultiLabelBinarizer(", "MaxAbsScaler(", "MinMaxScaler(", "Normalizer(", "OrdinalEncoder(", "PolynomialFeatures(", "PowerTransformer(", "QuantileTransformer(", "binarize(", "label_binarize(", "power_transform(", ".apply("],
"Data_Transform.to_dummies":["get_dummies(", "add_dummy_feature("],
"Data_Transform.prepare_x_and_y": ["\\w+x, \\w+y ="],
"Data_Transform.categorify": ["Categorify"],


	"Model_Train.choose_model_class": ["LinearRegression(", "RandomForest(", "Ridge(", "RidgeCV(", "RidgeClassifier(", "RidgeClassifierCV(", "SGD(", "LogisticRegression(", "LogisticRegressionCV(", "SVC(", "SVR(", "Layer(", "XGboost(", "LGBM(", "Perceptron(", "KNeighborsRegressor(", "KNeighborsClassifier(", "SGDRegressor(", "ElasticNet(", "KMeans(", "AgglomerativeClustering(", "SpectralClustering(", "CategoricalNB(", "ComplementNB(", "DecisionTreeClassifier(", "DecisionTreeRegressor(", "Lasso(", "CatBoost(", "ElasticNetCV(", "Dense(", "Activation(", "Embedding(", "Masking(", "Lambda(", "Conv\\dD(", "SeparableConv\\dD(", "DepthwiseConv\\dD(", "Conv\\dDTranspose(", "MaxPooling\\dD(", "AveragePooling\\dD(", "GlobalPooling\\dD(", "GlobalAveragePooling\\dD(", "LSTM(", "GRU(", "RNN(", "SimpleRNN(", "Bidirectional(", "ConvLSTM2D(", "CategoryEncoding(", "CategoryCrossing(", "BatchNormalization(", "LayerNormalization(", "Dropout(", "SpatialDropout\\dD(", "GaussianDropout(", "GaussianNoise(", "ActivityRegularization(", "AlphaDropout(", "Attention(", "AdditiveAttention(", "Cropping\\dD(", "UpSampling\\dD(", "ZeroPadding\\dD(", "LocallyConnected\\dD(", "ReLU(", "Softmax(", "LeakyReLU(", "PReLU(", "ELU(", "ThresholdedReLU("],
	"Model_Train.train_model": [".fit("],
	"Model_Train.metric_computation": ["history[\"loss\"]", "history[\"accuracy\"]"],
	"Model_Train.predict": [".predict(", ".predict_proba("],

	"Model_Evaluation.compute_test_metric": ["KLDivergence class", "kl_divergence function", "MeanSquaredError", "MeanAbsoluteError", "mean_squared_error", "MeanAbsolutePercentageError", "MeanSquaredLogarithmicError", "CosineSimilarity", "mean_absolute_error", "mean_absolute_percentage_error", "mean_squared_logarithmic_error", "huber", "holdout"],
	"Model_Evaluation.predict_on_test": [".predict(test", ".predict(\\w+test", ".predict_proba(test", ".predict_proba(\\w+test"],

	"Model_Interpretation.get_coefficients": [".coef_"],

	"Hyperparam_Tuning.find_best_score": [ "best_score_"],
	"Hyperparam_Tuning.find_best_params": [".best_params_", ".best_params", "best_index_"],
	"Hyperparam_Tuning.find_best_model_class": ["for model\\w+ in", "for \\w+model in", "best_estimator_"],
	"Hyperparam_Tuning.train_on_grid": ["GridSearchCV("],
	"Hyperparam_Tuning.define_search_space": ["hp.choice(", "hp.uniform(", "hp.randint(", "hp.quniform(", "hp.loguniform(", "hp.qloguniform(", "hp.normal(", "hp.qnormal(", "hp.lognormal(", "hp.qlognormal("] ,
"Hyperparam_Tuning.fit_one_cycle": ["fit_one_cycle("],

	"Visualization.learning_history": ["plot(\\w+history"], 
	"Visualization.distribution": ["distplot(", ".heatmap", ".hist"],
	"Visualization.wandb": ["wandb"],
	"Visualization.missing_values": ["msno."],

	"Data_Export.save_to_csv": ["to_csv"],

	"Production.send_to_prod_environment": ["kaggle competitions submit"],
	"Production.save_weights": ["save_weights("]
}