levin
/
source_code_classification
mirror of https://github.com/whatevernevermindbro/source_code_classification


  
1

	
2

	
3

	
4

	
5

	
6

	
7

	
8

	
9

	
10

	
11

	
12

	
13

	
14

	
15

	
16

	
17

	
18

	
19

	
20

	
21

	
22

	
23

	
24

	
25

	
26

	
27

	
28

	
29

	
30

	
31

	
32

	
33

	
34

	
35

	
36

	
37

	
38

	
39

	
40

	
41

	
42

	
43

	
44

	
45

	
46

	
47

	
            {
	"import": ["import \\w+ as \\w+", "import \\w+", "from \\w+ import \\w+", "from \\w+ import \\w+ as \\w+"],

	"data_import": ["[ ]{0,1}=[ ]{0,1}open[ ]{0,1}\\(",
			"with open[ ]{0,1}\\([ ]{0,1}[\\'\"]\\w+.\\w+[\\'\"][ ]{0,1},[ ]{0,1}[\\'\"]\\w+[\\'\"]\\) as \\w+[ ]{0,1}:",
			"read_csv", "read_fwf", "read_json", "read_html", "read_clipboard", "read_excel", "read_hdf",
			"read_feathe", "read_parquet", "read_orc", "read_msgpack", "read_stata", "read_sas",
			"read_spss", "read_pickle", "read_sql", "read_gbq"],
	
	"data_export": ["to_csv", "to_json", "to_clipboard", "to_excel", "to_hdf", "to_feathe", "to_parquet",
			"to_msgpack", "to_stata", "to_pickle", "to_sql", "to_gbq"],

	"preprocessing": ["train_test_split", "Normalize", "StandardScale", "normalize", "scale", "DictVectorize",
			"CountVectorize", "HashingVectorize", "TfidfVectorize", "Stemme", "WordNetLemmatize",
			"Word2Vec", "LancasterStemme", "SnowballStemme", "PorterStemme", "stemme", "encode",
			"decode", "lemmatize", "stem", "minmaxscale", "labelencode", "LabelEncode", "concat",
			"concatenate", "merge", "fill", "fillna", "normalize", "crop", "drop", "preprocess_image",
			"vectorize", "tokenize", "tfidf", "replace", "test_size", "map", "filte", "getdummies",
			"fit_transform", "transform", "strip", "split", "apply", "dropna", "clean", "duplicated",
			"drop_duplicates", "reset_index", "reshape", "stemming", "deskew", "str.contains",
			"augmentation", "flatten", "groupby", "sort_values", "pad", "set_vscale", "pivot", "melt",
			"sent_tokenize", "word_tokenize", "join", "preprocess", "preprocessing", "test_split",
			"font_scale", "scale", "color_continuous_scale", "colorscale", "preprocesso", "rescale",
			"reversescale", "inverse_transform", "simple_preprocess", "generation", "augmentation",
			"lemmatization", "scale", "resize", "invertxaxis", "attention", "polynomialfeatures", "autoscale"],

	"visualization": [".subplots", ".scatte", ".set_title", ".set_xlabel", ".set_ylabel",
			".plot", ".legend", ".hist", ".ba", ".barh", ".scatterplot", ".lineplot", ".distplot",
			".countplot", ".boxplot", ".imshow", ".set_xticks", ".set_yticks", ".set_xticklabels",
			".set_yticklabels", ".setp", ".heatmap", ".FacetGrid", ".map", ".kdeplot",
			".pairplot", "scatter_matrix"],

	"model": ["LinearRegression", "RandomForest", "Ridge", "RidgeCV", "RidgeClassifie", "RidgeClassifierCV",
		"SGD", "LogisticRegression", "LogisticRegressionCV", "SVC", "SV", "Laye", "XGboost", "LGBM",
		"Perceptron", "KNeighborsRegresso", "KNeighborsClassifie", "SGDRegresso", "ElasticNet", "ElasticNetCV",
		"KMeans", "AgglomerativeClustering", "SpectralClustering", "CategoricalNB", "ComplementNB",
		"DecisionTreeClassifie", "DecisionTreeRegresso", "Lasso", "CatBoost"],
	
	"deep_learning_model": [".compile()", "tf.nn.", "tf.add", "tf.layers", "tf.train", "Layer(", "Model(",
			"model.summary()", "input_shape=", "Optimize", "Adam", "SGD", "tf.Variable", "K.",
			"torch.nn", ".load_weights(", "model.children", "model.base_layers", "model.parameters", "Sequential(",
			"nn.Module", "F."],
	
	"train": [".fit", "epoch", "loss", "optimize"],

	"predict": [".predict", "_erro", ".mean_", "_score", "_distance"]
}