1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
|
- # To add a new cell, type '# %%'
- # To add a new markdown cell, type '# %% [markdown]'
- # %%
- import pandas as pd
- import numpy as np
- from nltk.tokenize import word_tokenize
- from nltk import pos_tag
- from nltk.corpus import stopwords
- from nltk.stem import WordNetLemmatizer
- from sklearn.preprocessing import LabelEncoder
- from collections import defaultdict
- from nltk.corpus import wordnet as wn
- from sklearn.feature_extraction.text import TfidfVectorizer
- from sklearn import model_selection, naive_bayes, svm
- from sklearn.metrics import accuracy_score
- from sklearn.metrics import roc_curve, auc
- from sklearn.metrics import plot_confusion_matrix
- import matplotlib.pyplot as plt
- from sklearn.model_selection import GridSearchCV
- from sklearn.model_selection import train_test_split
- import pickle
- import csv
- import dvc.api
- # %%
- #from google.colab import drive
- #drive.mount('/content/drive/')
- # %%
- #cd /content/drive/My Drive/Colab Notebooks/
- # %%
# Path of the input CSV, relative to the current working directory.
train_df_path = "Data/creditcard.csv"

# Load the whole file into a DataFrame; later cells read the 'Class' column
# from it and treat the remaining columns as features.
data_fraud = pd.read_csv(train_df_path)

# Disabled alternative, kept for reference: read 'creditcard.csv' through
# DVC from the remote named 'remote' instead of the local copy.
#
# with dvc.api.open(
#     'creditcard.csv',
#     remote='remote'
# ) as fd:
#     data_fraud = pd.read_csv(fd)
- # %%
- #print(data_fraud)
- # %%
# Separate predictors from the label: every column except 'Class' is a
# feature, and 'Class' itself is the target passed to the classifier.
x = data_fraud.drop(columns=['Class'])
y = data_fraud['Class']
- # %%
# Hold out 20% of the rows for evaluation. No random_state is passed here,
# so the split is not pinned by this call (NOTE(review): it will vary between
# runs unless the RNG is seeded elsewhere -- confirm whether that is intended).
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
)
# x_train.head()

# Persist the held-out features and labels so they can be reused outside
# this script.
x_test.to_csv("Output/x_test.csv")
y_test.to_csv("Output/y_test.csv")
- # %%
# Build and fit a linear-kernel support vector classifier on the training
# split. degree and gamma are passed exactly as before; per the scikit-learn
# SVC documentation they only influence non-linear kernels.
SVM = svm.SVC(
    C=1.0,
    kernel='linear',
    degree=3,
    gamma='auto',
)
SVM.fit(x_train, y_train)

# Serialise the fitted estimator to disk in binary mode.
Pkl_Filename = "Model/svm_fraud.pkl"
with open(Pkl_Filename, 'wb') as model_file:
    pickle.dump(SVM, model_file)
- # %%
# Hard class labels for the training rows. Kept so that any later code that
# inspects the predictions themselves still finds this name.
y_train_pred = SVM.predict(x_train)

# A ROC curve is traced by sweeping a threshold over a continuous score.
# Feeding roc_curve the 0/1 output of predict() leaves only one usable
# threshold, so the "curve" degenerates to a single operating point and the
# resulting AUC is not a ranking metric. Use the signed distance to the
# separating hyperplane instead.
y_train_score = SVM.decision_function(x_train)
train_fpr, train_tpr, tr_thresholds = roc_curve(y_train, y_train_score)

# Disabled test-set evaluation, kept for reference. If it is re-enabled, the
# roc_curve call should likewise receive SVM.decision_function(x_test) rather
# than the hard predictions.
#
# predictions_SVM = SVM.predict(x_test)
# test_fpr, test_tpr, te_thresholds = roc_curve(y_test, predictions_SVM)
# # Use accuracy_score function to get the accuracy
# print("SVM Accuracy Score -> ",accuracy_score(predictions_SVM, y_test)*100)
- # %%
# Draw the training ROC curve: false positive rate on the x axis, true
# positive rate on the y axis, with the area under the curve in the legend.
train_auc = auc(train_fpr, train_tpr)
plt.plot(train_fpr, train_tpr, label=" AUC TRAIN =" + str(train_auc))
#plt.plot(test_fpr, test_tpr, label=" AUC TEST ="+str(auc(test_fpr, test_tpr)))
#plt.plot([0,1],[0,1],'g--')
plt.legend()
# The axis labels must follow the argument order of plt.plot above
# (x = FPR, y = TPR); they were previously swapped.
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("AUC(ROC curve)")
#plt.grid(color='black', linestyle='-', linewidth=0.5)
#plot_confusion_matrix(SVM, x_test, y_test, normalize='true')
# NOTE(review): with plt.show() disabled and no savefig call in this cell,
# the figure is built but neither displayed nor written by the code shown.
#plt.show()
|