# shruthi-git-actions / Fraud_detection
# To add a new cell, type '# %%'
# To add a new markdown cell, type '# %% [markdown]'
# %%
import pandas as pd
import numpy as np
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.preprocessing import LabelEncoder
from collections import defaultdict
from nltk.corpus import wordnet as wn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import model_selection, naive_bayes, svm
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import plot_confusion_matrix
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
import pickle
import csv
import dvc.api

# %%
#from google.colab import drive
#drive.mount('/content/drive/')


# %%
#cd /content/drive/My Drive/Colab Notebooks/


# %%
# Load the transactions table from the relative path Data/creditcard.csv.
train_df_path = "Data/creditcard.csv"
data_fraud = pd.read_csv(filepath_or_buffer=train_df_path)
# The string literal below is never executed. It preserves an alternative
# loader that would read 'creditcard.csv' through dvc.api.open from the
# remote named 'remote'.
'''
with dvc.api.open(
        'creditcard.csv',
        remote='remote'
        ) as fd:
    data_fraud = pd.read_csv(fd)
'''
# %%
#print(data_fraud)


# %%
# Split the table into the model inputs (every column except 'Class') and
# the label column 'Class'.
x = data_fraud.drop(columns='Class')
y = data_fraud['Class']


# %%
# Hold out 20% of the rows as a test set and write that hold-out set to disk.
# No random_state is passed, so the partition differs from run to run.
(x_train, x_test,
 y_train, y_test) = train_test_split(x, y, test_size=0.2)
#x_train.head()
x_test.to_csv(path_or_buf="Output/x_test.csv")
y_test.to_csv(path_or_buf="Output/y_test.csv")

# %%
# Fit a linear-kernel support vector classifier on the training split.
# degree and gamma are passed through unchanged; per scikit-learn's SVC
# documentation they only affect non-linear kernels.
SVM = svm.SVC(
    kernel='linear',
    C=1.0,
    degree=3,
    gamma='auto',
).fit(x_train, y_train)

# Serialise the fitted classifier to Model/svm_fraud.pkl with pickle.
Pkl_Filename = "Model/svm_fraud.pkl"
with open(Pkl_Filename, mode='wb') as file:
    file.write(pickle.dumps(SVM))


# %%
# Score the training rows with the classifier's continuous decision value.
# roc_curve sweeps a threshold over its second argument, so it needs a
# non-thresholded score; feeding it the hard class labels from predict()
# collapses the curve to a single operating point and misstates the AUC.
y_train_score = SVM.decision_function(x_train)
train_fpr, train_tpr, tr_thresholds = roc_curve(y_train, y_train_score)
# Disabled test-set evaluation, kept as an inert string literal. If it is
# re-enabled, roc_curve should likewise receive SVM.decision_function(x_test)
# rather than the output of predict().
'''
predictions_SVM = SVM.predict(x_test)
test_fpr, test_tpr, te_thresholds = roc_curve(y_test, predictions_SVM)
# Use accuracy_score function to get the accuracy
print("SVM Accuracy Score -> ",accuracy_score(predictions_SVM, y_test)*100)
'''


# %%
# Draw the training ROC curve; the legend entry reports the area under it.
plt.plot(train_fpr, train_tpr, label=" AUC TRAIN ="+str(auc(train_fpr, train_tpr)))
#plt.plot(test_fpr, test_tpr, label=" AUC TEST ="+str(auc(test_fpr, test_tpr)))
#plt.plot([0,1],[0,1],'g--')
plt.legend()
# plot() above puts the false positive rate on the x-axis and the true
# positive rate on the y-axis, so the axis labels follow that order
# (they were previously swapped).
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("AUC(ROC curve)")
#plt.grid(color='black', linestyle='-', linewidth=0.5)

# NOTE: nothing in this cell displays or saves the figure; the plt.show()
# call below is commented out.
#plot_confusion_matrix(SVM, x_test, y_test, normalize='true')
#plt.show()