# shruthi-git-actions / SVM_fraud_detection
import json
import os
import pickle
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from nltk import pos_tag
from nltk.corpus import stopwords
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn import model_selection, naive_bayes, svm
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the held-out evaluation split.
# The first column of both CSVs is discarded (presumably the index column
# written when the split was saved -- confirm against the export step).
# NOTE(review): the first *row* is dropped from x_test only, not from y_test;
# verify that the two frames still line up row-for-row.
x_test = pd.read_csv('Data/x_test.csv').iloc[1:, 1:]
y_test = pd.read_csv('Data/y_test.csv').iloc[:, 1:]

# Restore the trained classifier from disk.
# NOTE: unpickling executes code embedded in the file, so this path must only
# ever point at a trusted model artifact.
Pkl_Filename = "Models/Fraud_SVM.pkl"
with open(Pkl_Filename, 'rb') as model_file:
    SVM = pickle.load(model_file)

# Predictions for every row of the test features.
predictions_SVM = SVM.predict(x_test)

# Compute the ROC operating points from the model's predictions.
# NOTE(review): predictions_SVM comes from SVM.predict, so these are presumably
# hard labels rather than continuous scores; if so the curve has only a few
# points. Consider feeding decision scores instead -- confirm the pickled
# model exposes them.
test_fpr, test_tpr, te_thresholds = roc_curve(y_test, predictions_SVM)
print(test_fpr)
print(test_tpr)

# Draw the ROC curve with a dashed diagonal and grey guide lines.
plt.subplots(1, figsize=(10, 10))
plt.title('Receiver Operating Characteristic')
plt.plot(test_fpr, test_tpr)
plt.plot([0, 1], ls="--")           # y = 0..1 over x = 0..1: the diagonal
plt.plot([0, 0], [1, 0], c=".7")    # vertical guide at x = 0
plt.plot([1, 1], c=".7")            # horizontal guide at y = 1
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')

# Save BEFORE showing: with an interactive backend plt.show() blocks and the
# figure is discarded when its window closes, so a later savefig would write
# an empty image.
plt.savefig('auc.png')
plt.show()

# Accuracy as a percentage. Computed once and reused; arguments are passed in
# scikit-learn's documented (y_true, y_pred) order.
accuracy = accuracy_score(y_test, predictions_SVM) * 100
print("SVM Accuracy Score -> ", accuracy)
print(accuracy)

# NumPy arrays are not JSON serialisable, so convert them to plain lists.
data = {'accuracy': accuracy, 'fpr': test_fpr.tolist(), 'tpr': test_tpr.tolist()}

# Make sure the output directory exists so the write below cannot fail with
# FileNotFoundError on a fresh checkout.
os.makedirs('Output', exist_ok=True)
with open('Output/Accuracy.json', 'w') as f:
    json.dump(data, f, sort_keys=True, indent=4, separators=(',', ': '))

# One record per ROC operating point: {"fpr", "tpr", "threshold"}.
with open('plots.json', 'w') as fd:
    json.dump(
        {
            "plots": [
                {"fpr": fp, "tpr": tp, "threshold": t}
                for fp, tp, t in zip(
                    test_fpr.tolist(), test_tpr.tolist(), te_thresholds.tolist()
                )
            ]
        },
        fd,
        indent=4,
    )


# ROC curve again, this time with the area under the curve in the legend.
# The curve is plotted as (fpr, tpr), so x is the false positive rate and y is
# the true positive rate; the axis labels below follow that order.
# NOTE(review): no savefig call follows for this figure in this script, so the
# plot is not written to disk -- confirm whether that is intended.
plt.plot(test_fpr, test_tpr, label=" AUC TEST ="+str(auc(test_fpr, test_tpr)))
plt.legend()
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("AUC(ROC curve)")

# Row-normalised confusion matrix for the loaded model on the test split.
# NOTE: plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed
# in 1.2; on newer versions use ConfusionMatrixDisplay.from_estimator instead.
confusion_display = plot_confusion_matrix(SVM, x_test, y_test, normalize='true')

# Save through the returned display's own figure rather than relying on
# whichever figure happens to be "current" in pyplot.
confusion_display.figure_.savefig('confusion_mat.png')