Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

fraud_svm.py 2.3 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
  1. # To add a new cell, type '# %%'
  2. # To add a new markdown cell, type '# %% [markdown]'
  3. # %%
  4. import pandas as pd
  5. import numpy as np
  6. from nltk.tokenize import word_tokenize
  7. from nltk import pos_tag
  8. from nltk.corpus import stopwords
  9. from nltk.stem import WordNetLemmatizer
  10. from sklearn.preprocessing import LabelEncoder
  11. from collections import defaultdict
  12. from nltk.corpus import wordnet as wn
  13. from sklearn.feature_extraction.text import TfidfVectorizer
  14. from sklearn import model_selection, naive_bayes, svm
  15. from sklearn.metrics import accuracy_score
  16. from sklearn.metrics import roc_curve, auc
  17. from sklearn.metrics import plot_confusion_matrix
  18. import matplotlib.pyplot as plt
  19. from sklearn.model_selection import GridSearchCV
  20. from sklearn.model_selection import train_test_split
  21. import pickle
  22. import csv
  23. import dvc.api
  24. # %%
  25. #from google.colab import drive
  26. #drive.mount('/content/drive/')
  27. # %%
  28. #cd /content/drive/My Drive/Colab Notebooks/
  29. # %%
  # Read the dataset from the local CSV copy into a DataFrame.
  train_df_path = "Data/creditcard.csv"
  data_fraud = pd.read_csv(filepath_or_buffer=train_df_path)
  # Disabled alternative, kept for reference: open the same CSV through
  # the DVC API (remote named 'remote') instead of the local path.
  #
  # with dvc.api.open(
  #     'creditcard.csv',
  #     remote='remote'
  # ) as fd:
  #     data_fraud = pd.read_csv(fd)
  39. # %%
  40. #print(data_fraud)
  41. # %%
  # Target is the 'Class' column; every other column is used as a feature.
  y = data_fraud['Class']
  x = data_fraud.drop(columns='Class')
  # %%
  # Hold out 20% of the rows for testing.
  # random_state pins the shuffle so the exported hold-out files and the
  # fitted model are reproducible from run to run; stratify=y keeps the
  # proportion of each 'Class' value the same in both splits
  # (presumably the positive class is rare here - verify against the data).
  x_train, x_test, y_train, y_test = train_test_split(
      x,
      y,
      test_size=0.2,
      random_state=42,
      stratify=y,
  )
  #x_train.head()
  # Export the hold-out split (row index included) to the Output/ directory,
  # which must already exist.
  x_test.to_csv("Output/x_test.csv")
  y_test.to_csv("Output/y_test.csv")
  49. # %%
  # Build a linear-kernel support vector classifier and fit it on the
  # training split in one step (fit returns the estimator itself).
  # NOTE(review): per the scikit-learn SVC docs, degree and gamma only
  # affect non-linear kernels; they are passed through unchanged here.
  SVM = svm.SVC(C=1.0, kernel='linear', degree=3, gamma='auto').fit(x_train, y_train)
  # Serialize the fitted model to disk; the Model/ directory must already exist.
  Pkl_Filename = "Model/svm_fraud.pkl"
  with open(Pkl_Filename, 'wb') as model_file:
      pickle.dump(SVM, model_file)
  55. # %%
  # Hard class labels for the training split (name kept available for
  # interactive inspection).
  y_train_pred = SVM.predict(x_train)
  # A ROC curve is traced by sweeping a threshold over a continuous score.
  # Feeding it the hard labels from predict() collapses the curve to a
  # single operating point, so use the signed distance to the separating
  # hyperplane instead.
  y_train_score = SVM.decision_function(x_train)
  train_fpr, train_tpr, tr_thresholds = roc_curve(y_train, y_train_score)
  # Disabled test-split evaluation, kept for reference. If re-enabled,
  # pass SVM.decision_function(x_test) to roc_curve for the same reason
  # as above.
  #
  # predictions_SVM = SVM.predict(x_test)
  # test_fpr, test_tpr, te_thresholds = roc_curve(y_test, predictions_SVM)
  # # Use accuracy_score function to get the accuracy
  # print("SVM Accuracy Score -> ",accuracy_score(predictions_SVM, y_test)*100)
  64. # %%
  # Draw the training ROC curve: false positive rate on the x-axis, true
  # positive rate on the y-axis, with the area under the curve in the legend.
  train_auc = auc(train_fpr, train_tpr)
  plt.plot(train_fpr, train_tpr, label=f" AUC TRAIN ={train_auc}")
  #plt.plot(test_fpr, test_tpr, label=" AUC TEST ="+str(auc(test_fpr, test_tpr)))
  #plt.plot([0,1],[0,1],'g--')
  plt.legend()
  # The axis labels were swapped relative to the plotted data; x carries
  # train_fpr and y carries train_tpr.
  plt.xlabel("False Positive Rate")
  plt.ylabel("True Positive Rate")
  plt.title("AUC(ROC curve)")
  #plt.grid(color='black', linestyle='-', linewidth=0.5)
  #plot_confusion_matrix(SVM, x_test, y_test, normalize='true')
  #plt.show()
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...