Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

process.py 1.1 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
  1. import pathlib
  2. import matplotlib.pyplot as plt
  3. import numpy as np
  4. import pandas as pd
  5. from tensorflow.keras.utils import to_categorical
  6. from zntrack import Node, dvc, zn
  class DataPreprocessor(Node):
      """Prepare kaggle dataset for training

      * normalize and reshape the features
      * one-hot encode the labels

      The input CSV is located at ``data / dataset / dataset`` with its
      suffix set to ``.csv``. Column 0 of the table is used as the labels
      and all remaining columns are used as the features.
      """

      # dependencies and parameters
      data: pathlib.Path = dvc.deps()
      dataset = zn.params()

      # outputs
      features: np.ndarray = zn.outs()
      labels: np.ndarray = zn.outs()

      def run(self):
          """Primary Node Method

          Read the CSV, one-hot encode the first column into ``labels``,
          store the remaining columns in ``features`` and then normalize
          and reshape them.
          """
          csv_path = (self.data / self.dataset / self.dataset).with_suffix(".csv")
          table = pd.read_csv(csv_path).values

          # First column holds the class labels; the rest are the features.
          self.labels = to_categorical(table[:, 0])
          self.features = table[:, 1:]

          self.normalize_and_scale_data()

      def normalize_and_scale_data(self):
          """Divide the features by 255 and reshape them to (-1, 28, 28, 1)."""
          # NOTE(review): the 255 divisor presumably maps 8-bit pixel values
          # into [0, 1] -- confirm against the dataset.
          scaled = self.features / 255
          self.features = scaled.reshape((-1, 28, 28, 1))

      def plot_image(self, index):
          """Show the feature array at ``index`` titled with its label.

          The label shown is the argmax of the one-hot encoded label row.
          """
          image = self.features[index]
          plt.imshow(image)
          label_id = self.labels[index].argmax()
          plt.title(f"Label {label_id}")
          plt.show()
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...