Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

utils.py 6.0 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
  1. import numpy as np
  2. import pandas as pd
  3. import os
  4. from collections import UserDict
  5. def load_data(data_dir):
  6. """Load the GEFCom 2014 energy load data"""
  7. energy = pd.read_csv(os.path.join(data_dir, 'energy.csv'), parse_dates=['timestamp'])
  8. # Reindex the dataframe such that the dataframe has a record for every time point
  9. # between the minimum and maximum timestamp in the time series. This helps to
  10. # identify missing time periods in the data (there are none in this dataset).
  11. energy.index = energy['timestamp']
  12. energy = energy.reindex(pd.date_range(min(energy['timestamp']),
  13. max(energy['timestamp']),
  14. freq='H'))
  15. energy = energy.drop('timestamp', axis=1)
  16. return energy
  17. def mape(predictions, actuals):
  18. """Mean absolute percentage error"""
  19. return ((predictions - actuals).abs() / actuals).mean()
  20. def create_evaluation_df(predictions, test_inputs, H, scaler):
  21. """Create a data frame for easy evaluation"""
  22. eval_df = pd.DataFrame(predictions, columns=['t+'+str(t) for t in range(1, H+1)])
  23. eval_df['timestamp'] = test_inputs.dataframe.index
  24. eval_df = pd.melt(eval_df, id_vars='timestamp', value_name='prediction', var_name='h')
  25. eval_df['actual'] = np.transpose(test_inputs['target']).ravel()
  26. eval_df[['prediction', 'actual']] = scaler.inverse_transform(eval_df[['prediction', 'actual']])
  27. return eval_df
  28. class TimeSeriesTensor(UserDict):
  29. """A dictionary of tensors for input into the RNN model.
  30. Use this class to:
  31. 1. Shift the values of the time series to create a Pandas dataframe containing all the data
  32. for a single training example
  33. 2. Discard any samples with missing values
  34. 3. Transform this Pandas dataframe into a numpy array of shape
  35. (samples, time steps, features) for input into Keras
  36. The class takes the following parameters:
  37. - **dataset**: original time series
  38. - **target** name of the target column
  39. - **H**: the forecast horizon
  40. - **tensor_structures**: a dictionary describing the tensor structure of the form
  41. { 'tensor_name' : (range(max_backward_shift, max_forward_shift), [feature, feature, ...] ) }
  42. if features are non-sequential and should not be shifted, use the form
  43. { 'tensor_name' : (None, [feature, feature, ...])}
  44. - **freq**: time series frequency (default 'H' - hourly)
  45. - **drop_incomplete**: (Boolean) whether to drop incomplete samples (default True)
  46. """
  47. def __init__(self, dataset, target, H, tensor_structure, freq='H', drop_incomplete=True):
  48. self.dataset = dataset
  49. self.target = target
  50. self.tensor_structure = tensor_structure
  51. self.tensor_names = list(tensor_structure.keys())
  52. self.dataframe = self._shift_data(H, freq, drop_incomplete)
  53. self.data = self._df2tensors(self.dataframe)
  54. def _shift_data(self, H, freq, drop_incomplete):
  55. # Use the tensor_structures definitions to shift the features in the original dataset.
  56. # The result is a Pandas dataframe with multi-index columns in the hierarchy
  57. # tensor - the name of the input tensor
  58. # feature - the input feature to be shifted
  59. # time step - the time step for the RNN in which the data is input. These labels
  60. # are centred on time t. the forecast creation time
  61. df = self.dataset.copy()
  62. idx_tuples = []
  63. for t in range(1, H+1):
  64. df['t+'+str(t)] = df[self.target].shift(t*-1, freq=freq)
  65. idx_tuples.append(('target', 'y', 't+'+str(t)))
  66. for name, structure in self.tensor_structure.items():
  67. rng = structure[0]
  68. dataset_cols = structure[1]
  69. for col in dataset_cols:
  70. # do not shift non-sequential 'static' features
  71. if rng is None:
  72. df['context_'+col] = df[col]
  73. idx_tuples.append((name, col, 'static'))
  74. else:
  75. for t in rng:
  76. sign = '+' if t > 0 else ''
  77. shift = str(t) if t != 0 else ''
  78. period = 't'+sign+shift
  79. shifted_col = name+'_'+col+'_'+period
  80. df[shifted_col] = df[col].shift(t*-1, freq=freq)
  81. idx_tuples.append((name, col, period))
  82. df = df.drop(self.dataset.columns, axis=1)
  83. idx = pd.MultiIndex.from_tuples(idx_tuples, names=['tensor', 'feature', 'time step'])
  84. df.columns = idx
  85. if drop_incomplete:
  86. df = df.dropna(how='any')
  87. return df
  88. def _df2tensors(self, dataframe):
  89. # Transform the shifted Pandas dataframe into the multidimensional numpy arrays. These
  90. # arrays can be used to input into the keras model and can be accessed by tensor name.
  91. # For example, for a TimeSeriesTensor object named "model_inputs" and a tensor named
  92. # "target", the input tensor can be acccessed with model_inputs['target']
  93. inputs = {}
  94. y = dataframe['target']
  95. y = y.as_matrix()
  96. inputs['target'] = y
  97. for name, structure in self.tensor_structure.items():
  98. rng = structure[0]
  99. cols = structure[1]
  100. tensor = dataframe[name][cols].as_matrix()
  101. if rng is None:
  102. tensor = tensor.reshape(tensor.shape[0], len(cols))
  103. else:
  104. tensor = tensor.reshape(tensor.shape[0], len(cols), len(rng))
  105. tensor = np.transpose(tensor, axes=[0, 2, 1])
  106. inputs[name] = tensor
  107. return inputs
  108. def subset_data(self, new_dataframe):
  109. # Use this function to recreate the input tensors if the shifted dataframe
  110. # has been filtered.
  111. self.dataframe = new_dataframe
  112. self.data = self._df2tensors(self.dataframe)
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...