Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

convert_to_hourly.py 2.7 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
  1. """
  2. Input:
  3. parquet: 15-min timestamp index
  4. Load float64
  5. Solar float64
  6. Wind float64
  7. Net Load float64
  8. Renewables float64
  9. Nuclear float64
  10. Large Hydro float64
  11. Imports float64
  12. Generation float64
  13. Thermal float64
  14. Load Less (Generation+Imports) float64
  15. Wind Curtailment float64
  16. Solar Curtailment float64
  17. Output:
  18. parquet: Integer Index
  19. timestamp datetime64[ns, UTC]
  20. load float64
  21. solar float64
  22. wind float64
  23. net_load float64
  24. renewables float64
  25. nuclear float64
  26. large_hydro float64
  27. imports float64
  28. generation float64
  29. thermal float64
  30. load_less_(generation+imports) float64
  31. wind_curtailment float64
  32. solar_curtailment float64
  33. """
  34. import pandas as pd
  35. from src.conf import settings
  # Directory the hourly parquet files are written to: main() builds each
  # output path as OUTPUT_DIR / <input file name>.
  36. OUTPUT_DIR = settings.DATA_DIR / "processed/caiso_hourly/"
  37. def main():
  38. INPUT_DIR = settings.DATA_DIR / "processed/caiso/"
  39. export_columns = [
  40. "solar_curtailment", "solar", "net_load", "load",
  41. "generation", "renewables", "wind_curtailment"
  42. ]
  43. for dataset in INPUT_DIR.glob("*.parquet"):
  44. output_fp = OUTPUT_DIR / dataset.name
  45. df = pd.read_parquet(dataset)
  46. # Rename columns
  47. column_map = zip(
  48. df.columns.tolist(),
  49. df.columns.str.replace("\W+", "_").str.replace("\W$", "").str.lower().tolist()
  50. )
  51. df.rename(columns=dict(column_map), inplace=True)
  52. # Convert to MWH
  53. df *= (5/60.) # Convert to MWH
  54. # Roll up to hourly
  55. df = df.groupby(
  56. by=pd.Grouper(freq="H")
  57. )[export_columns].sum()
  58. # If the file already exists, assume we should be appending to it
  59. append = output_fp.exists()
  60. df.to_parquet(
  61. output_fp,
  62. engine="fastparquet",
  63. append=append
  64. )
  65. if __name__ == "__main__":
  66. # Create output directory
  67. OUTPUT_DIR.mkdir(exist_ok=True)
  68. # Create Outputs
  69. main()
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...