Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

clean_powerplants.py 4.5 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
  1. """
  2. Schema:
  3. OBJECTID_12_13 int64
  4. Plant_ID object
  5. Plant_Alias object
  6. Plant_Label object
  7. MW float64
  8. Gross_MWh float64
  9. Net_MWh float64
  10. General_Fuel object
  11. Status object
  12. STEP_License_Status object
  13. Gen_Count float64
  14. Initial_Start_Date float64
  15. Online_Year float64
  16. Retire_Date float64
  17. Docket_ID object
  18. CEC_Sited_Flag object
  19. STEP_Project_Type object
  20. REAT_ID object
  21. WREGIS_ID object
  22. County object
  23. State_ object
  24. Peaker float64
  25. Renewable_Energy float64
  26. CEC_Jurisdictional_Plant object
  27. CEC_Data_Source object
  28. Wind_Resource object
  29. LRA object
  30. Sub_Area object
  31. Service_Area object
  32. Service_Category object
  33. Caliso_Balancing_Authorities object
  34. Air_District object
  35. Air_Basin object
  36. Quad object
  37. Senate_District object
  38. Assembly_District object
  39. Congressional_District object
  40. CES30_PercentileRange object
  41. CES30_Percentile float64
  42. Website object
  43. CEC_Link object
  44. Aerial object
  45. C_Comments object
  46. Longitude float64
  47. Latitude float64
  48. Elevation float64
  49. Operation_Job float64
  50. Property_Tax float64
  51. Capacity_Factor float64
  52. Income_Percent float64
  53. Project_Location object
  54. Creator object
  55. Creator_Date int64
  56. Last_Editor object
  57. Last_Editor_Date int64
  58. GlobalID object
  59. geometry geometry
  60. dtype: object
  61. """
  62. import pandas as pd
  63. import geopandas as gpd
  64. from enum import Enum
  65. from src.conf import settings
  # Destination of the cleaned power-plant table written by main().
  OUTPUT = settings.DATA_DIR / "processed/geography/powerplants.parquet"
  class FuelTypes(Enum):
      """Fuel category labels for power plants.

      Each member's value is the literal string that is compared against the
      ``General_Fuel`` column when plants are filtered by fuel.
      """

      # Member order is kept as declared so that iteration order is stable.
      SOLAR = "Solar"
      GAS = "Gas"
      HYDRO = "Hydro"
      WIND = "Wind"
      LANDFILL_GAS = "Landfill Gas"
      GEOTHERMAL = "Geothermal"
      BIOMASS = "Biomass"
      DIGESTER_GAS = "Digester Gas"
      COAL = "Coal"
      SOLAR_THERMAL = "Solar Thermal"
      BATTERY = "Battery"
      MSW = "MSW"
      NUCLEAR = "Nuclear"
  def main() -> None:
      """Clean the raw power-plant GeoJSON and write the solar plants to ``OUTPUT``.

      Reads ``raw/geography/powerplants.geojson`` under ``settings.DATA_DIR``,
      drops rows without a geometry or an initial start date, converts the
      millisecond-timestamp date columns to datetimes, keeps only solar and
      solar-thermal plants, renames the retained columns and writes them to
      ``OUTPUT`` as a Parquet file.
      """
      powerplants = gpd.read_file(
          settings.DATA_DIR / "raw/geography/powerplants.geojson", crs="CRS84"
      )

      # Drop power plants with a missing location or a missing start date.
      powerplants = powerplants.dropna(subset=["geometry", "Initial_Start_Date"]).copy()

      # Convert dates; the raw columns are interpreted as millisecond timestamps.
      powerplants["_online_date"] = pd.to_datetime(
          powerplants["Initial_Start_Date"], unit="ms"
      )
      powerplants["_retire_date"] = pd.to_datetime(powerplants["Retire_Date"], unit="ms")

      # Filter for solar (and maybe wind in the future).
      powerplants = powerplants[
          powerplants["General_Fuel"].isin(
              [FuelTypes.SOLAR.value, FuelTypes.SOLAR_THERMAL.value]
          )
      ].copy()

      # Raw column name -> output column name; only these columns are written.
      column_mappings = {
          "MW": "capacity_mw",
          "County": "county_name",
          "State_": "state",
          "_online_date": "online_date",
          "_retire_date": "retire_date",
          "Longitude": "longitude",
          "Latitude": "latitude",
          "General_Fuel": "fuel",
      }
      output_columns = list(column_mappings.values())
      powerplants = powerplants.rename(columns=column_mappings)

      # Overwrite the tabular coordinates with the ones from the geometry, because
      # some rows disagree with it. CRS84 uses (longitude, latitude) axis order,
      # so x is the longitude and y is the latitude.
      powerplants["longitude"] = powerplants["geometry"].x
      powerplants["latitude"] = powerplants["geometry"].y

      powerplants[output_columns].to_parquet(OUTPUT, index=False, engine="fastparquet")


  if __name__ == "__main__":
      main()
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...