Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

io.py 3.1 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
  1. import os
  2. import shutil
  3. import tempfile
  4. import warnings
  5. import numpy as np
  6. from shapely.geometry import Point
  7. from geopandas import GeoDataFrame, GeoSeries, read_file, read_parquet, read_feather
  8. # TEMP: hide warning from to_parquet
  9. warnings.filterwarnings("ignore", message=".*initial implementation of Parquet.*")
  10. format_dict = {
  11. "ESRI Shapefile": (
  12. ".shp",
  13. lambda gdf, filename: gdf.to_file(filename, driver="ESRI Shapefile"),
  14. lambda filename: read_file(filename, driver="ESRI Shapefile"),
  15. ),
  16. "GeoJSON": (
  17. ".json",
  18. lambda gdf, filename: gdf.to_file(filename, driver="GeoJSON"),
  19. lambda filename: read_file(filename, driver="GeoJSON"),
  20. ),
  21. "GPKG": (
  22. ".gpkg",
  23. lambda gdf, filename: gdf.to_file(filename, driver="GeoJSON"),
  24. lambda filename: read_file(filename, driver="GeoJSON"),
  25. ),
  26. "Parquet": (
  27. ".parquet",
  28. lambda gdf, filename: gdf.to_parquet(filename),
  29. lambda filename: read_parquet(filename),
  30. ),
  31. "Feather": (
  32. ".feather",
  33. lambda gdf, filename: gdf.to_feather(filename),
  34. lambda filename: read_feather(filename),
  35. ),
  36. }
  37. class Bench:
  38. params = ["ESRI Shapefile", "GeoJSON", "GPKG", "Parquet", "Feather"]
  39. param_names = ["file_format"]
  40. def setup(self, file_format):
  41. self.ext, self.writer, self.reader = format_dict[file_format]
  42. num_points = 20000
  43. xs = np.random.rand(num_points)
  44. ys = np.random.rand(num_points)
  45. self.points = GeoSeries([Point(x, y) for (x, y) in zip(xs, ys)])
  46. self.df = GeoDataFrame(
  47. {
  48. "geometry": self.points,
  49. "x": xs,
  50. "y": ys,
  51. "s": np.zeros(num_points, dtype="object"),
  52. }
  53. )
  54. self.tmpdir = tempfile.mkdtemp()
  55. self.filename = os.path.join(self.tmpdir, "frame" + self.ext)
  56. self.writer(self.df, self.filename)
  57. def teardown(self, file_format):
  58. shutil.rmtree(self.tmpdir)
  59. class BenchFrame(Bench):
  60. params = ["ESRI Shapefile", "GeoJSON", "GPKG", "Parquet", "Feather"]
  61. param_names = ["file_format"]
  62. def time_write(self, file_format):
  63. with tempfile.TemporaryDirectory() as tmpdir:
  64. out_filename = os.path.join(tmpdir, "frame" + self.ext)
  65. self.writer(self.df, out_filename)
  66. def time_read(self, file_format):
  67. self.reader(self.filename)
  68. class BenchSeries(Bench):
  69. params = ["ESRI Shapefile", "GeoJSON", "GPKG"]
  70. param_names = ["file_format"]
  71. def setup(self, file_format):
  72. super().setup(file_format)
  73. self.filename_series = os.path.join(self.tmpdir, "series" + self.ext)
  74. self.writer(self.points, self.filename_series)
  75. def time_write_series(self, file_format):
  76. with tempfile.TemporaryDirectory() as tmpdir:
  77. out_filename = os.path.join(tmpdir, "series" + self.ext)
  78. self.writer(self.points, out_filename)
  79. def time_read_series(self, file_format):
  80. GeoSeries.from_file(self.filename_series)
  81. def time_read_series_from_frame(self, file_format):
  82. GeoSeries.from_file(self.filename)
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...