Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

02_unzip_csvs.py 1.4 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
  1. '''
  2. for unzipping the newly downloaded csvs
  3. '''
  4. import logging
  5. import pathlib
  6. import subprocess
  7. from lendingclub import config
  8. import os
  9. import shutil
  10. latest_csvs = config.wrk_csv_dir
  11. raw_dl_csvs = config.raw_dl_dir
  12. # if wrd_dir exists, delete to make anew
  13. if os.path.exists(latest_csvs):
  14. print(f'deleting existing {latest_csvs} folder')
  15. shutil.rmtree(latest_csvs)
  16. os.makedirs(latest_csvs)
  17. print(f'extracting zips from {raw_dl_csvs} to {latest_csvs} \n')
  18. zip_files = pathlib.Path(raw_dl_csvs).rglob("*.zip")
  19. while True:
  20. try:
  21. path = next(zip_files)
  22. except StopIteration:
  23. break # no more files
  24. except PermissionError:
  25. logging.exception("permission error")
  26. else:
  27. extract_dir = pathlib.Path(latest_csvs)
  28. subprocess.call(['unzip', '-o', path, '-d', extract_dir])
  29. # as of 1/15/2020, unzipping the pmt_hist.zip becomes a .gz.
  30. # Should check latest_csvs and try to unzip any file not ending in
  31. # .csvs
  32. gz_files = pathlib.Path(latest_csvs).rglob("*.gz")
  33. while True:
  34. try:
  35. path = next(gz_files)
  36. except StopIteration:
  37. break # no more files
  38. except PermissionError:
  39. logging.exception("permission error")
  40. else:
  41. extract_dir = pathlib.Path(latest_csvs)
  42. print(f'extracting .gz file {path}')
  43. subprocess.call(['gunzip', path])
  44. print('all zip files have been unzipped')
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...