Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

01_download_LC_csvs.py 1.9 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
  # 01_download_LC_csvs.py
"""Download the csvs into the raw download dir and archive them when they changed.

Steps performed at module level, in order:

1. Ensure ``config.arch_dir`` exists.
2. Remove any previous ``config.raw_dl_dir`` so the download starts clean.
3. Download the csvs via ``download_prep.download_csvs`` and hash them via
   ``download_prep.get_hashes``.
4. Copy the fresh download into a new timestamped directory under
   ``config.arch_dir`` when the archive is empty, or when
   ``download_prep.check_file_changes`` reports a difference against the
   directory returned by ``download_prep.get_newest_creationtime_dir``.
"""
# %load ../../lendingclub/csv_dl_archiving/01_download_LC_csvs.py
import os
import time
from shutil import copytree, rmtree

from lendingclub import config
from lendingclub.csv_dl_archiving import download_prep as dp

# setup: every archived copy gets its own timestamped directory name
now = time.strftime("%m_%d_%Hh_%Mm_%Ss_%Y")
arch_name = 'raw_zipped_csvs_' + now
archive_dest = os.path.join(config.arch_dir, arch_name)

# make archive if it doesn't exist
os.makedirs(config.arch_dir, exist_ok=True)

# if dir for new downloads exists, delete it so stale files cannot leak into
# the hashes computed below
if os.path.isdir(config.raw_dl_dir):
    rmtree(config.raw_dl_dir)

# download csvs
dp.download_csvs(config.raw_dl_dir)

# calculate hashes on just downloaded csvs
# NOTE(review): the original comment says these are sha256 sums -- confirm in
# download_prep.get_hashes.
just_dled_hashes = dp.get_hashes(config.raw_dl_dir)

# Decide whether the fresh download has to be archived. The branches only set
# archive_flag and report; the copy itself happens once, below.
if not os.listdir(config.arch_dir):
    print("Archives of csvs are empty. Moving just downloaded into archives")
    archive_flag = True
else:
    compare_dir = dp.get_newest_creationtime_dir(config.arch_dir)
    # check if compared to previous time, there are changes/additions/deletions in csvs
    archive_flag = dp.check_file_changes(compare_dir, just_dled_hashes)
    if archive_flag:
        # Adjacent literals instead of a backslash continuation inside the
        # string, which would embed the next line's indentation in the output.
        print('Downloads differ from most recent archived csvs. '
              'Copy to archives')
    else:
        print('Downloads do not differ from last archived. Not archiving')

if archive_flag:
    copytree(config.raw_dl_dir, archive_dest)

print('done downloading, checking, and archiving (when necessary) the csv files!!!')
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...