Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

stage_01_get_data.py 1.5 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
  1. import argparse
  2. import os
  3. import logging
  4. from src.utils.common import read_yaml, create_directories
  5. import urllib.request as req
  6. STAGE = "stage 01 get data" ## <<< change stage name
  7. logging.basicConfig(
  8. filename=os.path.join("logs", 'running_logs.log'),
  9. level=logging.INFO,
  10. format="[%(asctime)s: %(levelname)s: %(module)s]: %(message)s",
  11. filemode="a"
  12. )
  13. def main(config_path):
  14. ## read config files
  15. config = read_yaml(config_path)
  16. source_data_url = config["source_data_url"]
  17. local_data_dir = config["source_download_dir"]["data_dir"]
  18. create_directories([local_data_dir])
  19. data_filename = config["source_download_dir"]["data_file"]
  20. local_data_filepath = os.path.join(local_data_dir, data_filename)
  21. logging.info("Download started")
  22. filename, headers = req.urlretrieve(source_data_url, local_data_filepath)
  23. logging.info(f"Download completed")
  24. logging.info(f"Download file is present at: {filename}")
  25. logging.info(f"Download headers: \n{headers}")
  26. if __name__ == '__main__':
  27. args = argparse.ArgumentParser()
  28. args.add_argument("--config", "-c", default="configs/config.yaml")
  29. parsed_args = args.parse_args()
  30. try:
  31. logging.info("\n********************")
  32. logging.info(f">>>>> stage {STAGE} started <<<<<")
  33. main(config_path=parsed_args.config)
  34. logging.info(f">>>>> stage {STAGE} completed!<<<<<\n")
  35. except Exception as e:
  36. logging.exception(e)
  37. raise e
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...