Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

aggregate_UN_data.R 2.4 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
  1. library(readr)
  2. library(dplyr)
  3. library(tidyr)
  4. # Read all raw datasets from UN into one dataframe ---------
  5. df <- list.files(path='data/raw/UN Data/', full.names = TRUE,
  6. pattern = '*.csv$') %>%
  7. lapply(read_delim, delim = ',', skip = 1) %>%
  8. bind_rows
  9. # Remove columns that won't contribute to comparisons among countries
  10. # or variables that we will deal with later.
  11. df %>%
  12. select(-c(`Capital City`, `Capital City footnote`,
  13. `System of trade footnote`, `National currency`,
  14. `National currency footnote`, `Last Election Date footnote`,
  15. `Major trading partner 1 (% of exports) footnote`,
  16. `Tourism arrivals series type footnote`, Footnotes,
  17. `Region/Country/Area Year Series Value Footnotes Source`,
  18. Source, `Major trading partner 1 (% of exports)`,
  19. `Tourism arrivals series type`, `Last Election Date`,
  20. `System of trade`)) -> df
  21. # Bring it to a more usable format --------------------------------------------
  22. # Rename columns
  23. colnames(df) <- c('region_code', 'region_name', 'variable', 'year', 'value')
  24. # Let's work on this dataframe now
  25. df %>%
  26. # Keep only latest data for each variable
  27. group_by(variable) %>%
  28. filter(year == max(year)) %>%
  29. ungroup() %>%
  30. # Bring variables from rows to column
  31. pivot_wider(names_from = c(variable, year), values_from = value) -> df
  32. # region_code was important for having a unique ID but now it's useless
  33. df %>%
  34. select(-c(region_code)) -> df
  35. # Let's add major trading partner (exportations)
  36. # export
  37. trading_partners <- read_delim('data/raw/UN Data/SYB62_330_201907_Major Trading Partners.csv',
  38. skip = 1, delim=',')
  39. trading_partners %>%
  40. group_by(Series) %>%
  41. filter(Year == max(Year)) %>%
  42. filter(Series %in% c('Major trading partner 1 (% of exports)')) %>%
  43. pivot_wider(names_from = Series,
  44. values_from = `Major trading partner 1 (% of exports)`) %>%
  45. ungroup() %>%
  46. select(-c(`Region/Country/Area`, Year,
  47. `Major trading partner 1 (% of exports) footnote`, Value,
  48. Footnotes, Source)) -> trading_partners
  49. colnames(trading_partners) <- c('region_name', 'whos_major_trade_partner_exp_1')
  50. df <- left_join(df, trading_partners, by = 'region_name')
  51. rm(trading_partners)
  52. # Save UN dataset -------------------------------------
  53. write_tsv(x = df, path = 'data/raw/UN_dataset.tsv', quote_escape = FALSE)
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...