Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

aggregate_UN_data.R 2.3 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
  1. library(readr)
  2. library(dplyr)
  3. library(tidyr)
  4. # Read all raw datasets from UN into one dataframe ---------
  5. df <- list.files(path='data/raw/UN Data/', full.names = TRUE) %>%
  6. lapply(read_delim, delim = ',', skip = 1) %>%
  7. bind_rows
  8. # Remove columns that won't contribute to comparisons among countries
  9. # or variables that we will deal with later.
  10. df %>%
  11. select(-c(`Capital City`, `Capital City footnote`,
  12. `System of trade footnote`, `National currency`,
  13. `National currency footnote`, `Last Election Date footnote`,
  14. `Major trading partner 1 (% of exports) footnote`,
  15. `Tourism arrivals series type footnote`, Footnotes,
  16. `Region/Country/Area Year Series Value Footnotes Source`,
  17. Source, `Major trading partner 1 (% of exports)`,
  18. `Tourism arrivals series type`, `Last Election Date`,
  19. `System of trade`)) -> df
  20. # Bring it to a more usable format --------------------------------------------
  21. # Rename columns
  22. colnames(df) <- c('region_code', 'region_name', 'variable', 'year', 'value')
  23. # Let's work on this dataframe now
  24. df %>%
  25. # Keep only latest data for each variable
  26. group_by(variable) %>%
  27. filter(year == max(year)) %>%
  28. ungroup() %>%
  29. # Bring variables from rows to column
  30. pivot_wider(names_from = c(variable, year), values_from = value) -> df
  31. # region_code was important for having a unique ID but now it's useless
  32. df %>%
  33. select(-c(region_code)) -> df
  34. # Let's add major trading partner (exportations)
  35. # export
  36. trading_partners <- read_delim('data/raw/UN Data/SYB62_330_201907_Major Trading Partners.csv',
  37. skip = 1, delim=',')
  38. trading_partners %>%
  39. group_by(Series) %>%
  40. filter(Year == max(Year)) %>%
  41. filter(Series %in% c('Major trading partner 1 (% of exports)')) %>%
  42. pivot_wider(names_from = Series,
  43. values_from = `Major trading partner 1 (% of exports)`) %>%
  44. ungroup() %>%
  45. select(-c(`Region/Country/Area`, Year,
  46. `Major trading partner 1 (% of exports) footnote`, Value,
  47. Footnotes, Source)) -> trading_partners
  48. colnames(trading_partners) <- c('region_name', 'whos_major_trade_partner_exp_1')
  49. df <- left_join(df, trading_partners, by = 'region_name')
  50. rm(trading_partners)
  51. # Save UN dataset -------------------------------------
  52. write_tsv(x = df, path = 'data/raw/UN_dataset.tsv', quote_escape = FALSE)
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...