1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
- local bd = import '../lib.jsonnet';
// Settings for the Library of Congress MDSConnect downloads.
// mdsCurl concatenates dl_base with a *_base stem and places the matching
// *_range between square brackets to form the download URL.
local loc = {
  // Common URL prefix shared by every MDSConnect file.
  dl_base: 'https://www.loc.gov/cds/downloads/MDSConnect/',

  // Book records: file-name stem and the part range substituted into the URL.
  book_base: 'BooksAll.2016.part',
  book_range: '01-43',

  // Name records: file-name stem and the part range substituted into the URL.
  name_base: 'Names.2016.part',
  name_range: '01-40',
};
// Build the URL of an OpenLibrary dump file.
//   part: dump name placed in the file name (e.g. 'editions', 'authors', 'works').
//   date: dump date string, inserted into the file name unchanged.
// Returns the formatted URL string.
local olUrl(part, date) =
  'https://openlibrary.org/data/ol_dump_%s_%s.txt.gz' % [part, date];
// Build the URL of the VIAF MARC21 cluster dump for a given date.
//   date: date string; every '-' is removed before it is placed in the URL
//         (so '2020-01-02' becomes '20200102').
// Returns the formatted URL string.
local viafUrl(date) =
  local compactDate = std.strReplace(date, '-', '');
  'https://viaf.org/viaf/data/viaf-%s-clusters-marc21.xml.gz' % [compactDate];
// Describe a stage that fetches a numbered series of MDSConnect files with curl.
//   folder: directory the files are written into; also the stage's only output.
//   base:   file-name stem, appended to loc.dl_base.
//   range:  part range placed between square brackets in the URL.
// Returns an object with the shell command (cmd) and its outputs (outs).
local mdsCurl(folder, base, range) =
  // The bracketed range in the source URL and the '#1' in the destination are
  // passed through to curl as-is (presumably curl URL globbing, with '#1'
  // standing for the current part number -- confirm against the curl manual).
  local source = '%s%s[%s].xml.gz' % [loc.dl_base, base, range];
  local destination = '%s/%s#1.xml.gz' % [folder, base];
  {
    cmd: 'curl -L %s -o %s --create-dirs' % [source, destination],
    outs: [folder],
  };
// Describe a stage that downloads a single URL to a single file with curl.
//   url:  address to fetch.
//   file: path the download is written to; also the stage's only output.
// Returns an object with the shell command (cmd) and its outputs (outs).
local curl(url, file) = {
  cmd: 'curl -L --retry 100 -o %s %s' % [file, url],
  outs: [file],
};
// Dump dates are read from the shared configuration exposed by the imported library.
local viafDate = bd.config.viaf.date;
local olDate = bd.config.openlibrary.date;

// One entry per download stage, keyed by stage name.
local stages = {
  // Library of Congress MDSConnect series (multi-part downloads).
  'loc-books': mdsCurl('loc-books', loc.book_base, loc.book_range),
  'loc-names': mdsCurl('loc-names', loc.name_base, loc.name_range),

  // VIAF cluster dump (single file).
  'viaf-clusters': curl(viafUrl(viafDate), 'viaf-clusters-marc21.xml.gz'),

  // OpenLibrary dumps (one file each), all taken from the same dump date.
  'ol-editions': curl(olUrl('editions', olDate), 'openlib/ol_dump_editions.txt.gz'),
  'ol-authors': curl(olUrl('authors', olDate), 'openlib/ol_dump_authors.txt.gz'),
  'ol-works': curl(olUrl('works', olDate), 'openlib/ol_dump_works.txt.gz'),
};

// The file's value is whatever bd.pipeline produces from the stage map.
bd.pipeline(stages)
|