1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
|
local bd = import '../lib.jsonnet';

// The `az2018` section of the configuration exposed by lib.jsonnet.
local az_config = bd.config.az2018;

// Raw input files consumed by the two alternative scan stages.
local books_csv = '../data/az2018/Books.csv';
local books_json = '../data/az2018/Books.json.gz';

// Dependency list for a scan stage: the two source paths both scan stages
// share, followed by the stage's own input file.
local scan_deps(input) = [
  '../src/amazon.rs',
  '../src/cli/amazon/',
  input,
];

// Alternative first stages, keyed by the value of `az_config.source`.
// Only the entry selected below ends up in the pipeline.
local source_stages = {
  // Reads the CSV file and writes ratings.parquet only.
  ratings: {
    'scan-ratings': {
      cmd: bd.cmd('amazon scan-ratings -o ratings.parquet --swap-id-columns ' + books_csv),
      deps: scan_deps(books_csv),
      outs: ['ratings.parquet'],
    },
  },
  // Reads the gzipped JSON file and writes both ratings.parquet and
  // reviews.parquet.
  reviews: {
    'scan-reviews': {
      cmd: bd.cmd('amazon scan-reviews --rating-output ratings.parquet --review-output reviews.parquet ' + books_json),
      deps: scan_deps(books_json),
      outs: [
        'ratings.parquet',
        'reviews.parquet',
      ],
    },
  },
};

// Stages appended after whichever scan stage was selected.
local cluster_stages = {
  // This stage sets wdir to '..', so its paths are written from the parent
  // directory (az2018/..., src/..., book-links/...) instead of using '../'.
  'cluster-ratings': {
    wdir: '..',
    cmd: bd.cmd('amazon cluster-ratings -o az2018/az-cluster-ratings.parquet az2018/ratings.parquet'),
    deps: [
      'src/cli/amazon',
      'az2018/ratings.parquet',
      'book-links/isbn-clusters.parquet',
    ],
    outs: ['az2018/az-cluster-ratings.parquet'],
  },
  // Runs `kcore` on the output file of cluster-ratings; no wdir here, so
  // paths are relative to this directory again.
  'cluster-ratings-5core': {
    cmd: bd.cmd('kcore -o az-cluster-ratings-5core.parquet az-cluster-ratings.parquet'),
    deps: [
      'az-cluster-ratings.parquet',
      '../src/cli/kcore.rs',
    ],
    outs: ['az-cluster-ratings-5core.parquet'],
  },
};

// Merge the selected scan stage with the cluster stages and hand the result
// to bd.pipeline along with the `enabled` setting.
// NOTE(review): `enabled` presumably switches the whole pipeline on or off;
// confirm against bd.pipeline in lib.jsonnet.
bd.pipeline(source_stages[az_config.source] + cluster_stages, az_config.enabled)
|