Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dvc.jsonnet 1.4 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
  1. local bd = import './lib.jsonnet';
  2. local subpipes = {  // sub-pipeline definitions; each key matches the directory its dvc.jsonnet is imported from
  3. 'loc-mds': import 'loc-mds/dvc.jsonnet',  // keys that are not plain identifiers ('-' or '/') must be quoted
  4. openlibrary: import 'openlibrary/dvc.jsonnet',
  5. viaf: import 'viaf/dvc.jsonnet',
  6. az2014: import 'az2014/dvc.jsonnet',
  7. az2018: import 'az2018/dvc.jsonnet',
  8. bx: import 'bx/dvc.jsonnet',
  9. goodreads: import 'goodreads/dvc.jsonnet',
  10. 'goodreads/simple': import 'goodreads/simple/dvc.jsonnet',  // nested directories get their own entries
  11. 'goodreads/full': import 'goodreads/full/dvc.jsonnet',
  12. 'book-links': import 'book-links/dvc.jsonnet',
  13. };
  14. local parquets = [  // extension-less path of every '.parquet' output declared by any sub-pipeline stage
  15. std.substr(out, 0, std.length(out) - std.length('.parquet'))  // drop only the trailing suffix; std.strReplace would also strip a '.parquet' occurring earlier in the path
  16. for dir in std.objectFields(subpipes)  // keys of subpipes double as the directory argument below
  17. for stage in std.objectValues(subpipes[dir].stages)  // each imported sub-pipeline is expected to expose a 'stages' object
  18. for out in bd.stageOuts(dir, stage)  // NOTE(review): presumably yields output paths relative to this file - confirm in lib.jsonnet
  19. if std.endsWith(out, '.parquet')  // guarantees the suffix removed above is actually present
  20. ];
  21. local notebook(name, deps=[]) = {  // stage object that renders the Quarto notebook <name>.qmd to HTML
  22. local source = name + '.qmd',  // the notebook file itself is always the first dependency
  23. local scratch = { [name + '.ipynb']: { cache: false } },  // the .ipynb output is declared with cache: false
  24. local rendered = [
  25. name + '.html',
  26. name + '_files',
  27. ],
  28. cmd: 'quarto render %s.qmd --to html' % name,
  29. deps: [source] + deps,  // caller-supplied extra dependencies follow the notebook
  30. outs: [scratch] + rendered,
  31. };
  32. local reports = {  // notebook-rendering stages, one per report
  33. ClusterStats: notebook('ClusterStats', ['book-links/cluster-stats.parquet']),
  34. LinkageStats: notebook('LinkageStats', ['book-links/gender-stats.csv']),
  35. };
  36. local describe = {  // per-item stage body: run pq-info on <item>.parquet and write <item>.json
  37. cmd: bd.cmd('pq-info -o ${item}.json ${item}.parquet'),  // '${item}' is left literal here; Jsonnet does not interpolate it
  38. deps: ['${item}.parquet'],
  39. outs: [
  40. { '${item}.json': { cache: false } },
  41. ],
  42. };
  43. local schema = {  // one 'describe' stage per entry of parquets
  44. foreach: parquets,
  45. do: describe,
  46. };
  47. bd.pipeline(reports + { schema: schema })  // same three top-level stages: ClusterStats, LinkageStats, schema
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...