Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dvc.jsonnet 1.2 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
  1. local bd = import './lib.jsonnet';
  // Sub-pipelines keyed by the directory they live in. Each value is the
  // dvc.jsonnet imported from that directory.
  local subpipes = {
    'loc-mds': import 'loc-mds/dvc.jsonnet',
    'openlibrary': import 'openlibrary/dvc.jsonnet',
    'viaf': import 'viaf/dvc.jsonnet',
    'az2014': import 'az2014/dvc.jsonnet',
    'az2018': import 'az2018/dvc.jsonnet',
    'bx': import 'bx/dvc.jsonnet',
    'goodreads': import 'goodreads/dvc.jsonnet',
    'book-links': import 'book-links/dvc.jsonnet',
  };
  // Extension-less paths of every Parquet output that bd.stageOuts reports
  // for each stage of each sub-pipeline in `subpipes`.
  local parquets =
    local ext = '.parquet';
    [
      // Trim only the trailing extension. std.strReplace would also delete any
      // '.parquet' that appears earlier in the path (e.g. in a directory name).
      std.substr(out, 0, std.length(out) - std.length(ext))
      for dir in std.objectFields(subpipes)
      for stage in std.objectValues(subpipes[dir].stages)
      for out in bd.stageOuts(dir, stage)
      if std.endsWith(out, ext)
    ];
  // Builds a stage object that renders the Quarto document `<name>.qmd` to HTML.
  //
  //   name: base name of the document, without extension.
  //   deps: extra dependencies appended after the .qmd source (default: none).
  //
  // The stage lists three outputs: `<name>.ipynb` (marked cache: false),
  // `<name>.html`, and `<name>_files`.
  local notebook(name, deps=[]) =
    local source = name + '.qmd';
    local uncachedNotebook = { [name + '.ipynb']: { cache: false } };
    {
      cmd: 'quarto render %s.qmd --to html' % name,
      deps: [source] + deps,
      outs: [
        uncachedNotebook,
        name + '.html',
        name + '_files',
      ],
    };
  // Schema stage: iterates over `parquets` and, for each item, runs pq-info
  // (wrapped by bd.cmd) on `<item>.parquet` to produce `<item>.json`, which is
  // marked cache: false. `${item}` is left as a literal placeholder here;
  // presumably DVC's foreach/do templating expands it -- confirm against DVC docs.
  local schemaStage = {
    foreach: parquets,
    do: {
      cmd: bd.cmd('pq-info -o ${item}.json ${item}.parquet'),
      deps: ['${item}.parquet'],
      outs: [{ '${item}.json': { cache: false } }],
    },
  };

  // Top-level pipeline: two notebook-rendering stages plus the schema stage.
  bd.pipeline({
    ClusterStats: notebook('ClusterStats', deps=['book-links/cluster-stats.parquet']),
    LinkageStats: notebook('LinkageStats', deps=['book-links/gender-stats.csv']),
    schema: schemaStage,
  })
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...