Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dvc.yaml 2.1 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
  # DVC pipeline for the OpenLibrary import.
  #
  # The scan-* stages and the schema stage run in this file's directory and
  # reach the repository-level run.py through `..`. The edition-isbn-ids and
  # work-clusters stages set `wdir: ..` instead, so their paths are written
  # relative to the parent directory (hence the `openlibrary/` prefix).
  # NOTE(review): this implies the file lives in `openlibrary/` - confirm.
  stages:
    # Scan the authors dump.
    scan-authors:
      cmd: python ../run.py --rust openlib scan-authors ../data/openlib/ol_dump_authors.txt.gz
      deps:
        - ../src/cli/openlib.rs
        - ../src/openlib/
        - ../data/openlib/ol_dump_authors.txt.gz
      outs:
        - authors.parquet
        - author-names.parquet

    # Scan the works dump; depends on the authors table from scan-authors.
    scan-works:
      cmd: python ../run.py --rust openlib scan-works ../data/openlib/ol_dump_works.txt.gz
      deps:
        - ../src/cli/openlib.rs
        - ../src/openlib/
        - ../data/openlib/ol_dump_works.txt.gz
        - authors.parquet
      outs:
        - works.parquet
        - work-authors.parquet
        - work-subjects.parquet
        - author-ids-after-works.parquet

    # Scan the editions dump; depends on outputs of both earlier scan stages.
    scan-editions:
      cmd: python ../run.py --rust openlib scan-editions ../data/openlib/ol_dump_editions.txt.gz
      deps:
        - ../src/cli/openlib.rs
        - ../src/openlib/
        - ../data/openlib/ol_dump_editions.txt.gz
        - authors.parquet
        - works.parquet
        - author-ids-after-works.parquet
      outs:
        - editions.parquet
        - edition-works.parquet
        - edition-isbns.parquet
        - edition-authors.parquet
        - edition-subjects.parquet
        - all-works.parquet
        - all-authors.parquet

    # Link edition ISBNs against book-links/all-isbns.parquet.
    # Runs from the parent directory (wdir: ..).
    edition-isbn-ids:
      cmd: python run.py --rust link-isbn-ids -R edition -o openlibrary/edition-isbn-ids.parquet openlibrary/edition-isbns.parquet
      wdir: ..
      deps:
        - openlibrary/edition-isbns.parquet
        - book-links/all-isbns.parquet
      outs:
        - openlibrary/edition-isbn-ids.parquet

    # Extract work clusters from the book-links cluster graph.
    # Runs from the parent directory (wdir: ..).
    work-clusters:
      cmd: python run.py --rust cluster extract-books -n work_id -o openlibrary/work-clusters.parquet OL-W
      wdir: ..
      deps:
        - book-links/cluster-graph-nodes.parquet
      outs:
        - openlibrary/work-clusters.parquet

    # One generated stage per listed table: run pq-info on <item>.parquet and
    # write the result to <item>.json.
    schema:
      foreach:
        - authors
        - author-names
        - works
        - work-authors
        - work-subjects
        - work-clusters
        - editions
        - edition-works
        - edition-isbns
        - edition-authors
        - edition-subjects
        - all-works
        - all-authors
        - edition-isbn-ids
      do:
        cmd: python ../run.py --rust pq-info -o ${item}.json ${item}.parquet
        deps:
          - ${item}.parquet
        outs:
          # cache: false keeps the JSON file out of the DVC cache.
          - ${item}.json:
              cache: false
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...