Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dvc.yaml 4.0 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
  # DVC pipeline definition for the book-links directory.
  #
  # Stages without `wdir` run in this file's directory and reference the
  # repository root through `../`; stages with `wdir: ..` run from the parent
  # directory, so their paths are written relative to that directory instead.
  stages:
    # Collect ISBNs from the source datasets listed in `deps` into one file,
    # driven by the all-isbns.toml specification.
    collect-isbns:
      cmd: python ../run.py --rust collect-isbns -o all-isbns.parquet all-isbns.toml
      deps:
        - ../src/cli/collect_isbns.rs
        - all-isbns.toml
        - ../loc-mds/book-isbns.parquet
        - ../openlibrary/edition-isbns.parquet
        - ../goodreads/gr-book-ids.parquet
        - ../bx/cleaned-ratings.csv
        - ../az2014/ratings.parquet
      outs:
        - all-isbns.parquet

    # Run the `cluster-books` command; it produces the ISBN cluster table, the
    # cluster graph files, and a metrics JSON file tracked under `metrics`.
    cluster:
      cmd: python run.py --rust cluster-books
      wdir: ..
      deps:
        - src/cli/cluster_books.rs
        - src/graph/
        - book-links/all-isbns.parquet
        - loc-mds/book-ids.parquet
        - loc-mds/book-isbn-ids.parquet
        - openlibrary/editions.parquet
        - openlibrary/edition-isbn-ids.parquet
        - openlibrary/all-works.parquet
        - openlibrary/edition-works.parquet
        - goodreads/gr-book-ids.parquet
        - goodreads/book-isbn-ids.parquet
      outs:
        - book-links/isbn-clusters.parquet
        - book-links/cluster-stats.parquet
        - book-links/cluster-graph-nodes.parquet
        - book-links/cluster-graph-edges.parquet
        - book-links/book-graph.mp.zst
      metrics:
        - book-links/cluster-metrics.json

    # Extract first authors per cluster using only the `openlib` source.
    cluster-ol-first-authors:
      # Folded scalar: the lines below are joined with single spaces into one
      # command line.
      cmd: >-
        python run.py --rust cluster extract-authors
        -o book-links/cluster-ol-first-authors.parquet
        --first-author -s openlib
      wdir: ..
      deps:
        - src/cli/cluster
        - book-links/isbn-clusters.parquet
        - openlibrary/edition-isbn-ids.parquet
        - openlibrary/edition-authors.parquet
        - openlibrary/author-names.parquet
      outs:
        - book-links/cluster-ol-first-authors.parquet

    # Extract first authors per cluster using only the `loc` source.
    cluster-loc-first-authors:
      cmd: >-
        python run.py --rust cluster extract-authors
        -o book-links/cluster-loc-first-authors.parquet
        --first-author -s loc
      wdir: ..
      deps:
        - src/cli/cluster
        - book-links/isbn-clusters.parquet
        - loc-mds/book-isbn-ids.parquet
        - loc-mds/book-authors.parquet
      outs:
        - book-links/cluster-loc-first-authors.parquet

    # Extract first authors per cluster using both the `openlib` and `loc`
    # sources; this output feeds the cluster-genders stage.
    cluster-first-authors:
      cmd: >-
        python run.py --rust cluster extract-authors
        -o book-links/cluster-first-authors.parquet
        --first-author -s openlib -s loc
      wdir: ..
      deps:
        - src/cli/cluster
        - book-links/isbn-clusters.parquet
        - openlibrary/edition-isbn-ids.parquet
        - openlibrary/edition-authors.parquet
        - openlibrary/author-names.parquet
        - loc-mds/book-isbn-ids.parquet
        - loc-mds/book-authors.parquet
      outs:
        - book-links/cluster-first-authors.parquet

    # Run `extract-author-gender`, passing the combined first-author table via
    # `-A`; the VIAF author files are declared as dependencies.
    cluster-genders:
      cmd: >-
        python run.py --rust cluster extract-author-gender
        -o book-links/cluster-genders.parquet
        -A book-links/cluster-first-authors.parquet
      wdir: ..
      deps:
        - src/cli/cluster
        - book-links/cluster-stats.parquet
        - book-links/cluster-first-authors.parquet
        - viaf/author-name-index.parquet
        - viaf/author-genders.parquet
      outs:
        - book-links/cluster-genders.parquet

    # Run the integration-stats.tcl script through the `fusion` command to
    # produce gender-stats.csv from the cluster and rating/action tables.
    gender-stats:
      cmd: python ../run.py --rust fusion integration-stats.tcl
      deps:
        - integration-stats.tcl
        - cluster-genders.parquet
        - isbn-clusters.parquet
        - ../loc-mds/book-isbn-ids.parquet
        - ../bx/bx-cluster-actions.parquet
        - ../bx/bx-cluster-ratings.parquet
        - ../az2014/az-cluster-ratings.parquet
        - ../goodreads/gr-cluster-actions.parquet
        - ../goodreads/gr-cluster-ratings.parquet
      outs:
        - gender-stats.csv

    # Run `cluster hash` over the ISBN cluster table.
    cluster-hashes:
      cmd: python ../run.py --rust cluster hash -o cluster-hashes.parquet
      deps:
        - ../src/cli/cluster
        - isbn-clusters.parquet
      outs:
        - cluster-hashes.parquet

    # One generated stage per listed table: run `pq-info` on <item>.parquet
    # and write <item>.json next to it.
    schema:
      foreach:
        - all-isbns
        - isbn-clusters
        - cluster-stats
        - cluster-first-authors
        - cluster-genders
        - cluster-hashes
      do:
        cmd: python ../run.py --rust pq-info -o ${item}.json ${item}.parquet
        deps:
          - ${item}.parquet
        outs:
          - ${item}.json:
              # Tracked as an output but excluded from the DVC cache.
              cache: false

    # Disabled stage, kept for reference.
    # debug-graph:
    #   cmd: python ./run.py --rust extract-graph --cluster ${debug.cluster} -o book-links/debug-graph.gml
    #   wdir: ..
    #   deps:
    #     - src/bin/extract-graph.rs
    #     - book-links/book-graph.mp.zst
    #   outs:
    #     - book-links/debug-graph.gml
Tip!

Press p or ← to see the previous file, or n or → to see the next file.

Comments

Loading...