Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dvc.lock 11 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
  1. schema: '2.0'
  2. stages:
  3. collect-isbns:
  4. cmd: python ../run.py --rust collect-isbns -o all-isbns.parquet all-isbns.toml
  5. deps:
  6. - path: ../az2014/ratings.parquet
  7. md5: f3e8018263dadb98a221f7a0fd0f6821
  8. size: 147804724
  9. - path: ../bx/cleaned-ratings.csv
  10. md5: da3196d4af84ae39b3713e73c59677c7
  11. size: 22627884
  12. - path: ../goodreads/gr-book-ids.parquet
  13. md5: bd138e1db016e3533554f08971ed28eb
  14. size: 36155423
  15. - path: ../loc-mds/book-isbns.parquet
  16. md5: b692a9b00ba567028f566e985e503722
  17. size: 54291815
  18. - path: ../openlibrary/edition-isbns.parquet
  19. md5: 4aae4d3eaf2ff663d71b9965ccf59133
  20. size: 132821740
  21. - path: ../src/cli/collect_isbns.rs
  22. md5: 6cd9a1b90cb1d613ff7ba73c6f8be741
  23. size: 2911
  24. - path: all-isbns.toml
  25. md5: 8d1990ca34a435e464021afb3b5fc9ed
  26. size: 399
  27. outs:
  28. - path: all-isbns.parquet
  29. md5: 29cc7ae0b86b0e0f5aa7389fdea7de5b
  30. size: 193709539
  31. cluster:
  32. cmd: python run.py --rust cluster-books
  33. deps:
  34. - path: book-links/all-isbns.parquet
  35. md5: 29cc7ae0b86b0e0f5aa7389fdea7de5b
  36. size: 193709539
  37. - path: goodreads/book-isbn-ids.parquet
  38. md5: 299f5bfc7c2244f9a83598dbd56d6719
  39. size: 19658190
  40. - path: goodreads/gr-book-ids.parquet
  41. md5: bd138e1db016e3533554f08971ed28eb
  42. size: 36155423
  43. - path: loc-mds/book-ids.parquet
  44. md5: 7f47ba0c4aa3b202f0f1f93774b40f17
  45. size: 64587938
  46. - path: loc-mds/book-isbn-ids.parquet
  47. md5: 8a1b9d423ecf63253f29cc0ee2d7e8df
  48. size: 46596848
  49. - path: openlibrary/all-works.parquet
  50. md5: 43534d6324f0021769fdd8ed3287c48b
  51. size: 147842482
  52. - path: openlibrary/edition-isbn-ids.parquet
  53. md5: f1bb7e35d883c19e733608ae58d8981b
  54. size: 127237646
  55. - path: openlibrary/edition-works.parquet
  56. md5: 295d170711290ac99b92e60e179bcf7f
  57. size: 146555043
  58. - path: openlibrary/editions.parquet
  59. md5: 48972480e58c663c3feff6e3d698c2ba
  60. size: 723946186
  61. - path: src/cli/cluster_books.rs
  62. md5: a7be7a18af7ac92557becf4c84fae075
  63. size: 4615
  64. - path: src/graph/
  65. md5: eaaf19b503f6fff1db4a6360e50f3d64.dir
  66. size: 10386
  67. nfiles: 4
  68. outs:
  69. - path: book-links/book-graph.mp.zst
  70. md5: 5c8a331e19adc1adf997fcf516fc8ad9
  71. size: 1055861783
  72. - path: book-links/cluster-graph-edges.parquet
  73. md5: ec419d3c7da6aee47ebeeb3d1e14ac15
  74. size: 360353914
  75. - path: book-links/cluster-graph-nodes.parquet
  76. md5: faa95b816d304431065f2c83caf85d02
  77. size: 625547671
  78. - path: book-links/cluster-metrics.json
  79. md5: 285ad00f741e212b9e89405ced7deffa
  80. size: 37
  81. - path: book-links/cluster-stats.parquet
  82. md5: 00343d6b07198c9d26de2d46ce785206
  83. size: 122446262
  84. - path: book-links/isbn-clusters.parquet
  85. md5: 9bb3676bec6b5b3c03fe4c25701c21f4
  86. size: 229330723
  87. cluster-ol-first-authors:
  88. cmd: python run.py --rust cluster extract-authors -o book-links/cluster-ol-first-authors.parquet
  89. --first-author -s openlib
  90. deps:
  91. - path: book-links/isbn-clusters.parquet
  92. md5: 9bb3676bec6b5b3c03fe4c25701c21f4
  93. size: 229330723
  94. - path: openlibrary/author-names.parquet
  95. md5: bbe41b19f31882eb0128142b351cc077
  96. size: 126448846
  97. - path: openlibrary/edition-authors.parquet
  98. md5: 52d351f649c6cce481da4b4df6393940
  99. size: 136280002
  100. - path: openlibrary/edition-isbn-ids.parquet
  101. md5: f1bb7e35d883c19e733608ae58d8981b
  102. size: 127237646
  103. - path: src/cli/cluster
  104. md5: a1d600e4111815f34cd366be955d3f8f.dir
  105. size: 25891
  106. nfiles: 11
  107. outs:
  108. - path: book-links/cluster-ol-first-authors.parquet
  109. md5: 04895650f15e77b085b15b718dbc65c9
  110. size: 144423691
  111. cluster-loc-first-authors:
  112. cmd: python run.py --rust cluster extract-authors -o book-links/cluster-loc-first-authors.parquet
  113. --first-author -s loc
  114. deps:
  115. - path: book-links/isbn-clusters.parquet
  116. md5: 9bb3676bec6b5b3c03fe4c25701c21f4
  117. size: 229330723
  118. - path: loc-mds/book-authors.parquet
  119. md5: 8bb05bd8c1cd99c6d7d10b1ab10d6489
  120. size: 103616710
  121. - path: loc-mds/book-isbn-ids.parquet
  122. md5: 8a1b9d423ecf63253f29cc0ee2d7e8df
  123. size: 46596848
  124. - path: src/cli/cluster
  125. md5: a1d600e4111815f34cd366be955d3f8f.dir
  126. size: 25891
  127. nfiles: 11
  128. outs:
  129. - path: book-links/cluster-loc-first-authors.parquet
  130. md5: 64f368ee99c996d5b685a4e6538a0ebc
  131. size: 55799494
  132. cluster-irst-authors:
  133. cmd: python run.py --rust cluster-authors -o book-links/cluster-first-authors.parquet
  134. --first-author -s openlib -s loc
  135. deps:
  136. - path: book-links/isbn-clusters.parquet
  137. md5: 5ee9bb4b67de722e24363e140ea2791f
  138. size: 161946382
  139. - path: loc-mds/book-authors.parquet
  140. md5: 2d3212dbf2405c48fb8bf2587a8587c3
  141. size: 127567010
  142. - path: loc-mds/book-isbn-ids.parquet
  143. md5: 6a764a693a9baad23d7c489cd3f7bfc9
  144. size: 70553002
  145. - path: openlibrary/author-names.parquet
  146. md5: 4e002793c585a769fc1334827697837e
  147. size: 142462847
  148. - path: openlibrary/edition-authors.parquet
  149. md5: 7172b92182942d728830cdac0b4862b6
  150. size: 164027959
  151. - path: openlibrary/edition-isbn-ids.parquet
  152. md5: afa5abff7a53ed402ba9ce4fb2a09635
  153. size: 214125175
  154. - path: src/bin/cluster-authors.rs
  155. md5: 76f614625858e7ad84e273e875861490
  156. size: 5253
  157. outs:
  158. - path: book-links/cluster-first-authors.parquet
  159. md5: a7f439659cccd809efe4ac2ea276b058
  160. size: 157637977
  161. cluster-first-authors:
  162. cmd: python run.py --rust cluster extract-authors -o book-links/cluster-first-authors.parquet
  163. --first-author -s openlib -s loc
  164. deps:
  165. - path: book-links/isbn-clusters.parquet
  166. md5: 9bb3676bec6b5b3c03fe4c25701c21f4
  167. size: 229330723
  168. - path: loc-mds/book-authors.parquet
  169. md5: 8bb05bd8c1cd99c6d7d10b1ab10d6489
  170. size: 103616710
  171. - path: loc-mds/book-isbn-ids.parquet
  172. md5: 8a1b9d423ecf63253f29cc0ee2d7e8df
  173. size: 46596848
  174. - path: openlibrary/author-names.parquet
  175. md5: bbe41b19f31882eb0128142b351cc077
  176. size: 126448846
  177. - path: openlibrary/edition-authors.parquet
  178. md5: 52d351f649c6cce481da4b4df6393940
  179. size: 136280002
  180. - path: openlibrary/edition-isbn-ids.parquet
  181. md5: f1bb7e35d883c19e733608ae58d8981b
  182. size: 127237646
  183. - path: src/cli/cluster
  184. md5: a1d600e4111815f34cd366be955d3f8f.dir
  185. size: 25891
  186. nfiles: 11
  187. outs:
  188. - path: book-links/cluster-first-authors.parquet
  189. md5: 36512d5a6527bf991d128972581f32e3
  190. size: 177650717
  191. cluster-genders:
  192. cmd: python run.py --rust cluster extract-author-gender -o book-links/cluster-genders.parquet
  193. -A book-links/cluster-first-authors.parquet
  194. deps:
  195. - path: book-links/cluster-first-authors.parquet
  196. md5: 36512d5a6527bf991d128972581f32e3
  197. size: 177650717
  198. - path: book-links/cluster-stats.parquet
  199. md5: 00343d6b07198c9d26de2d46ce785206
  200. size: 122446262
  201. - path: src/cli/cluster
  202. md5: a1d600e4111815f34cd366be955d3f8f.dir
  203. size: 25891
  204. nfiles: 11
  205. - path: viaf/author-genders.parquet
  206. md5: cbd6871e6ba229881aed5f58126fd652
  207. size: 102791330
  208. - path: viaf/author-name-index.parquet
  209. md5: 663023a30c3d2f93ca4dea7d26793e9d
  210. size: 483832252
  211. outs:
  212. - path: book-links/cluster-genders.parquet
  213. md5: 8cb24dcbda2c1559fbfd67583b81e60f
  214. size: 106885602
  215. gender-stats:
  216. cmd: python ../run.py --rust fusion integration-stats.tcl
  217. deps:
  218. - path: ../az2014/az-cluster-ratings.parquet
  219. md5: 674a41980f410014f5e04036cecec492
  220. size: 302707350
  221. - path: ../bx/bx-cluster-actions.parquet
  222. md5: ef6ef99b0f6702258ee776afe194460c
  223. size: 7251481
  224. - path: ../bx/bx-cluster-ratings.parquet
  225. md5: b37f08a3f38031c23c4db5cd73a5dc7d
  226. size: 3043911
  227. - path: ../goodreads/gr-cluster-actions.parquet
  228. md5: 6cc9fcf431dc4a72d0efc16402bb522a
  229. size: 2917412389
  230. - path: ../goodreads/gr-cluster-ratings.parquet
  231. md5: 223918febaed7724a1e77f60e964ed5f
  232. size: 1400932246
  233. - path: ../loc-mds/book-isbn-ids.parquet
  234. md5: 8a1b9d423ecf63253f29cc0ee2d7e8df
  235. size: 46596848
  236. - path: cluster-genders.parquet
  237. md5: 8cb24dcbda2c1559fbfd67583b81e60f
  238. size: 106885602
  239. - path: integration-stats.tcl
  240. md5: 0b47c45c920d62c0c476e1eca3b312a5
  241. size: 2019
  242. - path: isbn-clusters.parquet
  243. md5: 9bb3676bec6b5b3c03fe4c25701c21f4
  244. size: 229330723
  245. outs:
  246. - path: gender-stats.csv
  247. md5: 44405ed9653db449ebf4588d163a047d
  248. size: 1115
  249. cluster-hashes:
  250. cmd: python ../run.py --rust cluster hash -o cluster-hashes.parquet
  251. deps:
  252. - path: ../src/cli/cluster
  253. md5: a1d600e4111815f34cd366be955d3f8f.dir
  254. size: 25891
  255. nfiles: 11
  256. - path: isbn-clusters.parquet
  257. md5: 9bb3676bec6b5b3c03fe4c25701c21f4
  258. size: 229330723
  259. outs:
  260. - path: cluster-hashes.parquet
  261. md5: 80bc62083421cbab1558545bfa3aca08
  262. size: 270217045
  263. debug-graph:
  264. cmd: python ./run.py --rust extract-graph --cluster 100004141 -o book-links/debug-graph.gml
  265. deps:
  266. - path: book-links/book-graph.mp.zst
  267. md5: cbb067efc568960af703f83f5b73e9ac
  268. size: 1283174149
  269. - path: src/bin/extract-graph.rs
  270. md5: c22a75eb91d779306a11d8bd2065b3e6
  271. size: 1142
  272. outs:
  273. - path: book-links/debug-graph.gml
  274. md5: b2fb3c4a07ace6f85a1b880b54ab5da8
  275. size: 2144356
  276. schema@cluster-stats:
  277. cmd: python ../run.py --rust pq-info -o cluster-stats.json cluster-stats.parquet
  278. deps:
  279. - path: cluster-stats.parquet
  280. md5: 00343d6b07198c9d26de2d46ce785206
  281. size: 122446262
  282. outs:
  283. - path: cluster-stats.json
  284. md5: fdf6157f2ed9d75095fa3c930f175467
  285. size: 1232
  286. schema@cluster-first-authors:
  287. cmd: python ../run.py --rust pq-info -o cluster-first-authors.json cluster-first-authors.parquet
  288. deps:
  289. - path: cluster-first-authors.parquet
  290. md5: 36512d5a6527bf991d128972581f32e3
  291. size: 177650717
  292. outs:
  293. - path: cluster-first-authors.json
  294. md5: 21429ff175c1e0f0ee1baf3ba2e94044
  295. size: 358
  296. schema@cluster-genders:
  297. cmd: python ../run.py --rust pq-info -o cluster-genders.json cluster-genders.parquet
  298. deps:
  299. - path: cluster-genders.parquet
  300. md5: 8cb24dcbda2c1559fbfd67583b81e60f
  301. size: 106885602
  302. outs:
  303. - path: cluster-genders.json
  304. md5: 6c8281c59066f473d0b429fb1c3697e0
  305. size: 353
  306. schema@all-isbns:
  307. cmd: python ../run.py --rust pq-info -o all-isbns.json all-isbns.parquet
  308. deps:
  309. - path: all-isbns.parquet
  310. md5: 29cc7ae0b86b0e0f5aa7389fdea7de5b
  311. size: 193709539
  312. outs:
  313. - path: all-isbns.json
  314. md5: 11ba5c5b21fbf50c9513659bd3d7cc63
  315. size: 1039
  316. schema@cluster-hashes:
  317. cmd: python ../run.py --rust pq-info -o cluster-hashes.json cluster-hashes.parquet
  318. deps:
  319. - path: cluster-hashes.parquet
  320. md5: 80bc62083421cbab1558545bfa3aca08
  321. size: 270217045
  322. outs:
  323. - path: cluster-hashes.json
  324. md5: e0b43e5875d8df9e9aff168d83a2cf6e
  325. size: 500
  326. schema@isbn-clusters:
  327. cmd: python ../run.py --rust pq-info -o isbn-clusters.json isbn-clusters.parquet
  328. deps:
  329. - path: isbn-clusters.parquet
  330. md5: 9bb3676bec6b5b3c03fe4c25701c21f4
  331. size: 229330723
  332. outs:
  333. - path: isbn-clusters.json
  334. md5: 766771cee94d211bc8f0b980cd2844bf
  335. size: 492
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...