Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dvc.lock 5.7 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
  1. schema: '2.0'
  2. stages:
  3. scan-books:
  4. cmd: cargo run --release -- scan-marc --book-mode --glob "../data/loc-books/BooksAll.2016*.xml.gz"
  5. deps:
  6. - path: ../data/loc-books
  7. md5: 1b1e7ab1d98cc81e373dfc53345d4bb7.dir
  8. size: 3129774145
  9. nfiles: 43
  10. - path: ../src/cli/scan_marc.rs
  11. hash: md5
  12. md5: 0663aa5a5d2fe2a3c2fdb505170a5cc2
  13. size: 3934
  14. - path: ../src/marc
  15. hash: md5
  16. md5: 874e3a2ea08a2d41e3c54b9d7a2032c1.dir
  17. size: 22963
  18. nfiles: 5
  19. outs:
  20. - path: book-authors.parquet
  21. hash: md5
  22. md5: 0994ad02a21d496e6ff14c7001147f67
  23. size: 88233101
  24. - path: book-fields.parquet
  25. hash: md5
  26. md5: 472830445ffa794290a6f1f519af761a
  27. size: 3066979138
  28. - path: book-ids.parquet
  29. hash: md5
  30. md5: c2651736c069b5836563de8028815deb
  31. size: 71130687
  32. - path: book-isbns.parquet
  33. hash: md5
  34. md5: a954fd606bfcc6ff9db66af964dd2239
  35. size: 57538194
  36. book-authors:
  37. cmd: python ../run.py --rust fusion book-authors.tcl
  38. deps:
  39. - path: book-authors.tcl
  40. md5: 56396f59e4ea77683b2af8e74d205a4f
  41. size: 250
  42. - path: book-fields.parquet
  43. md5: b305bde2a13f77c0cb62ed586574480c
  44. size: 3094731732
  45. outs:
  46. - path: book-authors.parquet
  47. md5: c46c8998dfdfd25b91c88b225a436466
  48. size: 100853758
  49. scan-names:
  50. cmd: cargo run --release -- scan-marc --glob "../data/loc-names/Names.2016*.xml.gz"
  51. -o name-fields.parquet
  52. deps:
  53. - path: ../data/loc-names
  54. md5: fc488a8775561070cced774803fe0d72.dir
  55. size: 1410755359
  56. nfiles: 40
  57. - path: ../src/cli/scan_marc.rs
  58. hash: md5
  59. md5: 0663aa5a5d2fe2a3c2fdb505170a5cc2
  60. size: 3934
  61. - path: ../src/marc
  62. hash: md5
  63. md5: 874e3a2ea08a2d41e3c54b9d7a2032c1.dir
  64. size: 22963
  65. nfiles: 5
  66. outs:
  67. - path: name-fields.parquet
  68. hash: md5
  69. md5: 35b9abf74ec583ac01f226478958bd99
  70. size: 1574481968
  71. author-names:
  72. cmd: python ../run.py --rust fusion author-names.tcl
  73. deps:
  74. - path: author-names.tcl
  75. md5: 29c68172aa2a10d6054b73656c21567c
  76. size: 193
  77. - path: name-fields.parquet
  78. md5: 800b068931bb1a2cba59b85b3ebf471d
  79. size: 1543059992
  80. outs:
  81. - path: author-names.csv.gz
  82. md5: 9e68c20496a2f62922334701f9e1492f
  83. size: 87125358
  84. author-genders:
  85. cmd: python ../run.py --rust fusion author-genders.tcl
  86. deps:
  87. - path: author-genders.tcl
  88. md5: 58abbe4a110eb782450d9db0e461e087
  89. size: 197
  90. - path: name-fields.parquet
  91. md5: 800b068931bb1a2cba59b85b3ebf471d
  92. size: 1543059992
  93. outs:
  94. - path: author-genders.csv.gz
  95. md5: 71a6ef56b674c032c5a572d478e66762
  96. size: 1469682
  97. book-isbn-ids:
  98. cmd: cargo run --release -- link-isbn-ids -R rec_id -o loc-mds/book-isbn-ids.parquet
  99. loc-mds/book-isbns.parquet
  100. deps:
  101. - path: book-links/all-isbns.parquet
  102. hash: md5
  103. md5: 8803c162ab97efac8b098df7e9252314
  104. size: 464207536
  105. - path: loc-mds/book-isbns.parquet
  106. hash: md5
  107. md5: a954fd606bfcc6ff9db66af964dd2239
  108. size: 57538194
  109. outs:
  110. - path: loc-mds/book-isbn-ids.parquet
  111. hash: md5
  112. md5: 719c232a3bc19f0510607c31f024b1db
  113. size: 39838250
  114. loc-clusters:
  115. cmd: python ../run.py --rust fusion loc-clusters.tcl
  116. deps:
  117. - path: ../book-links/cluster-graph-nodes.parquet
  118. md5: 6d0ebe8ef88733428ac88a222f959b96
  119. size: 798139218
  120. - path: loc-clusters.tcl
  121. md5: 31716e72f6d58f047ea391109be5d806
  122. size: 221
  123. outs:
  124. - path: loc-clusters.parquet
  125. md5: 0748a8001c63142ebda3d099b6ebd84c
  126. size: 35754914
  127. schema@book-fields:
  128. cmd: python ../run.py --rust pq-info -o book-fields.json book-fields.parquet
  129. deps:
  130. - path: book-fields.parquet
  131. md5: 426ffd30f8659a5b7615dfab372b3ba0
  132. size: 2763440545
  133. outs:
  134. - path: book-fields.json
  135. md5: f5f037b02b62afd7df6194dee5cd86e5
  136. size: 693
  137. schema@book-isbn-ids:
  138. cmd: python ../run.py --rust pq-info -o book-isbn-ids.json book-isbn-ids.parquet
  139. deps:
  140. - path: book-isbn-ids.parquet
  141. md5: 1022f7b00a26899573dd319637cb098e
  142. size: 34297513
  143. outs:
  144. - path: book-isbn-ids.json
  145. md5: 71bb49d90e205b89033d2d951f2288ff
  146. size: 249
  147. schema@book-ids:
  148. cmd: python ../run.py --rust pq-info -o book-ids.json book-ids.parquet
  149. deps:
  150. - path: book-ids.parquet
  151. md5: b8e4eb3ee0b6330bcd655a64b1fa89e6
  152. size: 65210771
  153. outs:
  154. - path: book-ids.json
  155. md5: 82a4c2b33bde2b1835d3786818928495
  156. size: 605
  157. schema@book-authors:
  158. cmd: python ../run.py --rust pq-info -o book-authors.json book-authors.parquet
  159. deps:
  160. - path: book-authors.parquet
  161. md5: 415b0a79bb7bc1c7c6651eaccf0d9d30
  162. size: 85290815
  163. outs:
  164. - path: book-authors.json
  165. md5: 4f6d1f44d92cd6e4efe52a095097d92e
  166. size: 252
  167. schema@book-isbns:
  168. cmd: python ../run.py --rust pq-info -o book-isbns.json book-isbns.parquet
  169. deps:
  170. - path: book-isbns.parquet
  171. md5: ca03e6c54a490743746f2bc2a73ad3f0
  172. size: 55680857
  173. outs:
  174. - path: book-isbns.json
  175. md5: e37c1c973ddd9106deba98d15bba761b
  176. size: 329
  177. schema@loc-clusters:
  178. cmd: python ../run.py --rust pq-info -o loc-clusters.json loc-clusters.parquet
  179. deps:
  180. - path: loc-clusters.parquet
  181. md5: 0748a8001c63142ebda3d099b6ebd84c
  182. size: 35754914
  183. outs:
  184. - path: loc-clusters.json
  185. md5: a9217be376ffffe877ee4f3a2f0dcbb9
  186. size: 352
  187. schema@name-fields:
  188. cmd: python ../run.py --rust pq-info -o name-fields.json name-fields.parquet
  189. deps:
  190. - path: name-fields.parquet
  191. md5: fb0b961cc7a916930e10949d8da5a0df
  192. size: 1422606012
  193. outs:
  194. - path: name-fields.json
  195. md5: b1397f21e19ad39c1014f823091116d9
  196. size: 693
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...