Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dvc.lock 5.5 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
  1. schema: '2.0'
  2. stages:
  3. scan-books:
  4. cmd: python ../run.py --rust scan-marc --book-mode --glob "../data/loc-books/BooksAll.2016*.xml.gz"
  5. deps:
  6. - path: ../data/loc-books
  7. md5: 1b1e7ab1d98cc81e373dfc53345d4bb7.dir
  8. size: 3129774145
  9. nfiles: 43
  10. - path: ../src/cli/scan_marc.rs
  11. md5: bcbc2c6cfebc08fbd4072a6b01fba27d
  12. size: 3706
  13. - path: ../src/marc
  14. md5: 066418c6e3db231224a89a97aa94da9f.dir
  15. size: 19381
  16. nfiles: 5
  17. outs:
  18. - path: book-authors.parquet
  19. md5: 3585eceab422d960e403a2a146bb0b5f
  20. size: 84128212
  21. - path: book-fields.parquet
  22. md5: e90234eba3c42a15c0514a5178d222d7
  23. size: 2703843200
  24. - path: book-ids.parquet
  25. md5: 38346ca168b32be26216dba43e5a6ef4
  26. size: 66927842
  27. - path: book-isbns.parquet
  28. md5: 0775712f467248239f0d8f6e6daa3302
  29. size: 56227773
  30. book-authors:
  31. cmd: python ../run.py --rust fusion book-authors.tcl
  32. deps:
  33. - path: book-authors.tcl
  34. md5: 56396f59e4ea77683b2af8e74d205a4f
  35. size: 250
  36. - path: book-fields.parquet
  37. md5: b305bde2a13f77c0cb62ed586574480c
  38. size: 3094731732
  39. outs:
  40. - path: book-authors.parquet
  41. md5: c46c8998dfdfd25b91c88b225a436466
  42. size: 100853758
  43. scan-names:
  44. cmd: python ../run.py --rust scan-marc --glob "../data/loc-names/Names.2016*.xml.gz"
  45. -o name-fields.parquet
  46. deps:
  47. - path: ../data/loc-names
  48. md5: fc488a8775561070cced774803fe0d72.dir
  49. size: 1410755359
  50. nfiles: 40
  51. - path: ../src/cli/scan_marc.rs
  52. md5: bcbc2c6cfebc08fbd4072a6b01fba27d
  53. size: 3706
  54. - path: ../src/marc
  55. md5: 066418c6e3db231224a89a97aa94da9f.dir
  56. size: 19381
  57. nfiles: 5
  58. outs:
  59. - path: name-fields.parquet
  60. md5: 9643ffeda173dba838da8975c88bd958
  61. size: 1398366309
  62. author-names:
  63. cmd: python ../run.py --rust fusion author-names.tcl
  64. deps:
  65. - path: author-names.tcl
  66. md5: 29c68172aa2a10d6054b73656c21567c
  67. size: 193
  68. - path: name-fields.parquet
  69. md5: 800b068931bb1a2cba59b85b3ebf471d
  70. size: 1543059992
  71. outs:
  72. - path: author-names.csv.gz
  73. md5: 9e68c20496a2f62922334701f9e1492f
  74. size: 87125358
  75. author-genders:
  76. cmd: python ../run.py --rust fusion author-genders.tcl
  77. deps:
  78. - path: author-genders.tcl
  79. md5: 58abbe4a110eb782450d9db0e461e087
  80. size: 197
  81. - path: name-fields.parquet
  82. md5: 800b068931bb1a2cba59b85b3ebf471d
  83. size: 1543059992
  84. outs:
  85. - path: author-genders.csv.gz
  86. md5: 71a6ef56b674c032c5a572d478e66762
  87. size: 1469682
  88. book-isbn-ids:
  89. cmd: python run.py --rust link-isbn-ids -R rec_id -o loc-mds/book-isbn-ids.parquet
  90. loc-mds/book-isbns.parquet
  91. deps:
  92. - path: book-links/all-isbns.parquet
  93. md5: f1de86d41234d3b0ea975be3a188a0cc
  94. size: 275147630
  95. - path: loc-mds/book-isbns.parquet
  96. md5: 0775712f467248239f0d8f6e6daa3302
  97. size: 56227773
  98. outs:
  99. - path: loc-mds/book-isbn-ids.parquet
  100. md5: d318bbfa7f023370f780aa74c0bd60b5
  101. size: 33782689
  102. loc-clusters:
  103. cmd: python ../run.py --rust fusion loc-clusters.tcl
  104. deps:
  105. - path: ../book-links/cluster-graph-nodes.parquet
  106. md5: 6d0ebe8ef88733428ac88a222f959b96
  107. size: 798139218
  108. - path: loc-clusters.tcl
  109. md5: 31716e72f6d58f047ea391109be5d806
  110. size: 221
  111. outs:
  112. - path: loc-clusters.parquet
  113. md5: 0748a8001c63142ebda3d099b6ebd84c
  114. size: 35754914
  115. schema@book-fields:
  116. cmd: python ../run.py --rust pq-info -o book-fields.json book-fields.parquet
  117. deps:
  118. - path: book-fields.parquet
  119. md5: e90234eba3c42a15c0514a5178d222d7
  120. size: 2703843200
  121. outs:
  122. - path: book-fields.json
  123. md5: caafeec111379cec95011e60ed31ec8d
  124. size: 693
  125. schema@book-isbn-ids:
  126. cmd: python ../run.py --rust pq-info -o book-isbn-ids.json book-isbn-ids.parquet
  127. deps:
  128. - path: book-isbn-ids.parquet
  129. md5: d318bbfa7f023370f780aa74c0bd60b5
  130. size: 33782689
  131. outs:
  132. - path: book-isbn-ids.json
  133. md5: 12cc90bd30dce870a246014e288fc7fa
  134. size: 249
  135. schema@book-ids:
  136. cmd: python ../run.py --rust pq-info -o book-ids.json book-ids.parquet
  137. deps:
  138. - path: book-ids.parquet
  139. md5: 38346ca168b32be26216dba43e5a6ef4
  140. size: 66927842
  141. outs:
  142. - path: book-ids.json
  143. md5: 9f886551928eceb2b60ffbc0c7ce9687
  144. size: 605
  145. schema@book-authors:
  146. cmd: python ../run.py --rust pq-info -o book-authors.json book-authors.parquet
  147. deps:
  148. - path: book-authors.parquet
  149. md5: 3585eceab422d960e403a2a146bb0b5f
  150. size: 84128212
  151. outs:
  152. - path: book-authors.json
  153. md5: 3f2bc8e71f9a4d8c5623f966780b40bf
  154. size: 252
  155. schema@book-isbns:
  156. cmd: python ../run.py --rust pq-info -o book-isbns.json book-isbns.parquet
  157. deps:
  158. - path: book-isbns.parquet
  159. md5: 0775712f467248239f0d8f6e6daa3302
  160. size: 56227773
  161. outs:
  162. - path: book-isbns.json
  163. md5: 92446742844cfef47d5baa62d5586bdb
  164. size: 329
  165. schema@loc-clusters:
  166. cmd: python ../run.py --rust pq-info -o loc-clusters.json loc-clusters.parquet
  167. deps:
  168. - path: loc-clusters.parquet
  169. md5: 0748a8001c63142ebda3d099b6ebd84c
  170. size: 35754914
  171. outs:
  172. - path: loc-clusters.json
  173. md5: a9217be376ffffe877ee4f3a2f0dcbb9
  174. size: 352
  175. schema@name-fields:
  176. cmd: python ../run.py --rust pq-info -o name-fields.json name-fields.parquet
  177. deps:
  178. - path: name-fields.parquet
  179. md5: 9643ffeda173dba838da8975c88bd958
  180. size: 1398366309
  181. outs:
  182. - path: name-fields.json
  183. md5: 670f2e18ea5c27e7d6c9e60d36241d08
  184. size: 693
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...