Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

dvc.yaml 5.4 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
  1. # This file uses the two-stage status design, so new command stages
  2. # need to be accompanied by an entry in the status stage at the end
  3. stages:
  4. az-ratings:
  5. cmd: python run.py --rust pcat -t az.raw_ratings -s az-ratings -T import/az-ratings.transcript
  6. -D az-schema -f CSV data/ratings_Books.csv
  7. wdir: ..
  8. deps:
  9. - data/ratings_Books.csv
  10. - schemas/az-schema.status
  11. outs:
  12. - import/az-ratings.transcript
  13. bx-ratings:
  14. cmd: python run.py bx-import -T import/bx-ratings.transcript data/BX-Book-Ratings.csv
  15. wdir: ..
  16. deps:
  17. - data/BX-Book-Ratings.csv
  18. - schemas/bx-schema.status
  19. outs:
  20. - import/bx-ratings.transcript
  21. gr-authors:
  22. cmd: python run.py --rust import-json -T import/gr-authors.transcript --stage
  23. gr-authors -D gr-schema --truncate import/gr-authors.toml data/goodreads_book_authors.json.gz
  24. wdir: ..
  25. deps:
  26. - data/goodreads_book_authors.json.gz
  27. - import/gr-authors.toml
  28. - schemas/gr-schema.status
  29. outs:
  30. - import/gr-authors.transcript
  31. gr-book-genres:
  32. cmd: python run.py --rust import-json -T import/gr-book-genres.transcript --stage
  33. gr-book-genres -D gr-schema --truncate import/gr-book-genres.toml data/goodreads_book_genres_initial.json.gz
  34. wdir: ..
  35. deps:
  36. - data/goodreads_book_genres_initial.json.gz
  37. - import/gr-book-genres.toml
  38. - schemas/gr-schema.status
  39. outs:
  40. - import/gr-book-genres.transcript
  41. gr-book-series:
  42. cmd: python run.py --rust import-json -T import/gr-book-series.transcript --stage
  43. gr-book-series -D gr-schema --truncate import/gr-book-series.toml data/goodreads_book_series.json.gz
  44. wdir: ..
  45. deps:
  46. - data/goodreads_book_genres_initial.json.gz
  47. - import/gr-book-series.toml
  48. - schemas/gr-schema.status
  49. outs:
  50. - import/gr-book-series.transcript
  51. gr-books:
  52. cmd: python run.py --rust import-json -T import/gr-books.transcript --stage gr-books
  53. -D gr-schema --truncate import/gr-books.toml data/goodreads_books.json.gz
  54. wdir: ..
  55. deps:
  56. - data/goodreads_books.json.gz
  57. - import/gr-books.toml
  58. - schemas/gr-schema.status
  59. outs:
  60. - import/gr-books.transcript
  61. gr-interactions:
  62. cmd: python run.py --rust import-json -T import/gr-interactions.transcript --stage
  63. gr-interactions -D gr-schema --truncate import/gr-interactions.toml data/goodreads_interactions.json.gz
  64. wdir: ..
  65. deps:
  66. - data/goodreads_interactions.json.gz
  67. - import/gr-interactions.toml
  68. - schemas/gr-schema.status
  69. outs:
  70. - import/gr-interactions.transcript
  71. gr-works:
  72. cmd: python run.py --rust import-json -T import/gr-works.transcript --stage gr-works
  73. -D gr-schema --truncate import/gr-works.toml data/goodreads_book_works.json.gz
  74. wdir: ..
  75. deps:
  76. - data/goodreads_book_works.json.gz
  77. - import/gr-works.toml
  78. - schemas/gr-schema.status
  79. outs:
  80. - import/gr-works.transcript
  81. loc-mds-books:
  82. cmd: python run.py --rust parse-marc --db-schema locmds -t book_marc_field --truncate
  83. --stage loc-mds-books -D loc-mds-schema --transcript import/loc-mds-books.transcript
  84. --src-dir data/loc-books --src-prefix BooksAll.2016
  85. wdir: ..
  86. deps:
  87. - data/loc-books
  88. - schemas/loc-mds-schema.status
  89. outs:
  90. - import/loc-mds-books.transcript
  91. loc-mds-names:
  92. cmd: python run.py --rust parse-marc --db-schema locmds -t name_marc_field --truncate
  93. --stage loc-mds-names -D loc-mds-schema --transcript import/loc-mds-names.transcript
  94. --src-dir data/loc-names --src-prefix Names.2016
  95. wdir: ..
  96. deps:
  97. - data/loc-names
  98. - schemas/loc-mds-schema.status
  99. outs:
  100. - import/loc-mds-names.transcript
  101. ol-authors:
  102. cmd: python run.py --rust import-json -T import/ol-authors.transcript --stage
  103. ol-authors -D ol-schema --truncate import/ol-authors.toml data/ol_dump_authors.txt.gz
  104. wdir: ..
  105. deps:
  106. - data/ol_dump_authors.txt.gz
  107. - import/ol-authors.toml
  108. - schemas/ol-schema.status
  109. outs:
  110. - import/ol-authors.transcript
  111. ol-editions:
  112. cmd: python run.py --rust import-json -T import/ol-editions.transcript --stage
  113. ol-editions -D ol-schema --truncate import/ol-editions.toml data/ol_dump_editions.txt.gz
  114. wdir: ..
  115. deps:
  116. - data/ol_dump_editions.txt.gz
  117. - import/ol-editions.toml
  118. - schemas/ol-schema.status
  119. outs:
  120. - import/ol-editions.transcript
  121. ol-works:
  122. cmd: python run.py --rust import-json -T import/ol-works.transcript --stage ol-works
  123. -D ol-schema --truncate import/ol-works.toml data/ol_dump_works.txt.gz
  124. wdir: ..
  125. deps:
  126. - data/ol_dump_works.txt.gz
  127. - import/ol-works.toml
  128. - schemas/ol-schema.status
  129. outs:
  130. - import/ol-works.transcript
  131. viaf:
  132. cmd: python run.py --rust parse-marc --db-schema viaf -t marc_field --truncate
  133. --stage viaf -D viaf-schema --transcript import/viaf.transcript --line-mode
  134. data/viaf-clusters-marc21.xml.gz
  135. wdir: ..
  136. deps:
  137. - data/viaf-clusters-marc21.xml.gz
  138. - schemas/viaf-schema.status
  139. outs:
  140. - import/viaf.transcript
  141. status:
  142. foreach:
  143. - az-ratings
  144. - bx-ratings
  145. - gr-authors
  146. - gr-book-genres
  147. - gr-books
  148. - gr-book-series
  149. - gr-interactions
  150. - gr-works
  151. - loc-mds-books
  152. - loc-mds-names
  153. - ol-authors
  154. - ol-editions
  155. - ol-works
  156. - viaf
  157. do:
  158. cmd: python ../run.py stage-status -o ${item}.status ${item}
  159. always_changed: true
  160. outs:
  161. - ${item}.status
  162. deps:
  163. - ${item}.transcript
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...