1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
select_genes:
cmd: papermill -f stages/1_select_genes_and_species.yaml --progress-bar --report-mode
notebooks/1_select_genes_and_species.ipynb data/output/notebooks/1_select_genes_and_species_report.ipynb
deps:
- path: data/input/genes/by_animal_class
md5: 5bb1b3db720c77cb0800fa40f0196f29.dir
- path: data/input/samples.tsv
md5: 17e2bb8020c8ac1f8c3c7445fac901c3
- path: data/input/species.tsv
md5: cdf6379f1d78780afc6f650b10636494
- path: notebooks/1_select_genes_and_species.ipynb
md5: 201de29fbe33b8bf5b546fcf16ec15f6
- path: stages/1_select_genes_and_species.yaml
md5: 22f95cba7413c7c641fec0720b8338ee
outs:
- path: data/interim/selected_expressions.tsv
md5: d872ec9f5ff9ac16d7dd6d877cc077b1
- path: data/interim/selected_genes.tsv
md5: dc2b3b921d9fb4ed358a085c53426307
- path: data/interim/selected_samples.tsv
md5: f3a11a4ad6550a7f356bf51e7a4e183b
- path: data/interim/selected_species.tsv
md5: 74a64f77b443ed7b6e9b44240953ec7a
- path: data/output/counts/species_sample_count.tsv
md5: eced40b842c0ec786a43d0e8c1da073d
- path: data/output/counts/species_tissue_sample_count.tsv
md5: 7e78391b84b265bdc0db65ef0f293056
- path: data/output/counts/tissue_sample_count.tsv
md5: 95d7d50886117aae30a8ee8c335f5076
- path: data/output/notebooks/1_select_genes_and_species_report.ipynb
md5: c170a67d0a586c49d1442695c78bdcbc
select_samples:
cmd: papermill -f parameters/select_samples.yaml --progress-bar --report-mode notebooks/select_samples.ipynb
data/output/notebooks/select_samples.ipynb
deps:
- path: data/input/genes/by_animal_class
md5: 5bb1b3db720c77cb0800fa40f0196f29.dir
- path: data/input/samples.tsv
md5: 17e2bb8020c8ac1f8c3c7445fac901c3
- path: data/input/species.tsv
md5: cdf6379f1d78780afc6f650b10636494
- path: data/input/total_mtDNA_base_composition.csv
md5: 3735cfc4beffb6c9356f908525345ee6
- path: notebooks/select_samples.ipynb
md5: 9db059d4bd6319903528a5f737f0f4f0
- path: parameters/select_samples.yaml
md5: ccd2ff9e431a861a3cabaf02eb5533ab
outs:
- path: data/interim/selected/gestation_days
md5: 03f0461ba9a1ca26f052203b948cd78b.dir
- path: data/interim/selected/lifespan
md5: 4c5315e23281c98c3b5d748b84d32b3e.dir
- path: data/interim/selected/mass_kg
md5: f36dbab9692c98800c464052e8dc85af.dir
- path: data/interim/selected/metabolic_rate
md5: 470ea1aeccc3bafa4b93268181360e6d.dir
- path: data/interim/selected/mtGC
md5: fa17686c777e1fad034b626b1efad921.dir
- path: data/interim/selected/temperature
md5: 4f4453d99a2c54fd9422e5e85c7fa5be.dir
- path: data/output/counts/species_sample_count.tsv
md5: 84c76b86cb61c5bf73c7f06c390dc064
- path: data/output/counts/species_tissue_sample_count.tsv
md5: e1b7a7f290203cad12f412d33e2a0bcb
- path: data/output/counts/tissue_sample_count.tsv
md5: 5a1441ea61f14d0242037f6a36c9d4a4
- path: data/output/notebooks/select_samples.ipynb
md5: 3e57c8aa755ebec3c8e2addf0f0c4e33
results_intersections:
cmd: papermill -f parameters/results_intersections.yaml --progress-bar --report-mode
notebooks/results_intersections.ipynb data/output/notebooks/results_intersections.ipynb
deps:
- path: data/output/external/causality
md5: c00e3428ab5aac19fdc6c5157b09fb33.dir
- path: data/output/external/linear
md5: 024046bb0390f0d3bd374580359ebff4.dir
- path: parameters/results_intersections.yaml
md5: 45a394673b593a63177a9853f01d24c4
outs:
- path: data/output/notebooks/results_intersections.ipynb
md5: 42127d40730f345e12effb4dff4d55e2
- path: data/output/results/shap_with_linear_genage.tsv
md5: 6d3ebc5c50cc026941db3fc269a1746d
- path: data/output/stage_2/shap_results.tsv
md5: 9b052d5683d3e61e6845086801c5b709
shap_selection:
cmd: papermill -f parameters/shap_selection.yaml --progress-bar --report-mode notebooks/shap_selection.ipynb
data/output/notebooks/shap_selection.ipynb
deps:
- path: data/interim/selected/gestation_days
md5: 03f0461ba9a1ca26f052203b948cd78b.dir
- path: data/interim/selected/lifespan
md5: 65f40bf267437f9a07131d706e0fed3d.dir
- path: data/interim/selected/mass_kg
md5: f36dbab9692c98800c464052e8dc85af.dir
- path: data/interim/selected/metabolic_rate
md5: 470ea1aeccc3bafa4b93268181360e6d.dir
- path: data/interim/selected/mtGC
md5: fa17686c777e1fad034b626b1efad921.dir
- path: data/interim/selected/temperature
md5: 4f4453d99a2c54fd9422e5e85c7fa5be.dir
- path: parameters/shap_selection.yaml
md5: 43734a0f6f4865a78d36e5ea7e773176
outs:
- path: data/interim/stage_1/results_concatenated.tsv
md5: aed0d7ed5a721fb780f90a2dccbd70e5
- path: data/interim/stage_2/input_data
md5: 9f1ca3ea434625790c4cab969e07aab7.dir
- path: data/output/notebooks/shap_selection.ipynb
md5: c48cb96cd95b4936a3adb043018a6d8c
- path: data/output/plots/stage_one_summary_lifespan.svg
md5: dbe5c9bbbbf06e6a62bf2f447b78010f
- path: data/output/plots/stage_two_untuned_summary_lifespan.svg
md5: f10b432adda8d0ef644b972515cfba24
tuning:
cmd: echo "work in progress!"
deps:
- path: data/interim/selected/expressions.tsv
md5: e8e8ea38f54eec4c9d51069d8cc2fde0
- path: data/interim/selected/genes.tsv
md5: 08f457c2517b3f601f5e58e3b1ec4ae0
- path: data/interim/selected/genes_meta.tsv
md5: 0387f21fcd4b287dc924432c8b90042d
- path: data/interim/selected/samples.tsv
md5: c370e925f56a0ed33b7dcb16553d4c39
- path: data/interim/selected/species.tsv
md5: 44e0c63a0d7749abcd43b3c1f92cc54b
outs:
- path: data/output/optimization/study.sqlite
md5: ef8116b0684832e6e11749edcd4e5cbf
tune:
cmd: python tune.py
deps:
- path: data/interim/selected/gestation_days
md5: 03f0461ba9a1ca26f052203b948cd78b.dir
- path: data/interim/selected/lifespan
md5: 4c5315e23281c98c3b5d748b84d32b3e.dir
- path: data/interim/selected/mass_kg
md5: f36dbab9692c98800c464052e8dc85af.dir
- path: data/interim/selected/metabolic_rate
md5: 470ea1aeccc3bafa4b93268181360e6d.dir
- path: data/interim/selected/mtGC
md5: fa17686c777e1fad034b626b1efad921.dir
- path: data/interim/selected/temperature
md5: 4f4453d99a2c54fd9422e5e85c7fa5be.dir
params:
parameters/tune.yaml:
debug_local: true
folds: 5
hold_outs: 1
metrics: r2_huber_kendall
not_validated_species: true
repeats: 10
threads: 1
trait: lifespan
trials: 10
stage_one:
cmd: papermill -f parameters/stage_one_shap_selection.yaml --progress-bar --report-mode
notebooks/stage_one_shap_selection.ipynb data/output/notebooks/stage_one_shap_selection.ipynb
deps:
- path: data/interim/optimization/gestation_days.sqlite
md5: 8f2a409f95031e839202e4feb88e99e0
- path: data/interim/optimization/lifespan.sqlite
md5: 0f73d0f933f828dcc5e287e6c9cacd5b
- path: data/interim/optimization/mass_kg.sqlite
md5: 23ddb28f55e3146b58e646e6e329e6b7
- path: data/interim/optimization/metabolic_rate.sqlite
md5: 247702368a8eab9c8824cac26a7f66e3
- path: data/interim/optimization/mtGC.sqlite
md5: 08ebe078dd5041f18945a571d3415054
- path: data/interim/optimization/temperature.sqlite
md5: 6cbdb4405cd2a969fefc39cd95565e45
- path: parameters/stage_one_shap_selection.yaml
md5: 43734a0f6f4865a78d36e5ea7e773176
outs:
- path: data/interim/stage_1/partitions
md5: c70cae310172de021ff95c4bebb96e21.dir
- path: data/interim/stage_1/results_concatenated.tsv
md5: 206bf0eb607029e3bd6ff9fdf22bf636
- path: data/interim/stage_2/input
md5: 0e23e5707e9e4cf0295e4b14e1d3cedd.dir
- path: data/output/plots/stage_1/
md5: bb4dbd82d36324094523b556c572345c.dir
- path: data/output/stage_1/gestation_days_selected.tsv
md5: 06a8f9d1da9583bd0d8926d094f7361a
- path: data/output/stage_1/lifespan_selected.tsv
md5: 31fcf98988b82e5027db92db47df6240
- path: data/output/stage_1/mass_kg_selected.tsv
md5: 35d066eb10b7c7f7d2cba20c2c72a25f
- path: data/output/stage_1/metabolic_rate_selected.tsv
md5: fb8c331c1713bcde1ef3850add59ae8d
- path: data/output/stage_1/mtGC_selected.tsv
md5: 589a0f09a8948829ba4dc465f81cd5ac
- path: data/output/stage_1/temperature_selected.tsv
md5: 645b89723a9fdaca13f21799fe842db7
stage_two:
cmd: papermill -f parameters/stage_two_shap_selection.yaml --progress-bar --report-mode
notebooks/stage_two_shap_selection.ipynb data/output/notebooks/stage_two_shap_selection.ipynb
deps:
- path: data/interim/optimization/lifespan_2.sqlite
md5: 4b479767827123b1a697440822d5b06e
- path: data/interim/stage_1/results_concatenated.tsv
md5: 206bf0eb607029e3bd6ff9fdf22bf636
- path: data/interim/stage_2/input
md5: 0e23e5707e9e4cf0295e4b14e1d3cedd.dir
outs:
- path: data/interim/stage_2/partitions
md5: ec4b53d890a4a6a62edcca84d9fc58f2.dir
- path: data/output/plots/interactions/
md5: ad45d9e3067cffeadb60cc9331ebafe6.dir
- path: data/output/plots/life_history_no_genes
md5: 3b34200c00fa455938bcee249e8a0477.dir
- path: data/output/plots/stage_2/decision_lifespan.svg
md5: 227947f4eeb6b1deb7c50f54b682ec2b
- path: data/output/plots/stage_2/heatmap_lifespan_unclustered.svg
md5: 1e824d421b577011c784769085da926d
- path: data/output/plots/stage_2/summary_lifespan.svg
md5: 805c2eeb15f5b62195ba71de5398dc97
- path: data/output/stage_2/lifespan.tsv
md5: fd09fc97efb766069a635e0d2cb2035c
- path: data/output/stage_2/lifespan_with_traits.tsv
md5: 6a72662224500b1c2f3ff8d01121fd2f
results:
cmd: papermill -f parameters/results_intersections.yaml --progress-bar --report-mode
notebooks/results_intersections.ipynb data/output/notebooks/results_intersections.ipynb
deps:
- path: data/output/external/causality
md5: 49db2b61f6e05fef5d975c56f139322f.dir
- path: data/output/external/causality/causal_selection.tsv
md5: 055328523cd2cb3df99fe180e423d8f8
- path: data/output/external/causality/signature_scores_ensembl_codes.csv
md5: 4aad6c97755d71c87fe139c72c120c13
- path: data/output/external/linear
md5: 024046bb0390f0d3bd374580359ebff4.dir
- path: notebooks/results_intersections.ipynb
md5: 63982d25a471caa6d5c1285fb679bfa0
- path: parameters/results_intersections.yaml
md5: 45a394673b593a63177a9853f01d24c4
outs:
- path: data/output/notebooks/results_intersections.ipynb
md5: 90e722d2cb5bd50a635dcd8cebb52d95
- path: data/output/results/genage_intersection.tsv
md5: b3cce68304b233b447a43ac771c9c647
- path: data/output/results/shap_with_causal_linear.tsv
md5: acf2db7ed9284c58dc30fc327dda0c6e
- path: data/output/results/shap_with_linear.tsv
md5: 28fc3e2bbebe7b7f9fc0bc7cd5e5dc97
- path: data/output/stage_2/shap_results.tsv
md5: 60a9cf8cd66d7011fe20b842801c34b4
stage_three:
cmd: papermill -f parameters/stage_three_shap_selection.yaml --progress-bar --report-mode
notebooks/stage_three_shap_selection.ipynb data/output/notebooks/stage_three_shap_selection.ipynb
deps:
- path: data/interim/optimization/lifespan_3.sqlite
md5: cc8844b2243a74b0368eebaecff4c9c4
outs:
- path: data/output/plots/stage_3/interactions/
md5: 5c546f6f29787821989c7e649feb151f.dir
- path: data/output/plots/stage_3/life_history/
md5: 8b7645e7bd0bbc945c5791078e3a15f9.dir
- path: data/output/plots/stage_3/stage_3_decision.svg
md5: e81f2ae90173942b4f659cd52905958e
- path: data/output/plots/stage_3/stage_3_heatmap_unclustered.svg
md5: d0f1e476c35c248fdf84d49fd013f062
- path: data/output/plots/stage_3/stage_3_interactions.png
md5: 8db6f94afc5df7482e9eb24966048893
- path: data/output/plots/stage_3/stage_3_summary.svg
md5: 97b42fcb9529808b1735c36c82ea51dd
Tip!
Press p or to see the previous file or,
n or to see the next file