Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

megamolbart_pretrain_quick.sh 2.6 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
  1. #!/bin/bash
  2. ####
  3. # Example shell script to run NeMo MegaMolbart data processing or training.
  4. ####
  ### CONFIG ###
  # Default settings. The functions below read these as globals, and
  # parse_args may overwrite DATA_FILES_SELECTED, DATA_FORMAT,
  # MEGAMOLBART_CONFIG_FILE and TRAINING_ARGS from the command line.

  # Passed to the training script as --config-name.
  MEGAMOLBART_CONFIG_FILE='megamolbart_pretrain_xsmall_span_aug'
  # Training data format: "csv" or "bin".
  DATA_FORMAT='csv'

  # Mount points for the dataset, the source tree and the outputs.
  DATA_MOUNT='/data/zinc_csv_split'
  CODE_MOUNT='/workspace/nemo_chem'
  OUTPUT_MOUNT='/result'

  # NOTE(review): PROJECT is not referenced by any other visible line.
  PROJECT='MegaMolBART'

  # Experiment directory, derived from the format and config name above.
  RESULTS_MOUNT="${OUTPUT_MOUNT}/nemo_experiments/${DATA_FORMAT}/${MEGAMOLBART_CONFIG_FILE}"

  # Alternative value noted by the original author: x000.
  # NOTE(review): presumably _OP_/_CL_ stand for an opening/closing brace
  # (i.e. x{000..001}) -- confirm against the code that consumes this value.
  DATA_FILES_SELECTED='x_OP_000..001_CL_'

  WANDB_OFFLINE='True'

  # Space-separated key=value overrides handed to the training script.
  TRAINING_ARGS="exp_manager.exp_dir=${RESULTS_MOUNT}"
  TRAINING_ARGS+=" exp_manager.wandb_logger_kwargs.offline=${WANDB_OFFLINE}"
  TRAINING_ARGS+=" model.data.dataset_path=${DATA_MOUNT}"
  TRAINING_ARGS+=" model.data.dataset_format=${DATA_FORMAT}"
  ### END CONFIG ###
  #######################################
  # Print the help text for this script.
  # Arguments: none
  # Outputs:   the usage message on stdout
  #######################################
  usage() {
    # One argument per output line; the text contains no expansions.
    printf '%s\n' \
      'USAGE: megamolbart_pretrain_quick.sh' \
      'megamolbart pretrain script' \
      '----------------------------------------' \
      'megamolbart_pretrain_quick.sh [command]' \
      'valid commands:' \
      'preprocess' \
      'train' \
      'default command:' \
      'train' \
      'options:' \
      '-f|--data-files' \
      'List of data files to use' \
      '--data-format' \
      'Training data format. Valid values: "csv" or "bin".' \
      '-c|--config' \
      'Configuration' \
      '-a|--args' \
      'Additional arguments appended to the training arguments'
  }
  #######################################
  # Run megamolbart_pretrain.py with the current configuration.
  # Globals:   MEGAMOLBART_CONFIG_FILE (read), DO_TRAINING (read),
  #            TRAINING_ARGS (read)
  # Arguments: none
  # Outputs:   shell trace of the python command on stderr (set -x)
  # Returns:   the exit status of the python command
  #######################################
  execute() {
    local status=0
    set -x
    # Every line of the command needs a trailing backslash; without the one
    # after do_training=..., TRAINING_ARGS is run as a separate command and
    # never reaches python.
    # TRAINING_ARGS is a space-separated list of overrides and is left
    # unquoted on purpose so that each override becomes its own argument.
    # shellcheck disable=SC2086
    python megamolbart_pretrain.py \
      --config-path=conf \
      --config-name="${MEGAMOLBART_CONFIG_FILE}" \
      do_training="${DO_TRAINING}" \
      ${TRAINING_ARGS} || status=$?
    set +x
    # Hand back python's status instead of the (always zero) status of set +x.
    return "${status}"
  }
  #######################################
  # "preprocess" command: parse the options, then run the training script
  # with do_training=False.
  # Globals:   DO_TRAINING (written)
  # Arguments: command-line options, forwarded to parse_args
  #######################################
  preprocess() {
    DO_TRAINING="False"
    # Quoted so that option values containing spaces stay single arguments.
    parse_args "$@"
    execute
  }
  #######################################
  # "train" command: parse the options, then run the training script with
  # do_training=True.
  # Globals:   DO_TRAINING (written)
  # Arguments: command-line options, forwarded to parse_args
  #######################################
  train() {
    DO_TRAINING="True"
    # Quoted so that option values containing spaces stay single arguments.
    parse_args "$@"
    execute
  }
  #######################################
  # Parse option/value pairs into the script's global settings.
  # Globals:   DATA_FILES_SELECTED, DATA_FORMAT, MEGAMOLBART_CONFIG_FILE,
  #            TRAINING_ARGS (all written)
  # Arguments: any sequence of
  #              -f|--data-files VALUE
  #              --data-format VALUE
  #              -c|--config VALUE
  #              -a|--args VALUE   (appended to TRAINING_ARGS)
  # Outputs:   usage text on stdout, error message on stderr, on bad input
  # Returns:   exits the shell with status 1 on an unknown option or on an
  #            option that is missing its value
  #
  # NOTE(review): TRAINING_ARGS is assembled in the CONFIG section before
  # this function runs, so a --data-format given here does not change the
  # model.data.dataset_format entry already stored in it.
  #######################################
  parse_args() {
    local opt value
    while [[ $# -gt 0 ]]; do
      opt="$1"

      # Reject anything that is not a known option.
      case "${opt}" in
        -f|--data-files|--data-format|-c|--config|-a|--args) ;;
        *)
          usage
          exit 1
          ;;
      esac

      # Every option takes exactly one value; do not accept a missing one
      # as an empty string.
      if [[ $# -lt 2 ]]; then
        printf 'error: option %s requires a value\n' "${opt}" >&2
        usage
        exit 1
      fi
      value="$2"
      shift 2

      case "${opt}" in
        -f|--data-files) DATA_FILES_SELECTED="${value}" ;;
        --data-format)   DATA_FORMAT="${value}" ;;
        -c|--config)     MEGAMOLBART_CONFIG_FILE="${value}" ;;
        -a|--args)       TRAINING_ARGS="${TRAINING_ARGS} ${value}" ;;
      esac
    done
  }
  # Entry point: prepare the output directory and the Python environment,
  # then run the requested command ("train" when none is given).

  # NOTE(review): EXP_NAME is not assigned anywhere in the visible script;
  # when it is unset this creates RESULTS_MOUNT itself.
  mkdir -p "${RESULTS_MOUNT}/${EXP_NAME}" || {
    printf 'error: cannot create %s\n' "${RESULTS_MOUNT}/${EXP_NAME}" >&2
    exit 1
  }
  export PYTHONPATH="${CODE_MOUNT}:${PYTHONPATH}"
  export HYDRA_FULL_ERROR=1
  # The training script is invoked by relative name, so a failed cd must
  # stop the run here.
  cd "${CODE_MOUNT}/examples/chem" || {
    printf 'error: cannot cd to %s\n' "${CODE_MOUNT}/examples/chem" >&2
    exit 1
  }

  # ARGS is the command name; CMD is the command plus its options, kept as
  # an array so that option values containing spaces are not re-split.
  if [[ $# -eq 0 ]]; then
    ARGS=train
    CMD=(train)
  else
    ARGS="$1"
    CMD=("$@")
  fi

  case "${ARGS}" in
    preprocess|train)
      "${CMD[@]}"
      ;;
    *)
      usage
      exit 1
      ;;
  esac
Tip!

Press p to see the previous file or n to see the next file

Comments

Loading...