Run distributed eval
- faster than run_eval even on 1 GPU (because of sortish sampler)
- also multi-gpu
- I use it through the `run_distributed_eval` alias (if I were to redo it, I might name it `deval`).
- These assume you are in examples/seq2seq (but they also work with model-hub models).
# Launch run_distributed_eval.py through torch.distributed.launch.
# Arguments:
#   $1    - process count, passed as --nproc_per_node
#   $2    - value for --model_name
#   $3    - value for --data_dir
#   $4    - value for --save_dir
#   $5... - extra arguments forwarded unchanged to run_distributed_eval.py
# Returns: 2 if fewer than four arguments are given, otherwise the exit
#          status of the python command.
run_distributed_eval () {
  if [ "$#" -lt 4 ]; then
    printf '%s\n' 'usage: run_distributed_eval NPROC MODEL DATA_DIR SAVE_DIR [extra args...]' >&2
    return 2
  fi
  # local keeps these short names from leaking into the calling shell.
  local proc=$1 m=$2 dd=$3 sd=$4
  shift 4
  # Everything is quoted so paths/arguments containing spaces or glob
  # characters reach python as single, unmodified words.
  python -m torch.distributed.launch --nproc_per_node="$proc" run_distributed_eval.py \
    --model_name "$m" --save_dir "$sd" --data_dir "$dd" "$@"
}
cc @valhalla, @patrickvonplaten, @stas
Usage:
run_distributed_eval 2 Helsinki-NLP/opus-mt-en-ro wmt_en_ro marian_baseline --fp16 --bs 64 --task translation
saves marian_baseline/test_generations.txt and marian_baseline/test_bleu.json.
The same command for 1 GPU (the first argument is the number of processes):
run_distributed_eval 1 Helsinki-NLP/opus-mt-en-ro wmt_en_ro marian_baseline --fp16 --bs 64 --task translation
Other useful Aliases
# for evaluating best_tfmr after pl training
# Calls run_distributed_eval on the best_tfmr subdirectory of a model dir.
# Arguments:
#   $1    - process count (forwarded as the first argument)
#   $2    - model directory; "$2/best_tfmr" is passed as the model and
#           "$2/" as the save directory
#   $3    - data directory
#   $4... - extra arguments forwarded unchanged
# Returns: 2 if fewer than three arguments are given, otherwise the exit
#          status of run_distributed_eval.
eval_best () {
  if [ "$#" -lt 3 ]; then
    printf '%s\n' 'usage: eval_best NPROC MODEL_DIR DATA_DIR [extra args...]' >&2
    return 2
  fi
  # local keeps these names out of the calling shell's namespace.
  local proc=$1 m=$2 dd=$3
  shift 3
  # Quoted so directories with spaces stay single arguments.
  run_distributed_eval "$proc" "$m/best_tfmr" "$dd" "$m/" "$@"
}
# Make and evaluate checkpoint averages:
# make_ensemble runs convert_pl_checkpoint_to_hf.py with three arguments:
# the given directory, its best_tfmr subdirectory and its avg_tfmr
# subdirectory (presumably checkpoint source, reference model and output
# location -- confirm against the converter script).
# Arguments:
#   $1 - run directory
# Returns: 2 if no directory is given, otherwise python's exit status.
make_ensemble () {
  if [ "$#" -lt 1 ]; then
    printf '%s\n' 'usage: make_ensemble RUN_DIR' >&2
    return 2
  fi
  # Without the guard above an empty $1 would expand to /best_tfmr and
  # /avg_tfmr; quoting keeps directories with spaces intact.
  python convert_pl_checkpoint_to_hf.py "$1" "$1/best_tfmr" "$1/avg_tfmr"
}
# Calls run_distributed_eval on the avg_tfmr subdirectory of a model dir.
# Arguments:
#   $1    - process count (forwarded as the first argument)
#   $2    - model directory; "$2/avg_tfmr" is passed as the model and
#           "$2/avg_tfmr_metrics" as the save directory
#   $3    - data directory
#   $4... - extra arguments forwarded unchanged
# Returns: 2 if fewer than three arguments are given, otherwise the exit
#          status of run_distributed_eval.
eval_avg () {
  if [ "$#" -lt 3 ]; then
    printf '%s\n' 'usage: eval_avg NPROC MODEL_DIR DATA_DIR [extra args...]' >&2
    return 2
  fi
  # local keeps these names out of the calling shell's namespace.
  local proc=$1 m=$2 dd=$3
  shift 3
  # Quoted so directories with spaces stay single arguments.
  run_distributed_eval "$proc" "$m/avg_tfmr" "$dd" "$m/avg_tfmr_metrics" "$@"
}
Sync logs to AWS
Assumes $sdbart is an S3 bucket where you dump stuff.
# Upload a local directory to the same relative path under $sdbart,
# passing --exclude "*.ckpt" to the sync command.
# NOTE(review): aw3 is not defined in this file; presumably an alias or
# wrapper for `aws s3` -- confirm.
# Globals:   sdbart (read) - remote prefix; must be non-empty
# Arguments: $1 - local directory; $2... - extra arguments forwarded to aw3 sync
# Returns:   2 if no directory is given, 1 if $sdbart is empty/unset,
#            otherwise the exit status of aw3.
sync_sdbart () {
  if [ "$#" -lt 1 ]; then
    printf '%s\n' 'usage: sync_sdbart SAVE_DIR [extra args...]' >&2
    return 2
  fi
  # An empty $sdbart would turn the destination into the local path /SAVE_DIR.
  if [ -z "${sdbart:-}" ]; then
    printf '%s\n' 'sync_sdbart: $sdbart is not set' >&2
    return 1
  fi
  local save_dir=$1
  shift
  aw3 sync "$save_dir" "$sdbart/$save_dir" --exclude "*.ckpt" "$@"
}
# Download "$sdbart/<dir>" into the local directory of the same name.
# NOTE(review): aw3 is not defined in this file; presumably an alias or
# wrapper for `aws s3` -- confirm.
# Globals:   sdbart (read) - remote prefix; must be non-empty
# Arguments: $1 - directory name; $2... - extra arguments forwarded to aw3 sync
# Returns:   2 if no directory is given, 1 if $sdbart is empty/unset,
#            otherwise the exit status of aw3.
dsync_sdbart () {
  if [ "$#" -lt 1 ]; then
    printf '%s\n' 'usage: dsync_sdbart SAVE_DIR [extra args...]' >&2
    return 2
  fi
  # An empty $sdbart would make the source the local path /SAVE_DIR.
  if [ -z "${sdbart:-}" ]; then
    printf '%s\n' 'dsync_sdbart: $sdbart is not set' >&2
    return 1
  fi
  local save_dir=$1
  shift
  aw3 sync "$sdbart/$save_dir" "$save_dir" "$@"
}
# Upload a local directory to "$sdbart/<dir>" with the filters
# --exclude="*" --include="*generations*" --include="*.json"
# (under `aws s3 sync` filter semantics that transfers only generation
# files and JSON files).
# NOTE(review): aw3 is not defined in this file; presumably an alias or
# wrapper for `aws s3` -- confirm.
# Globals:   sdbart (read) - remote prefix; must be non-empty
# Arguments: $1 - local directory; $2... - extra arguments forwarded to aw3 sync
# Returns:   2 if no directory is given, 1 if $sdbart is empty/unset,
#            otherwise the exit status of aw3.
sync_metrics () {
  if [ "$#" -lt 1 ]; then
    printf '%s\n' 'usage: sync_metrics SAVE_DIR [extra args...]' >&2
    return 2
  fi
  # An empty $sdbart would turn the destination into the local path /SAVE_DIR.
  if [ -z "${sdbart:-}" ]; then
    printf '%s\n' 'sync_metrics: $sdbart is not set' >&2
    return 1
  fi
  local save_dir=$1
  shift
  # Extra arguments are forwarded after the filters; previously they were
  # shifted off and then silently dropped.
  aw3 sync "$save_dir" "$sdbart/$save_dir" --exclude="*" --include="*generations*" --include="*.json" "$@"
}
# Download "$sdbart/<dir>" into the local directory of the same name with
# the filters --exclude="*" --include="*generations*" --include="*.json"
# (under `aws s3 sync` filter semantics that transfers only generation
# files and JSON files).
# NOTE(review): aw3 is not defined in this file; presumably an alias or
# wrapper for `aws s3` -- confirm.
# Globals:   sdbart (read) - remote prefix; must be non-empty
# Arguments: $1 - directory name; $2... - extra arguments forwarded to aw3 sync
# Returns:   2 if no directory is given, 1 if $sdbart is empty/unset,
#            otherwise the exit status of aw3.
dsync_metrics () {
  if [ "$#" -lt 1 ]; then
    printf '%s\n' 'usage: dsync_metrics SAVE_DIR [extra args...]' >&2
    return 2
  fi
  # An empty $sdbart would make the source the local path /SAVE_DIR.
  if [ -z "${sdbart:-}" ]; then
    printf '%s\n' 'dsync_metrics: $sdbart is not set' >&2
    return 1
  fi
  local save_dir=$1
  shift
  # Extra arguments are forwarded after the filters; previously they were
  # shifted off and then silently dropped.
  aw3 sync "$sdbart/$save_dir" "$save_dir" --exclude="*" --include="*generations*" --include="*.json" "$@"
}
# Download "$sdbart/<dir>" into the local directory of the same name with
# the filters --exclude "*" --include "metrics.json" --include "test_*.json".
# NOTE(review): aw3 is not defined in this file; presumably an alias or
# wrapper for `aws s3` -- confirm.
# Globals:   sdbart (read) - remote prefix; must be non-empty
# Arguments: $1 - directory name; $2... - extra arguments forwarded to aw3 sync
# Returns:   2 if no directory is given, 1 if $sdbart is empty/unset,
#            otherwise the exit status of aw3.
json_dsync_sdbart () {
  if [ "$#" -lt 1 ]; then
    printf '%s\n' 'usage: json_dsync_sdbart SAVE_DIR [extra args...]' >&2
    return 2
  fi
  # An empty $sdbart would make the source the local path /SAVE_DIR.
  if [ -z "${sdbart:-}" ]; then
    printf '%s\n' 'json_dsync_sdbart: $sdbart is not set' >&2
    return 1
  fi
  local save_dir=$1
  shift
  aw3 sync "$sdbart/$save_dir" "$save_dir" --exclude "*" --include "metrics.json" --include "test_*.json" "$@"
}
# Upload from the current directory to "$sdbart/" with the filters
# --exclude="*" --include="*/test_rouge.json".
# NOTE(review): aw3 is not defined in this file; presumably an alias or
# wrapper for `aws s3` -- confirm.
# Globals:   sdbart (read) - remote prefix; must be non-empty
# Arguments: $@ - extra arguments forwarded to aw3 sync
# Returns:   1 if $sdbart is empty/unset, otherwise the exit status of aw3.
sync_rouge () {
  # An empty $sdbart would make the destination the local root "/".
  if [ -z "${sdbart:-}" ]; then
    printf '%s\n' 'sync_rouge: $sdbart is not set' >&2
    return 1
  fi
  aw3 sync . "$sdbart/" --exclude="*" --include="*/test_rouge.json" "$@"
}
# Upload from the current directory to "$sdbart/" with the filters
# --exclude="*" --include="*/test_bleu.json".
# NOTE(review): aw3 is not defined in this file; presumably an alias or
# wrapper for `aws s3` -- confirm.
# Globals:   sdbart (read) - remote prefix; must be non-empty
# Arguments: $@ - extra arguments forwarded to aw3 sync
# Returns:   1 if $sdbart is empty/unset, otherwise the exit status of aw3.
sync_bleu () {
  # An empty $sdbart would make the destination the local root "/".
  if [ -z "${sdbart:-}" ]; then
    printf '%s\n' 'sync_bleu: $sdbart is not set' >&2
    return 1
  fi
  aw3 sync . "$sdbart/" --exclude="*" --include="*/test_bleu.json" "$@"
}
# Download from "$sdbart/" into the current directory with the filters
# --exclude="*" --include="*/test_rouge.json".
# NOTE(review): aw3 is not defined in this file; presumably an alias or
# wrapper for `aws s3` -- confirm.
# Globals:   sdbart (read) - remote prefix; must be non-empty
# Arguments: $@ - extra arguments forwarded to aw3 sync
# Returns:   1 if $sdbart is empty/unset, otherwise the exit status of aw3.
dsync_rouge (){
  # An empty $sdbart would make the source the local root "/".
  if [ -z "${sdbart:-}" ]; then
    printf '%s\n' 'dsync_rouge: $sdbart is not set' >&2
    return 1
  fi
  aw3 sync "$sdbart/" . --exclude="*" --include="*/test_rouge.json" "$@"
}
# Download from "$sdbart/" into the current directory with the filters
# --exclude="*" --include="*/test_bleu.json".
# NOTE(review): aw3 is not defined in this file; presumably an alias or
# wrapper for `aws s3` -- confirm.
# Globals:   sdbart (read) - remote prefix; must be non-empty
# Arguments: $@ - extra arguments forwarded to aw3 sync
# Returns:   1 if $sdbart is empty/unset, otherwise the exit status of aw3.
dsync_bleu () {
  # An empty $sdbart would make the source the local root "/".
  if [ -z "${sdbart:-}" ]; then
    printf '%s\n' 'dsync_bleu: $sdbart is not set' >&2
    return 1
  fi
  aw3 sync "$sdbart/" . --exclude="*" --include="*/test_bleu.json" "$@"
}