Benchmark results

Command:

export d=mbart_benchmark_data
python examples/benchmarking/run_benchmark.py \
  --models facebook/bart-large-cnn \
  --log_filename $d/log.txt \
  --inference_memory_csv \
  $d/inference_memory.csv \
  --train_memory_csv $d/train_memory.csv \
  --train_time_csv $d/train_time.csv \
  --inference_time_csv $d/inference_time.csv \
  --fp16 --log_print --training  --save_to_csv \
  --batch_sizes 4 8 12 16

Results:
bart-large-cnn

==============        TRAIN - MEMORY - RESULTS        =======
--------------------------------------------------------------------------------
 Model Name                     Batch Size     Seq Length    Memory in MB
--------------------------------------------------------------------------------
   facebook/bart-large-cnn           4              8             2795
   facebook/bart-large-cnn           4              32           2897
   facebook/bart-large-cnn           4             128           3169
   facebook/bart-large-cnn           4             512           6873
   facebook/bart-large-cnn           8              8             2827
   facebook/bart-large-cnn           8              32            2933
   facebook/bart-large-cnn           8             128            3465
   facebook/bart-large-cnn           8             512           12195          
   facebook/bart-large-cnn           12             8             2859
   facebook/bart-large-cnn           12             32            3137
   facebook/bart-large-cnn           12            128            4371
   facebook/bart-large-cnn           12            512            N/A
   facebook/bart-large-cnn           16             8             2891
   facebook/bart-large-cnn           16             32            3105
   facebook/bart-large-cnn           16            128            5153
   facebook/bart-large-cnn           16            512            N/A
--------------------------------------------------------------------------------

mbart

=========       TRAIN - MEMORY - RESULTS        =======
--------------------------------------------------------------------------------
          Model Name             Batch Size     Seq Length    Memory in MB
--------------------------------------------------------------------------------
  facebook/mbart-large-en-ro         4              8             4355
  facebook/mbart-large-en-ro         4              32            4947
  facebook/mbart-large-en-ro         4             128            5117
  facebook/mbart-large-en-ro         4             512           10383
  facebook/mbart-large-en-ro         8              8             4877
  facebook/mbart-large-en-ro         8              32            4493
  facebook/mbart-large-en-ro         8             128            5857
  facebook/mbart-large-en-ro         8             512            N/A
  facebook/mbart-large-en-ro         12             8             4909
  facebook/mbart-large-en-ro         12             32            5085
  facebook/mbart-large-en-ro         12            128            7079
  facebook/mbart-large-en-ro         12            512            N/A
  facebook/mbart-large-en-ro         16             8             4941
  facebook/mbart-large-en-ro         16             32            4663
  facebook/mbart-large-en-ro         16            128            8655
  facebook/mbart-large-en-ro         16            512            N/A
--------------------------------------------------------------------------------
  • This assumes that len(input_ids) == len(decoder_input_ids), which is not true for summarization
  • Since bart-large-cnn has smaller embeddings, it is less likely to OOM (have an N/A entry)
  • env: v100 16GB GPU, fp16
  • Train times (the two CSV tables below: mbart first, then bart) show bart more than 2x faster
model,batch_size,sequence_length,result
facebook/mbart-large-en-ro,4,8,0.0669
facebook/mbart-large-en-ro,4,32,0.0699
facebook/mbart-large-en-ro,4,128,0.1377
facebook/mbart-large-en-ro,4,512,0.3529
facebook/mbart-large-en-ro,8,8,0.0672
facebook/mbart-large-en-ro,8,32,0.0831
facebook/mbart-large-en-ro,8,128,0.1928
facebook/mbart-large-en-ro,8,512,N/A
facebook/mbart-large-en-ro,12,8,0.0687
facebook/mbart-large-en-ro,12,32,0.1156
facebook/mbart-large-en-ro,12,128,0.2629
facebook/mbart-large-en-ro,12,512,N/A
facebook/mbart-large-en-ro,16,8,0.0705
facebook/mbart-large-en-ro,16,32,0.1392
facebook/mbart-large-en-ro,16,128,0.3334
facebook/mbart-large-en-ro,16,512,N/A
model,batch_size,sequence_length,result
facebook/bart-large-cnn,4,8,0.0619
facebook/bart-large-cnn,4,32,0.0629
facebook/bart-large-cnn,4,128,0.0623
facebook/bart-large-cnn,4,512,0.1274
facebook/bart-large-cnn,8,8,0.0699
facebook/bart-large-cnn,8,32,0.0628
facebook/bart-large-cnn,8,128,0.0705
facebook/bart-large-cnn,8,512,0.2347
facebook/bart-large-cnn,12,8,0.0614
facebook/bart-large-cnn,12,32,0.0620
facebook/bart-large-cnn,12,128,0.0884
facebook/bart-large-cnn,12,512,N/A
facebook/bart-large-cnn,16,8,0.0667
facebook/bart-large-cnn,16,32,0.0668
facebook/bart-large-cnn,16,128,0.1075
facebook/bart-large-cnn,16,512,N/A
1 Like