Custom TensorBoardCallback - how to correctly call tb_writer.flush()?

I’m trying to add some custom logs to my training pipeline by overriding some methods of TensorBoardCallback.

Here’s my custom TensorboardCallback class:

import psutil
import torch
from transformers.integrations import TensorBoardCallback


class CustomTensorBoardCallback(TensorBoardCallback):
    """Custom TensorBoardCallback to log hardware usage
    and store some predictions at evaluation time."""

    def __init__(self, tokenizer, num_samples=5, freq=1, tb_writer=None):
        super().__init__(tb_writer)

        self.tokenizer = tokenizer

        self.num_samples = num_samples
        self.freq = freq  # Frequency of logging, in evaluation calls

        self.sample_sources = None
        self.sample_references = None
        self.sample_sources_ids = None

    @staticmethod
    def _bytes_to_gigabytes(num_bytes):
        return round(num_bytes / 1024 / 1024 / 1024, 2)

    def _log_hardware_usage(self, state):
        # Tag each point with the current training step so successive values
        # don't all pile up at step 0
        step = state.global_step
        vm = psutil.virtual_memory()
        self.tb_writer.add_scalar("cpu_ram_available_GB", self._bytes_to_gigabytes(vm.available), global_step=step)
        self.tb_writer.add_scalar("cpu_ram_used_GB", self._bytes_to_gigabytes(vm.used), global_step=step)

        if torch.cuda.is_available():
            self.tb_writer.add_scalar("gpu_usage", torch.cuda.utilization(), global_step=step)
            self.tb_writer.add_scalar("gpu_memory_usage_GB", self._bytes_to_gigabytes(torch.cuda.memory_usage()), global_step=step)

        self.tb_writer.flush()

    def on_init_end(self, args, state, control, **kwargs):
        print("I am running on_init_end")
        super().on_init_end(args, state, control, **kwargs)
        self._log_hardware_usage(state)

    def on_log(self, args, state, control, **kwargs):
        print("I am running on_log")
        super().on_log(args, state, control, **kwargs)
        self._log_hardware_usage(state)

    def on_evaluate(self, args, state, control, model, eval_dataloader, **kwargs):
        print("I am running on_evaluate")
        super().on_evaluate(args, state, control, **kwargs)

        if state.global_step % self.freq == 0:
            if self.sample_sources_ids is None:
                # Cache the first batch of validation samples
                val_first_batch = next(iter(eval_dataloader))
                for key in val_first_batch:
                    val_first_batch[key] = val_first_batch[key][:self.num_samples, :]
                self.sample_sources_ids = val_first_batch["input_ids"]
                # Decode source segments and labels
                # (decode_token_ids comes from my own tokenize_utils module)
                self.sample_sources = tokenize_utils.decode_token_ids(self.sample_sources_ids.cpu(), self.tokenizer)
                self.sample_references = tokenize_utils.decode_token_ids(val_first_batch["labels"].cpu(), self.tokenizer)

            preds_encoded = model.generate(self.sample_sources_ids)
            preds_decoded = tokenize_utils.decode_token_ids(preds_encoded.cpu(), self.tokenizer)

            self.tb_writer.add_text(
                "eval_source_ref_pred_sample",
                "\n".join([
                    f"{src_seg}\t{ref}\t{pred}"
                    for src_seg, ref, pred
                    in zip(self.sample_sources, self.sample_references, preds_decoded)
                ]),
                global_step=state.global_step,
            )
            self.tb_writer.flush()

I’m calling it in a script with:

# Define training hyperparameters
train_args = Seq2SeqTrainingArguments(
    output_dir=output_folder,
    **config["train_args"],
    log_level="info",
    logging_first_step=True,
    run_name=config["run_name"],
    disable_tqdm=True,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    report_to="tensorboard",
    push_to_hub=False,
    predict_with_generate=True,
)

tb_writer = SummaryWriter(log_dir="mylogdir")
tb_callback = misc_utils.CustomTensorBoardCallback(tokenizer=T5_tokenizer, tb_writer=tb_writer)
...
callbacks = [tb_callback]
...
trainer = Seq2SeqTrainer(
    T5_model,
    train_args,
    ...
    callbacks=callbacks,
)
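
One thing I’m unsure about is whether my SummaryWriter and the Trainer’s built-in TensorBoard integration even point at the same directory. As a sanity check (just a debugging sketch, not part of my pipeline; get_logdir() is the standard torch SummaryWriter accessor), both locations can be printed:

# Where does each writer actually write?
# train_args.logging_dir is the built-in integration's target; if I read the
# docs right, it defaults to a runs/... subfolder of output_dir.
print("Trainer logging_dir:", train_args.logging_dir)
# get_logdir() returns the "mylogdir" I passed to SummaryWriter above.
print("Custom writer log_dir:", tb_writer.get_logdir())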

I can confirm that my custom methods are running: I had to fix a few errors in them (e.g. I had forgotten to move the tensors to the CPU), and the print statements show up in my stdout file.

However, when I open TensorBoard (tensorboard --logdir .), I can see the values logged by the default Hugging Face integration (train/eval loss, some custom metrics I implemented, runtime, total_flos…), but not the ones I logged and explicitly flushed in CustomTensorBoardCallback.
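
To narrow down whether my tags never reach the event files or are just not being displayed, the files can also be inspected directly. Here’s a minimal sketch using the tensorboard package’s EventAccumulator (assuming the events live under the "mylogdir" I passed to SummaryWriter):

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Load whatever event files exist under the custom writer's directory
acc = EventAccumulator("mylogdir")
acc.Reload()
# Tags() lists everything found in the event files, grouped by type
print(acc.Tags())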

Do you have any idea what I could be doing wrong, or why the flushed logs don’t appear in TensorBoard?