Beam_search and generate are not consistent

I find that beam_search() returns the probability scores of the generated tokens. According to the documentation, beam_search is equivalent to generate(do_sample=False, num_beams>1). In the following small example, however, beam_search and generate do not produce the same output.

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import LogitsProcessorList, MinLengthLogitsProcessor, BeamSearchScorer,MaxLengthCriteria, StoppingCriteriaList


# Load the pretrained T5-small tokenizer and the matching seq2seq model.
tokenizer = AutoTokenizer.from_pretrained("t5-small")

model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")

# Make the embedding matrix size equal to the tokenizer's vocabulary size.
model.resize_token_embeddings(len(tokenizer))
# Prefer the GPU, but fall back to the CPU so the script does not crash on a
# machine without CUDA.
model.to("cuda" if torch.cuda.is_available() else "cpu")

seq1 = "summarize: beamsearch and generate does not give the same result"


# Tokenize the single prompt into PyTorch tensors (padded to the longest
# item in the batch and truncated to at most 128 tokens).
encoding = tokenizer(
    [seq1],
    padding="longest",
    max_length=128,
    truncation=True,
    return_tensors="pt",
)

# Move the inputs to whatever device the model lives on instead of a
# hard-coded "cuda", consistent with the `model.device` usage further down.
encoder_input_ids = encoding.input_ids.to(model.device)
attention_mask = encoding.attention_mask.to(model.device)
num_beams = 2
batch_size = encoder_input_ids.shape[0]

# One decoder row per (batch item, beam) pair, each seeded with the decoder
# start token.
input_ids = torch.full(
    (batch_size * num_beams, 1),
    model.config.decoder_start_token_id,
    dtype=torch.long,
    device=model.device,
)

# Repeat the encoder inputs once per beam. The attention mask is expanded the
# same way and handed both to the encoder and (through model_kwargs) to the
# decoder's cross-attention, so padded positions are ignored; generate()
# forwards the mask as well, so omitting it here would be one source of
# divergence for padded batches.
expanded_input_ids = encoder_input_ids.repeat_interleave(num_beams, dim=0)
expanded_attention_mask = attention_mask.repeat_interleave(num_beams, dim=0)
model_kwargs = {
    "encoder_outputs": model.get_encoder()(
        expanded_input_ids,
        attention_mask=expanded_attention_mask,
        return_dict=True,
    ),
    "attention_mask": expanded_attention_mask,
}
beam_scorer = BeamSearchScorer(
    batch_size=batch_size,
    do_early_stopping=True,
    num_beams=num_beams,
    device=model.device,
)

# Run beam search directly.
#
# `early_stopping` and `no_repeat_ngram_size` are generate() options, not
# beam_search() parameters: passed here they end up in **model_kwargs and do
# not constrain decoding. Early stopping is already configured on the
# BeamSearchScorer (do_early_stopping=True). An n-gram constraint would have
# to be supplied as a logits processor here AND as `no_repeat_ngram_size` in
# the generate() call for the two paths to stay comparable.
#
# The length limit is expressed as a stopping criterion, which is what the
# deprecated `max_length` argument of beam_search() stands for.
outputs = model.beam_search(
    input_ids,
    beam_scorer,
    logits_processor=None,
    stopping_criteria=StoppingCriteriaList([MaxLengthCriteria(max_length=64)]),
    output_scores=True,
    return_dict_in_generate=True,
    **model_kwargs,
)

# beam_search result:
beam_search_text = tokenizer.batch_decode(outputs.sequences, skip_special_tokens=True)
print(" ".join(beam_search_text))

>> beamsearch() and generate() does not give the same result. beamsearch does not give the same result

# Result of the high-level generate() API with the same beam settings.
generate_options = {
    "num_beams": 2,
    "do_sample": False,
    "early_stopping": True,
    "max_length": 64,
    "num_return_sequences": 1,
}
out = model.generate(encoder_input_ids, **generate_options)

tokenizer.batch_decode(out, skip_special_tokens=True)

>> ['beamsearch and generate does not give the same result. beamsearch does not provide the same result as beamsearch.']

# Remark1: generate() and beam_search() do not give the same result.

# Remark2: If I understand correctly, outputs.sequences can be calculated from outputs.scores:

# `outputs.scores` holds one (num_beams, vocab_size) tensor per decoding step.
# Taking the argmax of row 0 at every step does not follow the returned
# hypothesis: beams are re-ranked and reordered after each step, so the row
# that produced a given token is recorded in `outputs.beam_indices`, and the
# selected token need not be the arg-max of its row.
#
# compute_transition_scores() does this bookkeeping and returns, for each
# generated token of each returned sequence, the score it was selected with.
# normalize_logits=False because beam-search scores are already
# log-softmax-normalized.
# NOTE(review): requires a transformers release that provides
# `compute_transition_scores` and `beam_indices` — confirm installed version.
token_log_probs = model.compute_transition_scores(
    outputs.sequences,
    outputs.scores,
    outputs.beam_indices,
    normalize_logits=False,
)

# Generated token ids aligned one-to-one with `token_log_probs` (the leading
# decoder start token has no score, so it is sliced off).
idx = outputs.sequences[:, -token_log_probs.shape[1]:]
print(idx)  # the generated part of outputs.sequences
print(token_log_probs)  # log-prob of each token in `idx`

What am I missing here? My end goal is to get the log-probabilities of the tokens in outputs.sequences.