Different Summary Outputs Locally vs API for the Same Text

Hi @kmfoda ,

You piqued my curiosity, so I tried to reproduce this on a small example.
The issue (at least in my simple test case) is linked to `model.config.prefix`, which is set to " ". By default, the pipeline prepends this prefix to the input text (some models require a custom prompt prefix, so it can be stored in the config to avoid tedious manual prompting every time).

@philschmid Is that prefix intended?

import torch
import os
from transformers import pipeline

# Access token read from the environment (None when the variable is unset);
# it is handed to the pipeline as ``use_auth_token``.
TOKEN = os.getenv("HF_API_TOKEN")

# Sample input shared by every summarization variant below.
txt = "Nicolas and Phil are going to give their team access to the AI summarizing some calls next week."

# Use the first GPU when CUDA is available and fall back to CPU (-1) otherwise,
# so the script also runs on machines without a GPU.
summarizer = pipeline(
    "summarization",
    model="kaizan/production-bart-large-cnn-samsum",
    use_auth_token=TOKEN,
    device=0 if torch.cuda.is_available() else -1,
)


def naive():
    """Summarize ``txt`` by calling ``model.generate`` directly and print it.

    Reuses the tokenizer and model held by the module-level ``summarizer``
    pipeline, tokenizes ``txt`` exactly as written (no prefix is added), and
    prints the decoded summary between two lines of ``=`` characters.
    """
    tokenizer = summarizer.tokenizer
    model = summarizer.model

    encoded = tokenizer(txt, return_tensors="pt")
    # Send the ids to the device the model is actually on instead of guessing
    # from CUDA availability, so inputs and weights can never disagree.
    input_ids = encoded["input_ids"].to(model.device)
    generated_ids = model.generate(
        input_ids,
        no_repeat_ngram_size=2,
        max_length=75,
        top_k=50,
        top_p=0.95,
        early_stopping=True,
    )

    banner = "=" * 10
    print(banner)
    print(
        tokenizer.decode(
            generated_ids[0],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
    )
    print(banner)


def naive_prefix():
    """Summarize ``" " + txt`` via ``model.generate`` directly and print it.

    Same procedure as a direct ``generate`` call on ``txt``, except that a
    single space is prepended to the text before tokenization. The decoded
    summary is printed between two lines of ``+`` characters.
    """
    tokenizer = summarizer.tokenizer
    model = summarizer.model

    # The leading space is the only difference from the un-prefixed variant.
    encoded = tokenizer(" " + txt, return_tensors="pt")
    # Send the ids to the device the model is actually on instead of guessing
    # from CUDA availability, so inputs and weights can never disagree.
    input_ids = encoded["input_ids"].to(model.device)
    generated_ids = model.generate(
        input_ids,
        no_repeat_ngram_size=2,
        max_length=75,
        top_k=50,
        top_p=0.95,
        early_stopping=True,
    )

    banner = "+" * 10
    print(banner)
    print(
        tokenizer.decode(
            generated_ids[0],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
    )
    print(banner)


def pipe():
    """Summarize ``txt`` through the ``summarizer`` pipeline and print it.

    The pipeline is called with the same generation options as the direct
    ``generate`` variants; the ``summary_text`` of the first result is printed
    between two lines of ``-`` characters.
    """
    generation_options = {
        "no_repeat_ngram_size": 2,
        "max_length": 75,
        "top_k": 50,
        "top_p": 0.95,
        "early_stopping": True,
    }
    results = summarizer(txt, **generation_options)

    separator = "-" * 10
    print(separator)
    print(results[0]["summary_text"])
    print(separator)


# Run the three variants in order so their printed summaries can be compared.
for run_variant in (naive, naive_prefix, pipe):
    run_variant()
2 Likes