How to load local git cloned model in transformers.js in Node.js?

I have this currently, having done a git clone on bart-large-cnn to the current working directory:

import 'dotenv/config'
import fs from 'fs/promises'
import {
  env,
  pipeline,
  AutoTokenizer,
  AutoModelForSeq2SeqLM,
} from '@xenova/transformers'

// Resolve models from the local filesystem only — never fall back to the Hugging Face hub.
env.allowRemoteModels = false
// Directory that local model folders are looked up under (here: the current working directory).
env.localModelPath = '.'

/**
 * Summarize each term's raw definitions down to short (1-3 word) phrases.
 *
 * @param {Record<string, string[]>} definitions - map of term -> list of raw definition strings
 * @returns {Promise<Record<string, {definitions: string[]}>>} map of term -> cleaned short phrases
 */
async function summarizeDefinitions(definitions) {
  // In transformers.js v2, pipeline() expects the model *id* (a string) and
  // loads the tokenizer and model itself — it does not accept pre-loaded
  // AutoTokenizer/AutoModelForSeq2SeqLM objects as positional arguments.
  // With env.localModelPath = '.', this resolves to ./bart-large-cnn/.
  const summarizer = await pipeline('summarization', 'bart-large-cnn')

  const cleanedDefinitions = {}

  let processed = 0
  for (const term in definitions) {
    // Cap the work at exactly 100 terms. (Checking *before* processing fixes
    // the original post-increment check, which let a 101st term through.)
    if (processed >= 100) {
      break
    }

    const combinedDefs = definitions[term].join('; ')

    // Summarize the combined definitions.
    const summary = await summarizer(combinedDefs, {
      max_length: 100, // adjust length based on your requirements
      min_length: 1,
      do_sample: false,
    })

    // Split the summary into sentence fragments, then comma-separated
    // candidates, keeping only phrases of at most three *words*. The original
    // filtered on ss.length (characters), which discarded almost everything.
    const cleaned = summary[0].summary_text
      .split('.')
      .map(s => s.trim())
      .filter(s => s.length > 0)
      .map(s =>
        s
          .split(',')
          .map(ss => ss.trim())
          .filter(ss => ss.length > 0 && ss.split(/\s+/).length <= 3),
      )

    cleanedDefinitions[term] = {
      definitions: cleaned.flat(),
      // type: 'noun', // or determine part-of-speech based on your logic
    }

    processed++
  }

  return cleanedDefinitions
}

/**
 * Entry point: load the raw definitions JSON, summarize it, and print the result.
 */
async function main() {
  const raw = await fs.readFile(
    'import/language/tibetan/definitions.out.json',
    'utf-8',
  )
  const definitions = JSON.parse(raw)

  const cleanedDefinitions = await summarizeDefinitions(definitions)
  console.log(cleanedDefinitions)
}

// Don't leave the top-level promise floating: report failures and set a
// non-zero exit code instead of crashing with an unhandled rejection.
main().catch(err => {
  console.error(err)
  process.exitCode = 1
})

I am getting this error:

./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/utils/hub.js:459
                    throw Error(`\`local_files_only=true\` or \`env.allowRemoteModels=false\` and file was not found locally at "${localPath}".`);
                        ^


Error: `local_files_only=true` or `env.allowRemoteModels=false` and file was not found locally at "./import/language/tibetan/bart-large-cnn/tokenizer_config.json".
    at getModelFile (./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/utils/hub.js:459:27)
    at getModelJSON (./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/utils/hub.js:572:18)
    at async Promise.all (index 1)
    at loadTokenizer (./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/tokenizers.js:61:18)
    at Function.from_pretrained (./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/tokenizers.js:4459:50)
    at summarizeDefinitions (./import/language/tibetan/transform.ts:15:21)
    at main (./import/language/tibetan/transform.ts:75:30)

Node.js v20.10.0

If I remove these lines at the top:

env.allowRemoteModels = false
env.localModelPath = '.'

I get this error — it is unable to find the model remotely, I guess:

Error: Could not locate file: "https://huggingface.co/facebook/bart-large-cnn/resolve/main/tokenizer_config.json".
    at handleError (./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/utils/hub.js:238:11)
    at getModelFile (./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/utils/hub.js:471:24)
    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)
    at getModelJSON (./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/utils/hub.js:572:18)
    at async Promise.all (index 0)
    at loadTokenizer (./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/tokenizers.js:61:18)
    at Function.from_pretrained (./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/tokenizers.js:4459:50)
    at summarizeDefinitions (./import/language/tibetan/transform.ts:12:21)
    at main (./import/language/tibetan/transform.ts:72:30)

Node.js v20.10.0

I am going to try the facebook/bart-large model on Hugging Face instead, since it appears bart-large-cnn (linked at the beginning) is missing tokenizer_config.json.

Oh and I’m using "@xenova/transformers": "^2.17.2" (the latest version as of this writing).

How do I get this working locally, in Node.js/JavaScript, using this facebook/bart-large-cnn transformers.js model? Thanks for the help!

Manually adding a ./bart-large-cnn/tokenizer_config.json with this:

{
  "add_prefix_space": false,
  "bos_token": "<s>",
  "cls_token": "<s>",
  "eos_token": "</s>",
  "errors": "replace",
  "mask_token": "<mask>",
  "model_max_length": 1024,
  "name_or_path": "facebook/bart-large-cnn",
  "pad_token": "<pad>",
  "sep_token": "</s>",
  "special_tokens_map_file": null,
  "tokenizer_class": "BartTokenizer",
  "trim_offsets": true,
  "unk_token": "<unk>"
}

Results in this error, so probably this git repo has something wrong with it?

Error: local_files_only=true or env.allowRemoteModels=false and file was not found locally at "./bart-large-cnn/onnx/encoder_model_quantized.onnx".

Sidenote 🙂 — related Stack Overflow question: "How to use HuggingFace's Transformers.js to distill messy dictionary definitions down to a clean array of 1-3 word definitions?"