I currently have the following code, after running git clone
on bart-large-cnn into the current working directory:
import 'dotenv/config'
import fs from 'fs/promises'
import {
env,
pipeline,
AutoTokenizer,
AutoModelForSeq2SeqLM,
} from '@xenova/transformers'
// Never fetch model files over the network. With this disabled, a file that is
// missing locally makes the library throw instead of falling back to a download.
env.allowRemoteModels = false
// Base directory under which a model id such as 'bart-large-cnn' is looked up.
// NOTE(review): presumably resolved against the process working directory — confirm.
env.localModelPath = '.'
/**
 * Split a summary into period/comma separated phrases and keep the short ones.
 *
 * @param {string} summaryText - Raw text produced by the summarizer.
 * @param {number} maxWords - Largest number of words a kept phrase may have.
 * @returns {string[]} Non-empty phrases of at most `maxWords` words, in order.
 */
function extractShortDefinitions(summaryText, maxWords) {
  return summaryText
    .split(/[.,]/)
    .map(phrase => phrase.trim())
    .filter(phrase => {
      if (phrase.length === 0) {
        return false
      }
      // Count words, not characters: the goal is 1-3 *word* definitions.
      return phrase.split(/\s+/).length <= maxWords
    })
}

/**
 * Summarize the definitions of each term and reduce every summary to a list
 * of short definitions.
 *
 * @param {Object<string, string[]>} definitions - Map from term to its list of
 *   definition strings.
 * @param {Object} [options]
 * @param {number} [options.maxTerms=100] - Process at most this many terms.
 * @param {number} [options.maxWords=3] - Longest phrase (in words) to keep.
 * @returns {Promise<Object<string, {definitions: string[]}>>} Map from term to
 *   its cleaned definitions.
 */
async function summarizeDefinitions(
  definitions,
  { maxTerms = 100, maxWords = 3 } = {},
) {
  // In transformers.js, `pipeline` takes a model id (a string) and loads the
  // tokenizer and model itself; it does not accept pre-built model/tokenizer
  // objects the way the Python API does.
  // NOTE(review): the local 'bart-large-cnn' directory presumably has to hold
  // an ONNX conversion (e.g. a clone of Xenova/bart-large-cnn) — confirm.
  const summarizer = await pipeline('summarization', 'bart-large-cnn')

  const cleanedDefinitions = {}
  // Slice up front so exactly `maxTerms` terms are processed (the previous
  // counter-based guard let one extra term through).
  const entries = Object.entries(definitions).slice(0, maxTerms)

  for (const [term, defs] of entries) {
    const combinedDefs = defs.join('; ')

    // Nothing to summarize: do not send an empty prompt to the model.
    if (combinedDefs.trim() === '') {
      cleanedDefinitions[term] = { definitions: [] }
      continue
    }

    // Summarize the combined definitions
    const summary = await summarizer(combinedDefs, {
      max_length: 100, // adjust length based on your requirements
      min_length: 1,
      do_sample: false,
    })

    cleanedDefinitions[term] = {
      definitions: extractShortDefinitions(summary[0].summary_text, maxWords),
    }
  }

  return cleanedDefinitions
}
/**
 * Script entry point: read the Tibetan definitions JSON file, summarize it
 * with `summarizeDefinitions`, and print the cleaned result.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const definitions = JSON.parse(
    await fs.readFile(
      `import/language/tibetan/definitions.out.json`,
      `utf-8`,
    ),
  )
  const cleanedDefinitions = await summarizeDefinitions(definitions)
  console.log(cleanedDefinitions)
}

// Handle rejection explicitly instead of leaving the promise floating, and
// signal failure to the caller through the exit code.
main().catch(error => {
  console.error(error)
  process.exitCode = 1
})
I am getting this error:
./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/utils/hub.js:459
throw Error(`\`local_files_only=true\` or \`env.allowRemoteModels=false\` and file was not found locally at "${localPath}".`);
^
Error: `local_files_only=true` or `env.allowRemoteModels=false` and file was not found locally at "./import/language/tibetan/bart-large-cnn/tokenizer_config.json".
at getModelFile (./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/utils/hub.js:459:27)
at getModelJSON (./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/utils/hub.js:572:18)
at async Promise.all (index 1)
at loadTokenizer (./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/tokenizers.js:61:18)
at Function.from_pretrained (./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/tokenizers.js:4459:50)
at summarizeDefinitions (./import/language/tibetan/transform.ts:15:21)
at main (./import/language/tibetan/transform.ts:75:30)
Node.js v20.10.0
If I remove these lines at the top:
env.allowRemoteModels = false
env.localModelPath = '.'
I get this error instead; I guess it is unable to find the model remotely:
Error: Could not locate file: "https://huggingface.co/facebook/bart-large-cnn/resolve/main/tokenizer_config.json".
at handleError (./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/utils/hub.js:238:11)
at getModelFile (./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/utils/hub.js:471:24)
at process.processTicksAndRejections (node:internal/process/task_queues:95:5)
at getModelJSON (./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/utils/hub.js:572:18)
at async Promise.all (index 0)
at loadTokenizer (./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/tokenizers.js:61:18)
at Function.from_pretrained (./node_modules/.pnpm/@xenova+transformers@2.17.2/node_modules/@xenova/transformers/src/tokenizers.js:4459:50)
at summarizeDefinitions (./import/language/tibetan/transform.ts:12:21)
at main (./import/language/tibetan/transform.ts:72:30)
Node.js v20.10.0
I am going to try facebook/bart-large (on Hugging Face) instead, since it appears bart-large-cnn
(linked at the beginning) is missing tokenizer_config.json.
Oh and I’m using "@xenova/transformers": "^2.17.2"
(the latest version as of this writing).
How do I get this working locally, in Node.js/JavaScript, using this facebook/bart-large-cnn
transformers.js model? Thanks for the help!