VoicePresets-TexttoSpeech- Pro/Inference - Suno/Bark

Hi,
I recently got the Pro subscription to use text-to-speech (Suno/Bark), and I'm not able to set the voice presets when using huggingface.js.

The docs only show these parameters, model and inputs:
await hf.textToSpeech({
  model: 'espnet/kan-bayashi_ljspeech_vits',
  inputs: 'Hello world!'
})

but if I add presets for voice (eg v2/en_speaker_9) found in the Bark docs, it doesn’t work.

All of bark’s docs are in python as well, and the hf.js doesn’t seem to account for them.

Any help?

my ttscontroller.js:

// File: /uncensored-backend/controllers/ttsController.js
import { HfInference } from "@huggingface/inference";
import { HF_API_TOKEN } from "../utils/constants.js";

/** Model id and voice preset sent with every request made by this handler. */
const BARK_MODEL = "suno/bark";
const BARK_VOICE_PRESET = "v2/en_speaker_9";

/** Content type used when the upstream response does not report its own. */
const DEFAULT_AUDIO_TYPE = "audio/wav";

/** Longest slice of the input text that is written to the log. */
const LOG_PREVIEW_LENGTH = 50;

/**
 * Whole-word, case-insensitive billing/plan keywords.
 *
 * A plain `includes("pro")` would also match "processing", "provider" or
 * "protocol" (and miss "PRO"), turning unrelated failures into HTTP 402.
 */
const SUBSCRIPTION_ERROR_PATTERN =
  /\b(?:subscriptions?|pro|upgrad(?:e|ing)|quotas?|limit(?:s|ed)?)\b/i;

/**
 * Decide whether an upstream failure looks like a subscription/quota problem.
 *
 * @param {string} message - The error message.
 * @param {unknown} details - Optional error payload; only its `error` field is inspected.
 * @returns {boolean} True when either text mentions a billing keyword.
 */
function isSubscriptionError(message, details) {
  if (SUBSCRIPTION_ERROR_PATTERN.test(message)) {
    return true;
  }
  const upstream = details && typeof details === "object" ? details.error : undefined;
  return SUBSCRIPTION_ERROR_PATTERN.test(String(upstream ?? ""));
}

/**
 * Normalise the supported response shapes into a Buffer plus content type.
 *
 * @param {unknown} response - ArrayBuffer, Uint8Array, or an object exposing `arrayBuffer()`.
 * @returns {Promise<{ body: Buffer, contentType: string, kind: string } | null>}
 *   The audio payload, or `null` when the shape is not recognised.
 */
async function toAudioPayload(response) {
  if (response instanceof ArrayBuffer) {
    return { body: Buffer.from(response), contentType: DEFAULT_AUDIO_TYPE, kind: "ArrayBuffer" };
  }
  if (response instanceof Uint8Array) {
    return { body: Buffer.from(response), contentType: DEFAULT_AUDIO_TYPE, kind: "Uint8Array" };
  }
  if (response && typeof response.arrayBuffer === "function") {
    const bytes = await response.arrayBuffer();
    return {
      body: Buffer.from(bytes),
      contentType: response.type || DEFAULT_AUDIO_TYPE,
      kind: "Blob-like",
    };
  }
  return null;
}

/**
 * Log an upstream failure and answer with 402 (billing) or 500 (anything else).
 *
 * @param {object} res - Express-style response object.
 * @param {any} apiError - The value thrown by the text-to-speech call.
 * @returns {object} Whatever `res.json()` returns.
 */
function sendApiError(res, apiError) {
  console.error("Hugging Face API Error:", apiError);
  const errorMsg = apiError?.message || "Unknown error occurred";
  // NOTE(review): `response.data` / `response.status` is an axios-style shape;
  // confirm the client actually attaches it, otherwise these stay empty.
  const errorDetails = apiError?.response?.data || {};
  console.error("Error details:", {
    message: errorMsg,
    response: errorDetails,
    status: apiError?.response?.status,
  });

  if (isSubscriptionError(errorMsg, errorDetails)) {
    console.error("Subscription error detected");
    return res.status(402).json({
      error: "Hugging Face Pro subscription required for this model",
      details: errorMsg,
    });
  }
  return res.status(500).json({
    error: "Error from Hugging Face API",
    message: errorMsg,
    details: errorDetails,
  });
}

/**
 * Text-to-Speech (TTS) handler that calls `textToSpeech` on the "suno/bark" model.
 *
 * - Rejects requests whose body has no non-blank string `text` (HTTP 400).
 * - Prepends a "female voice" directive to the text and sends the
 *   "v2/en_speaker_9" voice preset as a request parameter.
 * - Accepts the result as an ArrayBuffer, Uint8Array, or Blob-like object and
 *   sends it back as binary audio with a matching Content-Type.
 * - Upstream failures become 402 (subscription/quota wording) or 500.
 *
 * @param {object} req - Express-style request; reads `req.body.text`.
 * @param {object} res - Express-style response used to send audio or a JSON error.
 * @returns {Promise<object>} The value returned by the `res` method that ended the request.
 */
export const barkTTSHandler = async (req, res) => {
  try {
    const { text } = req.body;
    if (typeof text !== "string" || text.trim() === "") {
      return res.status(400).json({ error: "Missing 'text' field." });
    }

    // NOTE(review): a TTS model may read this directive aloud instead of
    // obeying it; confirm, and consider relying on the voice preset alone.
    const modifiedText = `Please speak in a female voice: ${text}`;
    const preview =
      modifiedText.length > LOG_PREVIEW_LENGTH
        ? `${modifiedText.substring(0, LOG_PREVIEW_LENGTH)}...`
        : modifiedText;
    console.log(`Processing TTS request with text: "${preview}"`);

    const hf = new HfInference(HF_API_TOKEN);
    console.log(`Sending request to Hugging Face API with voice preset ${BARK_VOICE_PRESET}`);

    // Only the remote call is guarded here, so that failures while converting
    // or sending the audio are not mislabelled as Hugging Face API errors.
    let response;
    try {
      response = await hf.textToSpeech({
        model: BARK_MODEL,
        inputs: modifiedText,
        parameters: { voice_preset: BARK_VOICE_PRESET },
        // NOTE(review): verify the client honours `options` inside the args
        // object rather than as a separate second argument.
        options: { wait_for_model: true },
      });
    } catch (apiError) {
      return sendApiError(res, apiError);
    }
    console.log("Received response from Hugging Face API");

    const audio = await toAudioPayload(response);
    if (audio === null) {
      console.error("Unexpected response format:", response);
      return res.status(500).json({
        error: "Bark TTS returned an unexpected response format.",
        details: response,
      });
    }

    console.log(`Success: Received ${audio.kind} audio data (${audio.body.byteLength} bytes)`);
    res.setHeader("Content-Type", audio.contentType);
    return res.send(audio.body);
  } catch (error) {
    // Anything not raised by the remote call itself ends up here.
    console.error("Fatal error in barkTTSHandler:", error);
    return res.status(500).json({
      error: "TTS processing error",
      message: error.message || "Unknown error occurred",
    });
  }
};

export default barkTTSHandler;

1 Like

Hmmm…