When I execute this code on Windows, I can access the dataset. But when I execute the same code on my local macOS machine, I get the error that I have appended after the code. Could you tell me how to access this dataset from my local macOS machine? I used the same Python version and the same `datasets` version on both machines, but it still did not work.
import os
import librosa
import numpy as np
import itertools
import soundfile as sf
from datasets import load_dataset, Audio
from huggingface_hub import login
# Authenticate with the Hugging Face Hub.
# NOTE(review): "my-ID" is presumably a placeholder for a personal access token.
login("my-ID")

# Load the streaming audio dataset
# NOTE(review): newer `datasets` releases deprecate `use_auth_token` in favor
# of `token` -- confirm against the installed version before changing it.
google = load_dataset("google/fleurs", "cmn_hans_cn", split="train", streaming=True, use_auth_token=True)
# Request the "audio" column at a 16 kHz sampling rate.
google = google.cast_column("audio", Audio(sampling_rate=16000))
# Define a function to add noise to audio samples based on the signal
def add_noise_to_signal(audio, noise_factor=0.1):
    """Return *audio* plus Gaussian noise scaled to the signal's peak amplitude.

    Args:
        audio: Array-like of audio samples. Any shape is accepted; the noise
            is generated with the same shape as the input.
        noise_factor: Noise standard deviation expressed as a fraction of the
            peak absolute amplitude of *audio*.

    Returns:
        A new NumPy array holding ``audio + noise``. The input is not
        modified. An empty input yields an empty result.
    """
    audio = np.asarray(audio)
    # np.max raises ValueError on an empty array, so treat "no samples" as
    # a zero-amplitude signal instead of crashing.
    max_amplitude = np.max(np.abs(audio)) if audio.size else 0.0
    # Draw noise with the same shape as the signal so multi-channel input
    # works as well as the 1-D case.
    noise = np.random.randn(*audio.shape)
    return audio + noise_factor * max_amplitude * noise
# Create a new folder to save noisy audio samples
# Destination directory for the noisy WAV files.
output_folder = r"my-folder"
# Create the directory (and any missing parents); do nothing if it exists.
os.makedirs(name=output_folder, exist_ok=True)
# Iterate through the streaming dataset, add noise to each audio sample, and save
def download_and_save_noisy_audio(dataset, save_folder, max_items=5000):
    """Write noisy WAV copies of the first *max_items* samples of *dataset*.

    Args:
        dataset: Iterable of items, each carrying an ``"audio"`` mapping with
            ``"array"``, ``"sampling_rate"`` and ``"path"`` entries.
        save_folder: Directory the WAV files are written to. It is created
            if it does not exist yet.
        max_items: Maximum number of items to take from *dataset*.

    Files that already exist in *save_folder* are left untouched, so the
    function can be re-run to resume an interrupted download.
    """
    os.makedirs(save_folder, exist_ok=True)
    for item in itertools.islice(dataset, max_items):
        audio = item["audio"]
        # Keep only the file name; the source path is not relative to
        # save_folder.
        save_path = os.path.join(save_folder, os.path.basename(audio["path"]))
        if os.path.exists(save_path):
            continue
        # Add noise to the audio based on the signal
        noisy_audio = add_noise_to_signal(audio["array"])
        sf.write(save_path, noisy_audio, audio["sampling_rate"], format="wav")
    print("Download, noise addition, and save complete")
# Run the pipeline: read from the streaming dataset and write into output_folder.
download_and_save_noisy_audio(google, output_folder)
When I execute this code on Windows, I can access the dataset. But when I execute it on my local macOS machine, I get the error below.
-----On my local mac OS error----
FileNotFoundError Traceback (most recent call last)
Cell In[6], line 40
35 sf.write(save_path, noisy_audio, audio_sampling_rate, format=“wav”)
38 print(“Download, noise addition, and save complete”)
—> 40 download_and_save_noisy_audio(google, output_folder)
Cell In[6], line 26, in download_and_save_noisy_audio(dataset, save_folder)
25 def download_and_save_noisy_audio(dataset, save_folder):
—> 26 for item in itertools.islice(dataset, 5000):
27 audio_data = item[“audio”][“array”]
28 audio_sampling_rate = item[“audio”][“sampling_rate”]
File ~/Voice_Ping/.venv/lib/python3.11/site-packages/datasets/iterable_dataset.py:1353, in IterableDataset.__iter__(self)
1350 yield formatter.format_row(pa_table)
1351 return
→ 1353 for key, example in ex_iterable:
1354 if self.features:
1355 # `IterableDataset` automatically fills missing columns with None.
1356 # This is done with `_apply_feature_types_on_example`.
1357 example = _apply_feature_types_on_example(
1358 example, self.features, token_per_repo_id=self._token_per_repo_id
1359 )
File ~/Voice_Ping/.venv/lib/python3.11/site-packages/datasets/iterable_dataset.py:207, in ExamplesIterable.__iter__(self)
206 def __iter__(self):
→ 207 yield from self.generate_examples_fn(**self.kwargs)
File ~/.cache/huggingface/modules/datasets_modules/datasets/google--fleurs/af82dbec419a815084fa63ebd5d5a9f24a6e9acdf9887b9e3b8c6bbd64e0b7ac/fleurs.py:213, in Fleurs._generate_examples(self, local_extracted_archives, archive_iters, text_paths)
210 langs = [self.config.name]
212 for archive, text_path, local_extracted_path, lang_id in zip(archive_iters, text_paths, local_extracted_archives, langs):
→ 213 with open(text_path, encoding=“utf-8”) as f:
214 lines = f.readlines()
215 data = self._get_data(lines, lang_id)
File ~/Voice_Ping/.venv/lib/python3.11/site-packages/datasets/streaming.py:74, in extend_module_for_streaming.<locals>.wrap_auth.<locals>.wrapper(*args, **kwargs)
72 @wraps(function)
73 def wrapper(*args, **kwargs):
—> 74 return function(*args, download_config=download_config, **kwargs)
File ~/Voice_Ping/.venv/lib/python3.11/site-packages/datasets/download/streaming_download_manager.py:507, in xopen(file, mode, download_config, *args, **kwargs)
505 except FileNotFoundError:
506 if file.startswith(config.HF_ENDPOINT):
→ 507 raise FileNotFoundError(
508 file + "\nIf the repo is private or gated, make sure to log in with `huggingface-cli login`."
509 ) from None
510 else:
511 raise
FileNotFoundError: https://huggingface.co/datasets/google/fleurs/resolve/main/data/cmn_hans_cn/train.tsv
If the repo is private or gated, make sure to log in with `huggingface-cli login`.