load_dataset("parquet") fails on an AzureML VM but works on a Windows desktop

Hi,
I am trying to load a Parquet file from the local disk of an AzureML VM.
my_dataset = load_dataset("parquet",
data_files=r'./mydata/part-00005-tid-14548240332695166-f00e40bb-6e2a-473c-84c6-b7cc11a63e2d-75-1-c000.snappy.parquet',
)
It throws the error shown below. I am able to load .csv files on the same Linux VM, and on my Windows desktop loading the Parquet file works too.
Question: why is it looking for a loading script named parquet.py? Is that the generic Parquet loading script? If so, why is it not available?

FileNotFoundError Traceback (most recent call last)
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/datasets/load.py:320, in prepare_module(path, script_version, download_config, download_mode, dataset, force_local_path, dynamic_modules_path, return_resolved_file_path, **download_kwargs)
319 try:
→ 320 local_path = cached_path(file_path, download_config=download_config)
321 except FileNotFoundError:

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/datasets/utils/file_utils.py:281, in cached_path(url_or_filename, download_config, **download_kwargs)
279 if is_remote_url(url_or_filename):
280 # URL, so get it from the cache (downloading if necessary)
→ 281 output_path = get_from_cache(
282 url_or_filename,
283 cache_dir=cache_dir,
284 force_download=download_config.force_download,
285 proxies=download_config.proxies,
286 resume_download=download_config.resume_download,
287 user_agent=download_config.user_agent,
288 local_files_only=download_config.local_files_only,
289 use_etag=download_config.use_etag,
290 max_retries=download_config.max_retries,
291 use_auth_token=download_config.use_auth_token,
292 )
293 elif os.path.exists(url_or_filename):
294 # File, and it exists.

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/datasets/utils/file_utils.py:621, in get_from_cache(url, cache_dir, force_download, proxies, etag_timeout, resume_download, user_agent, local_files_only, use_etag, max_retries, use_auth_token)
620 elif response is not None and response.status_code == 404:
→ 621 raise FileNotFoundError(“Couldn’t find file at {}”.format(url))
622 _raise_if_offline_mode_is_enabled(f"Tried to reach {url}")

FileNotFoundError: Couldn’t find file at https://raw.githubusercontent.com/huggingface/datasets/1.8.0/datasets/parquet/parquet.py

During handling of the above exception, another exception occurred:

FileNotFoundError Traceback (most recent call last)
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/datasets/load.py:332, in prepare_module(path, script_version, download_config, download_mode, dataset, force_local_path, dynamic_modules_path, return_resolved_file_path, **download_kwargs)
331 try:
→ 332 local_path = cached_path(file_path, download_config=download_config)
333 logger.warning(
334 “Couldn’t find file locally at {}, or remotely at {}.\n”
335 “The file was picked from the master branch on github instead at {}.”.format(
336 combined_path, github_file_path, file_path
337 )
338 )

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/datasets/utils/file_utils.py:281, in cached_path(url_or_filename, download_config, **download_kwargs)
279 if is_remote_url(url_or_filename):
280 # URL, so get it from the cache (downloading if necessary)
→ 281 output_path = get_from_cache(
282 url_or_filename,
283 cache_dir=cache_dir,
284 force_download=download_config.force_download,
285 proxies=download_config.proxies,
286 resume_download=download_config.resume_download,
287 user_agent=download_config.user_agent,
288 local_files_only=download_config.local_files_only,
289 use_etag=download_config.use_etag,
290 max_retries=download_config.max_retries,
291 use_auth_token=download_config.use_auth_token,
292 )
293 elif os.path.exists(url_or_filename):
294 # File, and it exists.

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/datasets/utils/file_utils.py:621, in get_from_cache(url, cache_dir, force_download, proxies, etag_timeout, resume_download, user_agent, local_files_only, use_etag, max_retries, use_auth_token)
620 elif response is not None and response.status_code == 404:
→ 621 raise FileNotFoundError(“Couldn’t find file at {}”.format(url))
622 _raise_if_offline_mode_is_enabled(f"Tried to reach {url}")

FileNotFoundError: Couldn’t find file at https://raw.githubusercontent.com/huggingface/datasets/master/datasets/parquet/parquet.py

During handling of the above exception, another exception occurred:

FileNotFoundError Traceback (most recent call last)
Input In [16], in <cell line: 1>()
----> 1 hsa_dataset = load_dataset(“parquet”,
2 data_files=r’./mydata/part-00005-tid-14548240332695166-f00e40bb-6e2a-473c-84c6-b7cc11a63e2d-75-1-c000.snappy.parquet’,
3 )

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/datasets/load.py:708, in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, ignore_verifications, keep_in_memory, save_infos, script_version, use_auth_token, task, **config_kwargs)
706 ignore_verifications = ignore_verifications or save_infos
707 # Download/copy dataset processing script
→ 708 module_path, hash, resolved_file_path = prepare_module(
709 path,
710 script_version=script_version,
711 download_config=download_config,
712 download_mode=download_mode,
713 dataset=True,
714 return_resolved_file_path=True,
715 use_auth_token=use_auth_token,
716 )
717 # Set the base path for downloads as the parent of the script location
718 if resolved_file_path is not None:

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/datasets/load.py:340, in prepare_module(path, script_version, download_config, download_mode, dataset, force_local_path, dynamic_modules_path, return_resolved_file_path, **download_kwargs)
333 logger.warning(
334 “Couldn’t find file locally at {}, or remotely at {}.\n”
335 “The file was picked from the master branch on github instead at {}.”.format(
336 combined_path, github_file_path, file_path
337 )
338 )
339 except FileNotFoundError:
→ 340 raise FileNotFoundError(
341 “Couldn’t find file locally at {}, or remotely at {}.\n”
342 “The file is also not present on the master branch on github.”.format(
343 combined_path, github_file_path
344 )
345 )
346 elif path.count(“/”) == 1: # users datasets/metrics: s3 path (hub for datasets and s3 for metrics)
347 if dataset:

FileNotFoundError: Couldn’t find file locally at parquet/parquet.py, or remotely at https://raw.githubusercontent.com/huggingface/datasets/1.8.0/datasets/parquet/parquet.py.
The file is also not present on the master branch on github.

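It worked after updating the datasets package. The traceback shows that the VM was running datasets 1.8.0, which tried to download a parquet.py loading script from GitHub and could not find one.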