Exceeded our hourly quotas for action while loading dataset to HF Hub

I prepared a dataset to train a model, and I want to save the prepared dataset to the Hub (using push_to_hub). During the upload I get the following error:

---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
File ~/.local/lib/python3.10/site-packages/huggingface_hub/utils/_errors.py:261, in hf_raise_for_status(response, endpoint_name)
    260 try:
--> 261     response.raise_for_status()
    262 except HTTPError as e:

File ~/.local/lib/python3.10/site-packages/requests/models.py:1021, in Response.raise_for_status(self)
   1020 if http_error_msg:
-> 1021     raise HTTPError(http_error_msg, response=self)

HTTPError: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/datasets/artyomboyko/TEST/commit/main

The above exception was the direct cause of the following exception:

HfHubHTTPError                            Traceback (most recent call last)
Cell In[13], line 1
----> 1 current_dataset.push_to_hub("artyomboyko/TEST")

File ~/.local/lib/python3.10/site-packages/datasets/dataset_dict.py:1641, in DatasetDict.push_to_hub(self, repo_id, config_name, private, token, branch, max_shard_size, num_shards, embed_external_files)
   1639 logger.info(f"Pushing split {split} to the Hub.")
   1640 # The split=key needs to be removed before merging
-> 1641 repo_id, split, uploaded_size, dataset_nbytes, _, _ = self[split]._push_parquet_shards_to_hub(
   1642     repo_id,
   1643     data_dir=data_dir,
   1644     split=split,
   1645     private=private,
   1646     token=token,
   1647     branch=branch,
   1648     max_shard_size=max_shard_size,
   1649     num_shards=num_shards.get(split),
   1650     embed_external_files=embed_external_files,
   1651 )
   1652 total_uploaded_size += uploaded_size
   1653 total_dataset_nbytes += dataset_nbytes

File ~/.local/lib/python3.10/site-packages/datasets/arrow_dataset.py:5308, in Dataset._push_parquet_shards_to_hub(self, repo_id, data_dir, split, private, token, branch, max_shard_size, num_shards, embed_external_files)
   5306         shard.to_parquet(buffer)
   5307         uploaded_size += buffer.tell()
-> 5308         _retry(
   5309             api.upload_file,
   5310             func_kwargs={
   5311                 "path_or_fileobj": buffer.getvalue(),
   5312                 "path_in_repo": shard_path_in_repo,
   5313                 "repo_id": repo_id,
   5314                 "token": token,
   5315                 "repo_type": "dataset",
   5316                 "revision": branch,
   5317             },
   5318             exceptions=HTTPError,
   5319             status_codes=[504],
   5320             base_wait_time=2.0,
   5321             max_retries=5,
   5322             max_wait_time=20.0,
   5323         )
   5324     shards_path_in_repo.append(shard_path_in_repo)
   5326 # Cleanup to remove unused files

File ~/.local/lib/python3.10/site-packages/datasets/utils/file_utils.py:293, in _retry(func, func_args, func_kwargs, exceptions, status_codes, max_retries, base_wait_time, max_wait_time)
    291 except exceptions as err:
    292     if retry >= max_retries or (status_codes and err.response.status_code not in status_codes):
--> 293         raise err
    294     else:
    295         sleep_time = min(max_wait_time, base_wait_time * 2**retry)  # Exponential backoff

File ~/.local/lib/python3.10/site-packages/datasets/utils/file_utils.py:290, in _retry(func, func_args, func_kwargs, exceptions, status_codes, max_retries, base_wait_time, max_wait_time)
    288 while True:
    289     try:
--> 290         return func(*func_args, **func_kwargs)
    291     except exceptions as err:
    292         if retry >= max_retries or (status_codes and err.response.status_code not in status_codes):

File ~/.local/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:118, in validate_hf_hub_args.<locals>._inner_fn(*args, **kwargs)
    115 if check_use_auth_token:
    116     kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)
--> 118 return fn(*args, **kwargs)

File ~/.local/lib/python3.10/site-packages/huggingface_hub/hf_api.py:849, in future_compatible.<locals>._inner(self, *args, **kwargs)
    846     return self.run_as_future(fn, self, *args, **kwargs)
    848 # Otherwise, call the function normally
--> 849 return fn(self, *args, **kwargs)

File ~/.local/lib/python3.10/site-packages/huggingface_hub/hf_api.py:3460, in HfApi.upload_file(self, path_or_fileobj, path_in_repo, repo_id, token, repo_type, revision, commit_message, commit_description, create_pr, parent_commit, run_as_future)
   3452 commit_message = (
   3453     commit_message if commit_message is not None else f"Upload {path_in_repo} with huggingface_hub"
   3454 )
   3455 operation = CommitOperationAdd(
   3456     path_or_fileobj=path_or_fileobj,
   3457     path_in_repo=path_in_repo,
   3458 )
-> 3460 commit_info = self.create_commit(
   3461     repo_id=repo_id,
   3462     repo_type=repo_type,
   3463     operations=[operation],
   3464     commit_message=commit_message,
   3465     commit_description=commit_description,
   3466     token=token,
   3467     revision=revision,
   3468     create_pr=create_pr,
   3469     parent_commit=parent_commit,
   3470 )
   3472 if commit_info.pr_url is not None:
   3473     revision = quote(_parse_revision_from_pr_url(commit_info.pr_url), safe="")

File ~/.local/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:118, in validate_hf_hub_args.<locals>._inner_fn(*args, **kwargs)
    115 if check_use_auth_token:
    116     kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)
--> 118 return fn(*args, **kwargs)

File ~/.local/lib/python3.10/site-packages/huggingface_hub/hf_api.py:849, in future_compatible.<locals>._inner(self, *args, **kwargs)
    846     return self.run_as_future(fn, self, *args, **kwargs)
    848 # Otherwise, call the function normally
--> 849 return fn(self, *args, **kwargs)

File ~/.local/lib/python3.10/site-packages/huggingface_hub/hf_api.py:2967, in HfApi.create_commit(self, repo_id, operations, commit_message, commit_description, token, repo_type, revision, create_pr, num_threads, parent_commit, run_as_future)
   2965 try:
   2966     commit_resp = get_session().post(url=commit_url, headers=headers, data=data, params=params)
-> 2967     hf_raise_for_status(commit_resp, endpoint_name="commit")
   2968 except RepositoryNotFoundError as e:
   2969     e.append_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE)

File ~/.local/lib/python3.10/site-packages/huggingface_hub/utils/_errors.py:303, in hf_raise_for_status(response, endpoint_name)
    299     raise BadRequestError(message, response=response) from e
    301 # Convert `HTTPError` into a `HfHubHTTPError` to display request information
    302 # as well (request id and/or server error message)
--> 303 raise HfHubHTTPError(str(e), response=response) from e

HfHubHTTPError: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/datasets/artyomboyko/TEST/commit/main (Request ID: Root=1-654239d0-188938f36e5602350bdb9f22;140ca28b-89bb-4fbc-8be3-b16f73376252)

You have exceeded our hourly quotas for action: commit. We invite you to retry later.

How do I fix it?

To avoid this error, we’ve merged a PR that reduces the number of commits created by push_to_hub (1 commit per 50 uploaded files instead of 1 commit for each file). You can use the improved version by installing datasets from main with pip install git+https://github.com/huggingface/datasets.

Hello @mariosasko. After updating datasets and huggingface_hub by using:

pip install git+https://github.com/huggingface/datasets
pip install git+https://github.com/huggingface/huggingface_hub

I have a problem with tokenizers:

Installing collected packages: huggingface-hub
  Attempting uninstall: huggingface-hub
    Found existing installation: huggingface-hub 0.18.0
    Uninstalling huggingface-hub-0.18.0:
      Successfully uninstalled huggingface-hub-0.18.0
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tokenizers 0.14.1 requires huggingface_hub<0.18,>=0.16.4, but you have huggingface-hub 0.19.0.dev0 which is incompatible

And I can't update tokenizers using:

artyom@MSK-PC-01:~$ pip install git+https://github.com/huggingface/tokenizers
Defaulting to user installation because normal site-packages is not writeable
Collecting git+https://github.com/huggingface/tokenizers
  Cloning https://github.com/huggingface/tokenizers to /tmp/pip-req-build-gzyayvyx
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/tokenizers /tmp/pip-req-build-gzyayvyx
  Resolved https://github.com/huggingface/tokenizers to commit c718c53bb95884752badfbb27920396bb2df1518
ERROR: git+https://github.com/huggingface/tokenizers does not appear to be a Python project: neither 'setup.py' nor 'pyproject.toml' found.
artyom@MSK-PC-01:~$

Could you please tell me how to solve this package dependency problem?

I’ve opened a PR in the tokenizers repo to address this: Allow hf_hub 0.18 by mariosasko · Pull Request #1383 · huggingface/tokenizers · GitHub

@mariosasko Great, thanks! Waiting for the result… I have already tried processing the small (low-volume) Russian subset of the Common Voice 13 dataset. As far as I can tell, everything worked fine even without solving this problem.

The tokenizers PR has been merged, so installing them from main should fix the issue with the huggingface_hub version.

@mariosasko

Hello. I installed tokenizers using these instructions.

Am I doing something wrong?

Successfully built huggingface-hub
Installing collected packages: huggingface-hub
  Attempting uninstall: huggingface-hub
    Found existing installation: huggingface-hub 0.18.0
    Uninstalling huggingface-hub-0.18.0:
      Successfully uninstalled huggingface-hub-0.18.0
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tokenizers 0.14.2.dev0 requires huggingface_hub<0.19,>=0.16.4, but you have huggingface-hub 0.19.0.dev0 which is incompatible.
Successfully installed huggingface-hub-0.19.0.dev0
artyom@MSK-PC-01:~/tokenizers/bindings/python$

You need to build tokenizers from source to avoid this error. tokenizers is implemented in Rust (with bindings in Python), so pip install git+https://github.com/huggingface/tokenizers is not supported.

I understand. That’s what I did, following the instructions. That is, I did not install it using the “pip install tokenizers” command.