It would be nice to list the models available for a given task in a Pythonic way, e.g.:
import re
import requests
from bs4 import BeautifulSoup
def list_models(task, sort_by="downloads"):
    """Yield models from the first page of results on huggingface.co.

    Scrapes ``https://huggingface.co/models`` filtered by pipeline task.

    Args:
        task: Pipeline tag to filter by, e.g. "automatic-speech-recognition".
        sort_by: Sort key understood by the site (default "downloads").

    Yields:
        dict with keys "model_name" (repo id), "last_updated" (ISO
        timestamp), "downloaded" (display string as shown on the page,
        e.g. "1.2k"), and "liked" (int, 0 when absent).

    Raises:
        requests.HTTPError: if the page request fails.
    """
    url = f"https://huggingface.co/models?pipeline_tag={task}&sort={sort_by}"
    # Bound the request so a stalled connection cannot hang the generator.
    response = requests.get(url, timeout=30)
    # Fail loudly instead of silently parsing an error page.
    response.raise_for_status()
    # Pass an explicit parser: omitting it emits a bs4 warning and makes the
    # chosen parser (and thus the parse tree) depend on what is installed.
    soup = BeautifulSoup(response.content.decode('utf8'), "html.parser")
    squeeze_spaces = re.compile(" +")  # hoisted out of the per-article loop
    for model in soup.find_all('article'):
        # The article text uses "•" as a field separator; flatten all other
        # whitespace, turn separators into newlines, then split into fields.
        flattened = model.text.replace('\n', ' ').replace('\t', ' ').replace('•', '\n')
        parsed_text = [line.strip() for line in squeeze_spaces.sub(' ', flattened).strip().split('\n')]
        # Fields: display name, last-updated, download count, optional likes.
        model_name_str, last_updated_str, downloaded, *liked = parsed_text
        liked = int(liked[0]) if liked else 0
        model_name = model.find('a').attrs['href'][1:]  # strip leading "/"
        timestamp = model.find('time').attrs['datetime']
        yield {"model_name": model_name, "last_updated": timestamp, "downloaded": downloaded.strip(), "liked": liked}
# Example usage: materialize the first page of automatic-speech-recognition
# models, most-downloaded first.
task, sort_by = "automatic-speech-recognition", "downloads"
list(list_models(task, sort_by))
Ideally, a user would want to do something like:
import transformers
transformers.list_models("automatic-speech-recognition")
It can get really fancy, e.g. keeping the tasks in a fixed enum and then creating complementary functions like:
transformers.list_models(task="...")
transformers.list_metrics(task="...")
transformers.list_datasets(task="...",user=None)
I would imagine someone would go kind of crazy and have some fun with:
# Sketch of the proposed workflow: evaluate every listed model on every
# listed dataset with every listed metric for a task.
from transformers import list_datasets, list_models, list_metrics  # fixed typo: "transfomrers"
from transformers import pipeline

for d in list_datasets(task="machine-translation"):
    # NOTE(review): presumably `load_dataset` from the `datasets` library —
    # the original called an undefined `load_datasets`; confirm intent.
    test_data = load_dataset(d['name'], split="test")
    for m in list_models(task="machine-translation"):
        mt = pipeline("translation", model=m['name'])
        # Was `mt(test)`: `test` is undefined; feed the source sentences.
        outputs = mt(test_data['source'])
        # Use the imported name directly — the `transformers` module itself
        # is never imported here, only names from it.
        for s in list_metrics(task="machine-translation"):
            metric = evaluate.metric(s['name'])  # assumes an `evaluate`-style API — TODO confirm
            scores = metric.compute(source=test_data['source'], target=outputs)
            print(d['name'], m['name'], s['name'], scores)