The code runs perfectly fine on my laptop, and also when I build it into a Docker image and run the container locally.
# Imports are wrapped so a missing dependency shows up clearly in the Lambda
# logs.  The error must be re-raised, though: the original code only printed
# it and continued, which leaves the names undefined and turns an ImportError
# into a confusing NameError later inside lambda_handler.
try:
    from bs4 import BeautifulSoup
    import requests
    from urllib.request import urlopen
    import base64
    import re
    from transformers import BartForConditionalGeneration, BartTokenizer, BartConfig
    import json
except Exception as e:
    print("Error imports : {} ".format(e))
    raise
def _fetch_top_items(url, limit=15):
    """Fetch the Google News RSS feed and return up to `limit` non-YouTube
    items as three aligned lists: (headlines, dates, encoded_links)."""
    client = urlopen(url)
    try:
        xml_page = client.read()
    finally:
        # Close even if read() raises (the original leaked the connection).
        client.close()
    soup = BeautifulSoup(xml_page, 'xml')
    headlines, dates, links = [], [], []
    for news in soup.find_all("item"):
        # Skip YouTube sources — the <p>-scraping summarizer below can't
        # extract article text from video pages.
        if "youtube.com" in str(news.source):
            continue
        headlines.append(news.title.text)
        dates.append(news.pubDate.text)
        links.append(news.link.text)
        if len(links) >= limit:
            break
    return headlines, dates, links


def _decode_google_link(link):
    """Extract the target article URL from a Google News redirect link.

    The redirect URL embeds a base64 fragment at a fixed offset; the fragment
    is often mis-padded, so characters are appended until it decodes.
    """
    import binascii  # local import: only needed for the narrow except below
    coded = link[44:-5]  # strip the fixed Google News URL prefix/suffix
    while True:
        try:
            base64.b64decode(coded)
            break
        except binascii.Error:
            # Only padding errors are recoverable; b64decode ignores other
            # non-alphabet characters by default, so this terminates within
            # three iterations.  (The original used a bare `except:`.)
            coded += "a"
    # str() of the decoded bytes yields "b'...'"; the regex + splits below
    # carve the URL out of that representation (kept as-is deliberately —
    # the '\\' split relies on the bytes-repr escape sequences).
    url = str(base64.b64decode(coded))
    target = re.search(r"(?P<url>https?://[^\s]+)", url).group("url")
    target = target.split('$', 1)[0]
    target = target.split('\\', 1)[0]
    return target


def _summarize_article(link, headers, tokenizer, model):
    """Download one article and return its BART summary, or None when the
    page cannot be fetched."""
    try:
        page = requests.get(link, headers=headers)
    except requests.RequestException:
        # Network-level failure only; other exceptions should surface.
        return None
    article = BeautifulSoup(page.text, 'lxml')
    # "".join avoids the quadratic `text += p.text` of the original.
    text = "".join(p.text for p in article.find_all("p"))
    inputs = tokenizer.batch_encode_plus(
        [text], return_tensors='pt', truncation=True, max_length=1024)
    summary_ids = model.generate(inputs['input_ids'], early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    print(summary)
    return summary


def lambda_handler(event=None, context=None):
    """Summarize the top Google News stories with BART and return a JSON
    string of [{'headline', 'date', 'summary'}, ...].

    NOTE(review): the "Can't load the model" OSError on Lambda is almost
    certainly environmental, not a code bug: from_pretrained() downloads the
    weights at runtime, and on Lambda only /tmp is writable (512 MB by
    default) and the function may have no outbound internet.  Bake the model
    into the container image at build time (save_pretrained) and load it from
    that local path with local_files_only=True — TODO confirm against the
    deployment setup.
    """
    headers = {'User-Agent': 'Mozilla/5.0'}
    headlines, dates, encoded_links = _fetch_top_items(
        'https://news.google.com/news/rss', limit=15)
    decoded_links = [_decode_google_link(link) for link in encoded_links]

    # Loading ~1.6 GB of weights per invocation is slow; on Lambda, consider
    # hoisting this to module scope so warm starts reuse the loaded model.
    tokenizer = BartTokenizer.from_pretrained(
        'facebook/bart-large-cnn', cache_dir="/tmp/")
    model = BartForConditionalGeneration.from_pretrained(
        'facebook/bart-large-cnn', cache_dir="/tmp/")

    # Build each result row atomically.  The original appended to three
    # parallel lists and `continue`d on fetch failure, which misaligned
    # headlines/dates with summaries whenever any article failed to load.
    results = []
    for title, date, link in zip(headlines, dates, decoded_links):
        summary = _summarize_article(link, headers, tokenizer, model)
        if summary is None:
            continue
        results.append({'headline': title, 'date': date, 'summary': summary})
    return json.dumps(results)
But when uploaded to AWS Lambda (using aws ecr), I run into an error.
{
"errorMessage": "Can't load the model for 'facebook/bart-large-cnn'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'facebook/bart-large-cnn' is the correct path to a directory containing a file named pytorch_model.bin, tf_model.h5, model.ckpt or flax_model.msgpack.",
"errorType": "OSError",
"requestId": "938ad716-f2c2-4d18-920b-f170aa685b4c",
"stackTrace": [
" File \"/var/task/app.py\", line 59, in lambda_handler
model=BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn',cache_dir=\"/tmp/\")",
" File \"/var/lang/lib/python3.9/site-packages/transformers/modeling_utils.py\", line 2023, in from_pretrained\n raise EnvironmentError("
]
}
I don’t understand this error: since the code runs fine both on my machine and inside Docker, I assume the problem is not in the code itself but in something about the Lambda environment.