@app.get("/sentiment-analysis")
def sentiment_analysis(url: str):
"""
Fetches webpage content using 'requests'.
Uses 'BeautifulSoup' to extract relevant textual content.
Applies 'NLTK' and 'SentimentIntensityAnalyzer' to determine sentiment scores.
Also uses a pretrained model from Hugging Face's Transformers for emotion analysis.
"""
try:
# Fetching webpage content
response = requests.get(url)
response.raise_for_status()
except requests.RequestException as e:
raise HTTPException(status_code=400, detail=f"Error fetching the URL: {e}")
# Parsing and extracting text from HTML
soup = BeautifulSoup(response.text, 'html.parser')
text = soup.get_text().strip()
sia = SentimentIntensityAnalyzer()
# NLTK Sentiment Analysis
sentiment_scores = sia.polarity_scores(text)
# Transformers Emotion Analysis
try:
# Tokenize and split text into chunks within the token limit
max_length = 512 # Model's token limit
tokens = tokenizer.encode(text, add_special_tokens=True, truncation=True, max_length=max_length)
chunk_size = max_length - 2 # Account for special tokens [CLS] and [SEP]
chunks = [tokens[i:i + chunk_size] for i in range(0, len(tokens), chunk_size)]
emotions_aggregated = {}
for chunk in chunks:
# Convert chunk back to string
chunk_text = tokenizer.decode(chunk, skip_special_tokens=True)
chunk_outputs = classifier(chunk_text)
for item in chunk_outputs:
label = item['label'] # Corrected access to the dictionary
score = item['score']
emotions_aggregated[label] = emotions_aggregated.get(label, 0) + score
# Average the scores
for emotion in emotions_aggregated:
emotions_aggregated[emotion] /= len(chunks)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Error in emotion analysis: {e}")
# Combining results
return {
"sentiment_scores": sentiment_scores,
"emotion_analysis": emotions_aggregated
}
“detail”: “Error in emotion analysis: list indices must be integers or slices, not str”
I am really new to the transformers architecture so much of this is probably going over my head, what am I missing here?