I am a PRO user, and I want to run the following code, but it persistently throws a 402 Payment Required error. Could you please help me fix the issue?
# Multi-Agent Document Processing System
# Using smolagents for document extraction, analysis, visualization, and web search (a PyPDF2-based stand-in replaces SmolDocling)
# Import necessary modules
from smolagents import CodeAgent, HfApiModel, tool, DuckDuckGoSearchTool
import json
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
import PyPDF2
import io
import re
import os
import pandas as pd
import seaborn as sns
from collections import Counter
from typing import Dict, List, Any, Union, Tuple
# Download NLTK resources
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)  # required by word_tokenize on newer NLTK releases
nltk.download('stopwords', quiet=True)
# Set up Hugging Face token for model access
from huggingface_hub import login
# Uncomment the line below to log in directly with your token
# login("your_huggingface_token")
# Or use the interactive login
login() # This will prompt you to enter your Hugging Face token
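# Optional sanity check (a hedged sketch, not part of the original script): confirm which
# Hugging Face account the active token resolves to, since Inference Providers credits are
# billed to that account. whoami() is part of huggingface_hub.
from huggingface_hub import whoami
try:
    print("Authenticated as:", whoami().get("name"))
except Exception as check_error:
    print("Could not verify the token:", check_error)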
# ============================================================
# Document Extraction Tools (SmolDocling replacement)
# ============================================================
@tool
def extract_features_from_pdf(document_path: str) -> str:
"""
Extracts text and metadata features from a PDF document.
Returns a JSON string with extracted features.
Args:
document_path (str): Path to the PDF document
Returns:
str: JSON string with extracted text and metadata
"""
try:
# Open the PDF file
with open(document_path, 'rb') as file:
# Create a PDF reader object
pdf_reader = PyPDF2.PdfReader(file)
# Extract basic metadata
metadata = {
"title": os.path.basename(document_path),
"pages": len(pdf_reader.pages),
"is_encrypted": pdf_reader.is_encrypted
}
# Extract text from all pages
text = ""
for page_num in range(len(pdf_reader.pages)):
page = pdf_reader.pages[page_num]
text += page.extract_text() + "\n\n"
# Extract additional metadata if available
if pdf_reader.metadata:
for key in pdf_reader.metadata:
clean_key = key.strip('/').lower()
if pdf_reader.metadata[key] and str(pdf_reader.metadata[key]).strip():
metadata[clean_key] = str(pdf_reader.metadata[key])
# Calculate basic text statistics
words = word_tokenize(text)
word_count = len(words)
unique_words = len(set(words))
# Compile features
features = {
"text": text,
"metadata": metadata,
"statistics": {
"word_count": word_count,
"unique_words": unique_words,
"text_length": len(text)
}
}
return json.dumps(features)
except Exception as e:
return json.dumps({"error": str(e), "text": "", "metadata": {}})
@tool
def extract_features_from_text(document_path: str) -> str:
"""
Extracts features from a plain text document.
Returns a JSON string with extracted features.
Args:
document_path (str): Path to the text document
Returns:
str: JSON string with extracted text and basic analysis
"""
try:
# Open and read the text file
with open(document_path, 'r', encoding='utf-8') as file:
text = file.read()
# Extract basic metadata
metadata = {
"title": os.path.basename(document_path),
"file_size": os.path.getsize(document_path)
}
# Calculate basic text statistics
words = word_tokenize(text)
word_count = len(words)
unique_words = len(set(words))
# Compile features
features = {
"text": text,
"metadata": metadata,
"statistics": {
"word_count": word_count,
"unique_words": unique_words,
"text_length": len(text)
}
}
return json.dumps(features)
except Exception as e:
return json.dumps({"error": str(e), "text": "", "metadata": {}})
@tool
def extract_features_tool(document_path: str) -> str:
"""
Unified feature extraction tool that detects file type and calls the appropriate extractor.
Args:
document_path (str): Path to the document
Returns:
str: JSON string with extracted features
"""
# Determine file type based on extension
if document_path.lower().endswith('.pdf'):
return extract_features_from_pdf(document_path)
elif document_path.lower().endswith(('.txt', '.md', '.py', '.json')):
return extract_features_from_text(document_path)
else:
# Default to text extraction for unknown file types
try:
return extract_features_from_text(document_path)
        except Exception:
# If text extraction fails, return error
return json.dumps({
"error": f"Unsupported file type for {document_path}",
"text": "",
"metadata": {}
})
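# Quick local check (a hedged sketch, not part of the original workflow): the extraction tools
# above can be exercised directly, with no agent or model call involved. "sample.pdf" is a
# hypothetical file name used only for illustration; the check is skipped if it does not exist.
if os.path.exists("sample.pdf"):
    sample_features = json.loads(extract_features_tool("sample.pdf"))
    print("Extraction check:", sample_features.get("statistics", sample_features.get("error")))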
# ============================================================
# Analysis Tools
# ============================================================
@tool
def analyze_text_tool(features_json: str) -> str:
"""
Analyzes the extracted features and returns the result as a JSON string.
Performs text summarization, keyword extraction, and sentiment analysis.
Args:
features_json (str): JSON string with extracted document features
Returns:
str: JSON string with analysis results
"""
features = json.loads(features_json)
# Check if there was an error in feature extraction
if "error" in features and features["error"]:
return json.dumps({"error": features["error"]})
text = features.get("text", "")
if not text:
return json.dumps({"error": "No text found for analysis"})
# Perform basic text preprocessing
tokens = word_tokenize(text.lower())
stop_words = set(stopwords.words('english'))
punctuation = set(string.punctuation)
filtered_tokens = [word for word in tokens if word not in stop_words and word not in punctuation and len(word) > 2]
# Extract keywords based on frequency
word_freq = Counter(filtered_tokens)
keywords = [word for word, count in word_freq.most_common(10)]
# Create a basic summary (first 500 characters or first 3 sentences)
sentences = re.split(r'(?<=[.!?])\s+', text)
summary = " ".join(sentences[:min(3, len(sentences))])
if len(summary) > 500:
summary = summary[:497] + "..."
# Perform very basic sentiment analysis
positive_words = ['good', 'great', 'excellent', 'positive', 'amazing', 'wonderful', 'best', 'happy']
negative_words = ['bad', 'worst', 'terrible', 'negative', 'awful', 'poor', 'wrong', 'unhappy']
positive_count = sum(1 for word in filtered_tokens if word in positive_words)
negative_count = sum(1 for word in filtered_tokens if word in negative_words)
# Determine sentiment based on simple word count
if positive_count > negative_count:
sentiment = "positive"
elif negative_count > positive_count:
sentiment = "negative"
else:
sentiment = "neutral"
# Get the top 20 most frequent words for visualization
top_words = {word: count for word, count in word_freq.most_common(20)}
# Compile analysis results
analysis_result = {
"summary": summary,
"keywords": keywords,
"sentiment": sentiment,
"top_words": top_words,
"metadata": features.get("metadata", {}),
"statistics": features.get("statistics", {})
}
return json.dumps(analysis_result)
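# Hedged sketch (not in the original script): analyze_text_tool can be smoke-tested on a
# hand-built features payload, which avoids any file or model dependency. The text and the
# statistics below are made up purely for illustration.
_demo_features = json.dumps({
    "text": "Tunnel boring machines are great. The model predicts penetration rate well. Results look good.",
    "metadata": {"title": "demo"},
    "statistics": {"word_count": 16, "unique_words": 15, "text_length": 96}
})
_demo_analysis = json.loads(analyze_text_tool(_demo_features))
print("Demo keywords:", _demo_analysis["keywords"], "| sentiment:", _demo_analysis["sentiment"])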
# ============================================================
# Visualization Tools
# ============================================================
@tool
def visualize_data_tool(analysis_json: str) -> None:
"""
Creates multiple visualizations from the analysis.
Generates a word cloud, bar chart, and sentiment visualization.
Args:
analysis_json (str): JSON string with analysis results
"""
analysis = json.loads(analysis_json)
# Check if there was an error in analysis
if "error" in analysis and analysis["error"]:
print(f"Error in visualization: {analysis['error']}")
return
# Create a figure with subplots
fig = plt.figure(figsize=(15, 12))
# 1. Word Cloud - Top Left
plt.subplot(2, 2, 1)
# Use summary and keywords for the word cloud
text_for_cloud = analysis["summary"] + " " + " ".join(analysis["keywords"] * 3)
wordcloud = WordCloud(
width=800,
height=400,
background_color="white",
max_words=100,
contour_width=3,
contour_color='steelblue'
).generate(text_for_cloud)
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
plt.title("Word Cloud of Document Content")
# 2. Top Words Bar Chart - Top Right
plt.subplot(2, 2, 2)
top_words = analysis.get("top_words", {})
words = list(top_words.keys())[:10] # Limit to top 10 for readability
counts = [top_words[word] for word in words]
plt.barh(words, counts, color='skyblue')
plt.xlabel('Frequency')
plt.ylabel('Words')
plt.title('Top 10 Most Frequent Words')
plt.gca().invert_yaxis() # Display in descending order
# 3. Document Statistics - Bottom Left
plt.subplot(2, 2, 3)
statistics = analysis.get("statistics", {})
if statistics:
labels = list(statistics.keys())
values = list(statistics.values())
colors = ['#ff9999','#66b3ff','#99ff99','#ffcc99']
explode = [0.1] * len(values) # explode all slices slightly
plt.pie(values, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
plt.axis('equal')
plt.title('Document Statistics')
else:
plt.text(0.5, 0.5, "No statistics available", ha='center', va='center')
plt.axis('off')
# 4. Sentiment Analysis - Bottom Right
plt.subplot(2, 2, 4)
sentiment = analysis.get("sentiment", "neutral")
sentiment_values = {
"positive": 1,
"neutral": 0,
"negative": -1
}
sentiment_color = {
"positive": "green",
"neutral": "gray",
"negative": "red"
}
plt.bar(["Sentiment"], [sentiment_values[sentiment]], color=sentiment_color[sentiment])
plt.ylim(-1.2, 1.2)
plt.title(f'Document Sentiment: {sentiment.capitalize()}')
# Adjust layout and display
plt.tight_layout()
plt.show()
# Generate additional visualization: Word frequency trend
if "top_words" in analysis:
plt.figure(figsize=(12, 6))
top_words = analysis["top_words"]
words = list(top_words.keys())[:15] # Top 15 words
counts = [top_words[word] for word in words]
# Create DataFrame for Seaborn
df = pd.DataFrame({
'Word': words,
'Frequency': counts
})
# Create Seaborn bar plot
sns.barplot(x='Frequency', y='Word', data=df, palette='viridis')
plt.title('Word Frequency Distribution')
plt.tight_layout()
plt.show()
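# Hedged sketch (not part of the original script): visualize_data_tool only reads the keys
# "summary", "keywords", "top_words", "statistics", and "sentiment", so it can be previewed
# with a tiny hand-built payload. All values below are illustrative only; uncomment to try it,
# since it opens matplotlib figures.
# _demo_viz = json.dumps({
#     "summary": "A short demo summary about tunnel boring machines.",
#     "keywords": ["tunnel", "boring", "machine"],
#     "top_words": {"tunnel": 5, "boring": 4, "machine": 3},
#     "statistics": {"word_count": 120, "unique_words": 80, "text_length": 700},
#     "sentiment": "neutral"
# })
# visualize_data_tool(_demo_viz)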
# ============================================================
# Web Search Tool
# ============================================================
# Using the built-in DuckDuckGoSearchTool from smolagents
web_search_tool = DuckDuckGoSearchTool()
@tool
def perform_web_search(analysis_json: str) -> str:
"""
Performs web searches based on keywords from the analysis.
Args:
analysis_json (str): JSON string with analysis results
Returns:
str: JSON string with search results
"""
analysis = json.loads(analysis_json)
# Check if there was an error in analysis
if "error" in analysis and analysis["error"]:
return json.dumps({"error": analysis["error"]})
# Get keywords for search
keywords = analysis.get("keywords", [])
if not keywords:
return json.dumps({"error": "No keywords available for search"})
# Take top 3 keywords for searching
top_keywords = keywords[:min(3, len(keywords))]
search_query = " ".join(top_keywords)
try:
# Perform web search
search_results = web_search_tool(search_query)
# Compile search results
results = {
"query": search_query,
"keywords_used": top_keywords,
"results": search_results[:5] # Limit to top 5 results
}
return json.dumps(results)
except Exception as e:
return json.dumps({"error": str(e)})
# ============================================================
# Set up the language model
# ============================================================
model = HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct") # Requires HF token with access
# Alternative models if access to Llama is restricted:
# model = HfApiModel(model_id="mistralai/Mistral-7B-Instruct-v0.2")
# model = HfApiModel(model_id="google/gemma-7b")
# ============================================================
# Define specialized agents
# ============================================================
document_extraction_agent = CodeAgent(
tools=[extract_features_tool, extract_features_from_pdf, extract_features_from_text],
model=model,
name="document_extraction_agent",
description="Extracts features from documents using specialized extraction tools for different file types"
)
analysis_agent = CodeAgent(
tools=[analyze_text_tool],
model=model,
name="analysis_agent",
description="Analyzes extracted document features including summarization, keyword extraction and sentiment analysis"
)
visualization_agent = CodeAgent(
tools=[visualize_data_tool],
model=model,
name="visualization_agent",
description="Visualizes analysis results through multiple visualizations including word clouds and charts"
)
web_search_agent = CodeAgent(
tools=[perform_web_search, web_search_tool],
model=model,
name="web_search_agent",
description="Performs web searches based on keywords extracted from document analysis"
)
# ============================================================
# Define the manager agent
# ============================================================
manager_agent = CodeAgent(
tools=[],
model=model,
managed_agents=[document_extraction_agent, analysis_agent, visualization_agent, web_search_agent],
name="manager_agent",
description="Coordinates the document processing workflow between all specialized agents"
)
# ============================================================
# Main function to process a document
# ============================================================
def process_document(document_path: str):
"""
Processes a document through the multi-agent system.
Args:
document_path (str): Path to the document in Colab environment.
"""
print(f"Starting processing of document: {document_path}")
try:
# Run the manager agent with a detailed prompt
result = manager_agent.run(
f"Process the document at '{document_path}' following these steps in order: "
"1. Extract features from the document using document_extraction_agent - detect file type and use the appropriate extraction method. "
"2. Analyze the extracted features using analysis_agent to generate a summary, extract keywords, and perform sentiment analysis. "
"3. Create visualizations of the analysis using visualization_agent, including a word cloud and frequency charts. "
"4. Perform web searches using web_search_agent based on the top keywords from the analysis. "
"Return a comprehensive report of the findings from each step."
)
print("\n========== PROCESSING COMPLETED ==========")
print("Result:", result)
print("===========================================\n")
return result
except Exception as e:
print(f"Error during processing: {e}")
import traceback
traceback.print_exc()
return f"Processing failed: {str(e)}"
# ============================================================
# Example usage in Colab
# ============================================================
# Upload a document to Colab
from google.colab import files
print("Please upload a document (PDF, TXT, etc.)")
uploaded = files.upload() # Upload your document
if uploaded:
# Get the uploaded file's name
document_path = list(uploaded.keys())[0]
# Process the document
process_document(document_path)
else:
print("No document was uploaded. Please run the cell again and upload a document.")
ERROR:
Application of artificial neural networks to the prediction of tunnel boring machine penetration rate.pdf(application/pdf) - 2631707 bytes, last modified: 6/12/2021 - 100% done
Saving Application of artificial neural networks to the prediction of tunnel boring machine penetration rate.pdf to Application of artificial neural networks to the prediction of tunnel boring machine penetration rate.pdf
Starting processing of document: Application of artificial neural networks to the prediction of tunnel boring machine penetration rate.pdf
╭───────────────────────────────────────────── New run - manager_agent ─────────────────────────────────────────────╮
│                                                                                                                    │
│ Process the document at 'Application of artificial neural networks to the prediction of tunnel boring machine     │
│ penetration rate.pdf' following these steps in order: 1. Extract features from the document using                 │
│ document_extraction_agent - detect file type and use the appropriate extraction method. 2. Analyze the            │
│ extracted features using analysis_agent to generate a summary, extract keywords, and perform sentiment            │
│ analysis. 3. Create visualizations of the analysis using visualization_agent, including a word cloud and          │
│ frequency charts. 4. Perform web searches using web_search_agent based on the top keywords from the analysis.     │
│ Return a comprehensive report of the findings from each step.                                                     │
│                                                                                                                    │
╰─ HfApiModel - meta-llama/Llama-3.3-70B-Instruct ───────────────────────────────────────────────────────────────────╯
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Step 1 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Error in generating model output:
402 Client Error: Payment Required for url:
https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.3-70B-Instruct/v1/chat/completions (Request
ID: Root=1-67e420cf-1ec0ac2a3a3102965c52fe0f;8fe0d876-5406-4953-9fd4-e7b03cd17bb5)
You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly
included credits.
[Step 1: Duration 0.59 seconds]
Error during processing: Error in generating model output:
402 Client Error: Payment Required for url: https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.3-70B-Instruct/v1/chat/completions (Request ID: Root=1-67e420cf-1ec0ac2a3a3102965c52fe0f;8fe0d876-5406-4953-9fd4-e7b03cd17bb5)
You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits.
Traceback (most recent call last):
File "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_http.py", line 409, in hf_raise_for_status
response.raise_for_status()
File "/usr/local/lib/python3.11/dist-packages/requests/models.py", line 1024, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 402 Client Error: Payment Required for url: https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.3-70B-Instruct/v1/chat/completions
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/lib/python3.11/dist-packages/smolagents/agents.py", line 1186, in step
chat_message: ChatMessage = self.model(
^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/smolagents/models.py", line 994, in __call__
response = self.client.chat_completion(**completion_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/huggingface_hub/inference/_client.py", line 956, in chat_completion
data = self._inner_post(request_parameters, stream=stream)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/huggingface_hub/inference/_client.py", line 321, in _inner_post
hf_raise_for_status(response)
File "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_http.py", line 481, in hf_raise_for_status
raise _format(HfHubHTTPError, str(e), response) from e
huggingface_hub.errors.HfHubHTTPError: 402 Client Error: Payment Required for url: https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.3-70B-Instruct/v1/chat/completions (Request ID: Root=1-67e420cf-1ec0ac2a3a3102965c52fe0f;8fe0d876-5406-4953-9fd4-e7b03cd17bb5)
You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "<ipython-input-2-833afc1baaa9>", line 471, in process_document
result = manager_agent.run(
^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/smolagents/agents.py", line 323, in run
return deque(self._run(task=self.task, max_steps=max_steps, images=images), maxlen=1)[0]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/smolagents/agents.py", line 337, in _run
raise e
File "/usr/local/lib/python3.11/dist-packages/smolagents/agents.py", line 334, in _run
final_answer = self._execute_step(task, memory_step)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/smolagents/agents.py", line 358, in _execute_step
final_answer = self.step(memory_step)
^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/dist-packages/smolagents/agents.py", line 1202, in step
raise AgentGenerationError(f"Error in generating model output:\n{e}", self.logger) from e
smolagents.utils.AgentGenerationError: Error in generating model output:
402 Client Error: Payment Required for url: https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.3-70B-Instruct/v1/chat/completions (Request ID: Root=1-67e420cf-1ec0ac2a3a3102965c52fe0f;8fe0d876-5406-4953-9fd4-e7b03cd17bb5)
You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits.