I observed low accuracy from my BERT ensemble, even though each individual model (BanglaBERT, BUETBERT, and DistilBERT) reached around 70–75% accuracy on its own.
Below is the architecture of my pipeline. For each model I first ran
!pip install accelerate -U
and then continued as follows.

BanglaBERT:
=======================
Imports and Installations
=======================
import os
os.environ["WANDB_DISABLED"] = "true"
# First uninstall transformers fully
#!pip uninstall -y transformers
# Install the required version manually
!pip install transformers==4.41.1
!pip install --upgrade transformers datasets --quiet   # note: this upgrade can override the pinned version above
!pip install datasets
!pip install demoji
import pandas as pd
from sklearn.model_selection import train_test_split
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from collections import defaultdict
import torch
import demoji
import re
=======================
Mount Google Drive
=======================
from google.colab import drive
drive.mount('/content/Drive')
# Set working directory
os.chdir('/content/Drive/My Drive/MSC thesis 25.04.2025')
=======================
Text Preprocessing Function
=======================
demoji.download_codes()
def preprocess_bangla_text(text):
    # Remove emojis, punctuation, Bangla digits, and extra whitespace
    text = demoji.replace(text, repl="")
    whitespace = re.compile(r"[\s\u0020\u00a0\u1680\u180e\u202f\u205f\u3000\u2000-\u200a]+", re.UNICODE)
    bangla_fullstop = u"\u0964"
    punctSeq = u"['\"“”‘’]+|[.?!,…]+|[:;]+"
    punc = u"[(),$%^&*+={}:\"|'~`<>/,¦!?½£¶¼©⅐⅑⅒⅓⅔⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞⅟↉¤¿º;-]+"
    bangla_numbers = u"[০১২৩৪৫৬৭৮৯]+"
    text = re.sub(punctSeq, " ", text)
    text = re.sub(bangla_fullstop, " ", text)
    text = re.sub(punc, " ", text)
    text = re.sub(bangla_numbers, "", text)
    text = whitespace.sub(" ", text).strip()
    return text
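For illustration, here is what the cleaner does to a short made-up example (the sentence is not from my dataset):
print(preprocess_bangla_text("আমি ঢাকায় থাকি! ১২৩ 😊"))
# should print something like: আমি ঢাকায় থাকি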
=======================
Load Dataset
=======================
# Load Tier 1 (strict - used for evaluation only)
tier1 = pd.read_excel('strict_cleaned_file.xlsx')
tier1 = tier1[['Text', 'Region']]
# Load Tier 2 and Tier 3 (flexible and all-no cases, used for training)
tier2 = pd.read_excel('flexible_filtered.xlsx')
tier2 = tier2[['Text', 'Region']]
tier3 = pd.read_excel('all_no_cases.xlsx')
tier3 = tier3[['Text', 'Region']]
# Combine Tier 2 + Tier 3 for training
train_df = pd.concat([tier2, tier3], ignore_index=True)
=======================
Preprocessing
=======================
tier1['Text'] = tier1['Text'].apply(preprocess_bangla_text)
train_df['Text'] = train_df['Text'].apply(preprocess_bangla_text)
=======================
Special Word-Based Oversampling
=======================
special_words_df = pd.read_excel('special_words.xlsx')
special_vocab = defaultdict(set)
for _, row in special_words_df.iterrows():
    special_vocab[row['region']].add(str(row['word']))

original_counts = train_df['Region'].value_counts().to_dict()
max_count = max(original_counts.values())
augmented_rows = []
for region in train_df['Region'].unique():
    regional_texts = train_df[train_df['Region'] == region]
    current_count = len(regional_texts)
    needed = max_count - current_count
    if needed <= 0:
        continue
    # Only oversample rows that contain at least one region-specific keyword
    texts_with_keywords = regional_texts[
        regional_texts['Text'].apply(lambda txt: any(word in str(txt) for word in special_vocab[region]))
    ]
    if len(texts_with_keywords) == 0:
        continue
    for _ in range(needed):
        sample_row = texts_with_keywords.sample(n=1, replace=True).iloc[0]
        augmented_rows.append(sample_row)

# Append oversampled data
train_df = pd.concat([train_df, pd.DataFrame(augmented_rows)], ignore_index=True)
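A quick look at the class counts after oversampling (just a printout, not in my original script):
print(train_df['Region'].value_counts())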
=======================
Encode Labels
=======================
unique_labels = tier1['Region'].unique().tolist()
label2id = {label: i for i, label in enumerate(unique_labels)}
id2label = {i: label for label, i in label2id.items()}
train_df['label'] = train_df['Region'].map(label2id)
tier1['label'] = tier1['Region'].map(label2id)
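Because label2id is built from tier1's Regions only, a quick check (not in my original run) that no training row was left unmapped:
assert train_df['label'].isna().sum() == 0, "Some training rows have Regions not present in tier1"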
=======================
Tokenization
=======================
tokenizer = AutoTokenizer.from_pretrained("sagorsarker/bangla-bert-base")

# Prepare datasets
dataset_train = Dataset.from_pandas(train_df[['Text', 'label']])
dataset_test = Dataset.from_pandas(tier1[['Text', 'label']])

def tokenize(batch):
    return tokenizer(batch['Text'], padding="max_length", truncation=True, max_length=128)

dataset_train = dataset_train.map(tokenize, batched=True)
dataset_test = dataset_test.map(tokenize, batched=True)
=======================
Model Loading
=======================
model = AutoModelForSequenceClassification.from_pretrained(
    "sagorsarker/bangla-bert-base",
    num_labels=len(unique_labels)
)
=======================
Training Arguments
=======================
training_args = TrainingArguments(
    output_dir="./results_banglabert_tier2_tier3",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    logging_dir="./logs_banglabert_tier2_tier3",
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy"
)
=======================
Compute Metrics
=======================
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = logits.argmax(axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='weighted')
    acc = accuracy_score(labels, predictions)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }
=======================
Trainer Setup
=======================
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_train,
    eval_dataset=dataset_test,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)
=======================
Training and Evaluation
=======================
trainer.train()
trainer.evaluate()
# Add label mappings before saving
model.config.label2id = label2id
model.config.id2label = id2label
# Save model
trainer.save_model("/content/Drive/MyDrive/MSC thesis 25.04.2025/three_bert_tier2/banglabert_saved_model")
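As a sanity check one could reload the saved model right away and confirm the label mapping survived the round-trip (not part of my original run):
reloaded = AutoModelForSequenceClassification.from_pretrained("/content/Drive/MyDrive/MSC thesis 25.04.2025/three_bert_tier2/banglabert_saved_model")
print(reloaded.config.id2label)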
For BUETBERT, I repeat exactly the same script (installations, Drive mount, preprocessing, dataset loading, oversampling, label encoding, metrics, and Trainer setup); only the checkpoint and the output paths change:

tokenizer = AutoTokenizer.from_pretrained("csebuetnlp/banglabert")

model = AutoModelForSequenceClassification.from_pretrained(
    "csebuetnlp/banglabert",
    num_labels=len(unique_labels)
)

training_args = TrainingArguments(
    output_dir="./results_buetbert_tier2_tier3",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    logging_dir="./logs_buetbert_tier2_tier3",
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy"
)

trainer.train()
trainer.evaluate()
# Add label mappings before saving
model.config.label2id = label2id
model.config.id2label = id2label
trainer.save_model("/content/Drive/MyDrive/MSC thesis 25.04.2025/three_bert_tier2/buetbert_saved_model")
For DistilBERT, again the same script with only the checkpoint and paths changed:

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-multilingual-cased")

model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-multilingual-cased",
    num_labels=len(unique_labels)
)

training_args = TrainingArguments(
    output_dir="./results_distilbert_tier2_tier3",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    logging_dir="./logs_distilbert_tier2_tier3",
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy"
)

trainer.train()
trainer.evaluate()
# Add label mappings before saving
model.config.label2id = label2id
model.config.id2label = id2label
trainer.save_model("/content/Drive/MyDrive/MSC thesis 25.04.2025/three_bert_tier2/distilbert_saved_model")
However, after applying the ensemble, the accuracy dropped to 14%. I had saved the individual models to my Drive before ensembling. Why did this happen?
Ensemble BERT:
# Uninstall broken numpy
!pip uninstall -y numpy
# Reinstall a clean and compatible numpy version
!pip install numpy==1.26.4
!pip install datasets==2.19.1
=== INSTALLATION ===
!pip install -q transformers==4.36.2 datasets openpyxl
=== IMPORTS ===
import pandas as pd
import numpy as np
import re
import torch
import matplotlib.pyplot as plt
from collections import defaultdict
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, ConfusionMatrixDisplay
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import Dataset
from google.colab import drive
=== DRIVE MOUNT ===
drive.mount('/content/Drive')
=== TEXT CLEANING ===
def preprocess_bangla_text(text):
    whitespace = re.compile(r"[\s\u0020\u00a0\u1680\u180e\u202f\u205f\u3000\u2000-\u200a]+", re.UNICODE)
    bangla_fullstop = u"\u0964"
    punctSeq = u"['\"“”‘’]+|[.?!,…]+|[:;]+"
    punc = u"[(),$%^&*+={}:\"|'~`<>/!?;–—]"
    bangla_numbers = u"[০-৯]+"
    text = re.sub(punctSeq, " ", text)
    text = re.sub(bangla_fullstop, " ", text)
    text = re.sub(punc, " ", text)
    text = re.sub(bangla_numbers, "", text)
    text = whitespace.sub(" ", text).strip()
    return text
=== LOAD DATA ===
df = pd.read_excel("/content/Drive/MyDrive/MSC thesis 25.04.2025/strict_cleaned_file.xlsx")[["Text", "Region"]]
df["Text"] = df["Text"].astype(str).apply(preprocess_bangla_text)
unique_labels = sorted(df["Region"].unique())
label2id = {label: i for i, label in enumerate(unique_labels)}
id2label = {i: label for label, i in label2id.items()}
df["label"] = df["Region"].map(label2id)
=== TOKENIZE DATASET ===
dataset = Dataset.from_pandas(df[["Text", "label"]])
=== MODEL INFO ===
models_info = {
    "banglabert": {
        "path": "/content/Drive/MyDrive/MSC thesis 25.04.2025/three_bert_tier2/banglabert_saved_model",
        "tokenizer": AutoTokenizer.from_pretrained("sagorsarker/bangla-bert-base")
    },
    "buetbert": {
        "path": "/content/Drive/MyDrive/MSC thesis 25.04.2025/three_bert_tier2/buetbert_saved_model",
        "tokenizer": AutoTokenizer.from_pretrained("csebuetnlp/banglabert")
    },
    "distilbert": {
        "path": "/content/Drive/MyDrive/MSC thesis 25.04.2025/three_bert_tier2/distilbert_saved_model",
        "tokenizer": AutoTokenizer.from_pretrained("distilbert-base-multilingual-cased")
    }
}
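Another check that could be added here (not in my original run): print the id2label stored in each saved model's config next to the mapping built above from the sorted Regions, since any mismatch in label ordering would affect every prediction.
from transformers import AutoConfig
for name, model_info in models_info.items():
    print(name, "saved mapping:", AutoConfig.from_pretrained(model_info["path"]).id2label)
print("ensemble mapping:", id2label)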
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
all_preds = []   # one list of predictions per model

for name, model_info in models_info.items():
    model = AutoModelForSequenceClassification.from_pretrained(model_info["path"]).to(device)
    tokenizer = model_info["tokenizer"]

    def tokenize_fn(batch):
        return tokenizer(batch["Text"], padding="max_length", truncation=True, max_length=128)

    tokenized = dataset.map(tokenize_fn, batched=True)
    tokenized.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

    preds = []
    with torch.no_grad():
        for batch in torch.utils.data.DataLoader(tokenized, batch_size=16):
            # Remove the 'label' key before feeding the batch into the model
            batch = {k: v for k, v in batch.items() if k != 'label'}
            inputs = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**inputs)
            logits = outputs.logits.cpu().numpy()
            preds.extend(np.argmax(logits, axis=1))
    all_preds.append(preds)   # still inside the loop over models

# === MAJORITY VOTING (earlier naive version, kept for reference) ===
# final_preds = []
# all_preds_np = np.array(all_preds)
# for i in range(all_preds_np.shape[1]):
#     votes = list(all_preds_np[:, i])
#     final_preds.append(max(set(votes), key=votes.count))

# === MAJORITY VOTING (Safe Transpose Version) ===
# This block sits OUTSIDE the loop over models.
all_preds_np = np.array(all_preds)   # shape: (num_models, num_samples)
print("Prediction array shape (models, samples):", all_preds_np.shape)

# Transpose to (num_samples, num_models) for row-wise voting
all_preds_np = all_preds_np.T

# Majority vote per sample
final_preds = [np.bincount(row).argmax() for row in all_preds_np]
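For intuition, the row-wise vote on made-up predictions from three models looks like this (values are illustrative only; ties resolve to the smaller label id):
toy = np.array([[0, 0, 1],
                [2, 2, 2],
                [1, 0, 0],
                [1, 2, 1]])   # rows = samples, columns = models
print([np.bincount(row).argmax() for row in toy])   # [0, 2, 0, 1]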
y_true = df["label"].tolist()
=== METRICS ===
acc = accuracy_score(y_true, final_preds)
precision, recall, f1, _ = precision_recall_fscore_support(y_true, final_preds, average="weighted")
print("Ensemble Evaluation Metrics:")
print(f"Accuracy: {acc:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
=== CONFUSION MATRIX ===
cm = confusion_matrix(y_true, final_preds)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=unique_labels)
plt.figure(figsize=(10, 7))
disp.plot(cmap="Blues", xticks_rotation=45)
plt.title("Confusion Matrix - Ensemble Voting")
plt.show()