Resolve TypeError: expected Tensor as element 1 in argument 0, but got NoneType

While executing the script below, I get the following TypeError. From debugging I can see that neither the training dataset nor the test dataset is None. Please help me fix this issue.

modeling_time_series_transformer.py", line 1378, in forward
    transformer_inputs, loc, scale, static_feat = self.create_network_inputs(
  File ".venv\Lib\site-packages\transformers\models\time_series_transformer\modeling_time_series_transformer.py", line 1252, in create_network_inputs
    torch.cat(
TypeError: expected Tensor as element 1 in argument 0, but got NoneType
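
For reference: at line 1252, `create_network_inputs` concatenates `past_time_features` with `future_time_features` via `torch.cat` whenever `future_values` is passed. Since the dataset below supplies `future_values` but never `future_time_features`, that is most likely the NoneType element; a sketch of a dataset that also returns future time features follows the `TimeSeriesDataset` class in the script.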

import pandas as pd
import numpy as np
from transformers import TimeSeriesTransformerModel, TimeSeriesTransformerConfig, Trainer, TrainingArguments, default_data_collator
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error
import torch
from torch.utils.data import Dataset

# Load the CSV file
file_path = './spy-stock-price - Spy_Ind_Signal.csv'
data = pd.read_csv(file_path)

# Exclude specified columns
exclude_columns = ['20SMA','50SMA','200SMA','20EMA','10EMA','AVWAP']
data = data.drop(columns=exclude_columns)

# Preprocess the data
data['Date'] = pd.to_datetime(data['Date'])
data = data.sort_values('Date')

data_transformed = data

# Drop the 'Date' column
data_transformed = data_transformed.drop(columns=['Date'])

# Normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data_transformed)
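# Note: fitting the scaler on the full series before the train/test split leaks
# test-set statistics into the training data; fitting on the training slice alone
# avoids this.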

# Convert the data to a supervised learning problem
def create_dataset(data, look_back=1):
    X, Y = [], []
    for i in range(len(data) - look_back - 1):
        a = data[i:(i + look_back)]
        X.append(a)
        Y.append(data[i + look_back, -1:])  # use the last column as the target
        if Y[-1] is None:  # debug guard; NumPy row slices are never None, so this never fires
            print(f"NoneType found at index {i + look_back}")
            print(f"data shape: {data.shape}")
            print(f"data[{i + look_back}] = {data[i + look_back]}")
    return np.array(X), np.array(Y)

look_back = 3
X, y = create_dataset(scaled_data, look_back)
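# With look_back = 3 and N rows in scaled_data, create_dataset yields N - 4 samples:
# X has shape (N - 4, 3, n_features) and y has shape (N - 4, 1).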

# Split into train and test sets
train_size = int(len(X) * 0.67)
X_train, X_test = X[0:train_size], X[train_size:]
y_train, y_test = y[0:train_size], y[train_size:]

# Reshape input to be [samples, time steps, features] (a no-op here; X already has this shape)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], data_transformed.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], data_transformed.shape[1]))

# Create observed mask for the transformer model
def create_observed_mask(data):
    mask = np.ones_like(data, dtype=np.float32)
    return mask

train_observed_mask = create_observed_mask(X_train)
test_observed_mask = create_observed_mask(X_test)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)
train_observed_mask = torch.tensor(train_observed_mask, dtype=torch.float32)
test_observed_mask = torch.tensor(test_observed_mask, dtype=torch.float32)

# Create a custom dataset class
class TimeSeriesDataset(Dataset):
    def __init__(self, X, y, observed_mask):
        self.X = X
        self.y = y
        self.observed_mask = observed_mask

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        sample = {
            'past_values': self.X[idx],
            'past_time_features': self.X[idx],  # Assuming no extra time features, just using past values
            'past_observed_mask': self.observed_mask[idx],
            'future_values': self.y[idx],  # Correct shape for future values
        }
        print(f"Sample {idx} - past_values shape: {sample['past_values'].shape}, past_time_features shape: {sample['past_time_features'].shape}, past_observed_mask shape: {sample['past_observed_mask'].shape}, future_values shape: {sample['future_values'].shape}")
        return sample
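
# The failing torch.cat in create_network_inputs joins past_time_features with
# future_time_features whenever future_values is given, so each sample must also
# carry a 'future_time_features' tensor of shape (prediction_length, num_time_features).
# A minimal sketch of a dataset that supplies one (the normalized positional index
# used as the single time feature is an illustrative assumption, not a requirement):
class TimeSeriesDatasetWithTimeFeatures(Dataset):
    def __init__(self, X, y, observed_mask, prediction_length=1):
        self.X = X
        self.y = y
        self.observed_mask = observed_mask
        self.prediction_length = prediction_length

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        seq_len = self.X[idx].shape[0]
        # normalized positional index as the lone time feature
        past_tf = torch.arange(seq_len, dtype=torch.float32).unsqueeze(-1) / seq_len
        future_tf = (seq_len + torch.arange(self.prediction_length, dtype=torch.float32).unsqueeze(-1)) / seq_len
        return {
            'past_values': self.X[idx],
            'past_time_features': past_tf,        # (seq_len, num_time_features)
            'past_observed_mask': self.observed_mask[idx],
            'future_values': self.y[idx],
            'future_time_features': future_tf,    # (prediction_length, num_time_features)
        }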

train_dataset = TimeSeriesDataset(X_train, y_train, train_observed_mask)
test_dataset = TimeSeriesDataset(X_test, y_test, test_observed_mask)

# Model configuration
config = TimeSeriesTransformerConfig(
    prediction_length=1,
    context_length=look_back,
    lags_sequence=[1, 2, 3],  # the config parameter is lags_sequence; lags_seq is silently ignored
    input_size=data_transformed.shape[1],
    output_size=1,  # single target: bull_bear_signal
    num_time_features=1,  # one time feature per step
    num_static_categorical_features=0,
    num_static_real_features=0,
    cardinality=[],
    embedding_dimension=[]
)
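# Note: the model builds lagged inputs internally, so each past_values window must be
# context_length + max(lags_sequence) = 3 + 3 = 6 steps long, while the windows above
# are only look_back = 3 steps; expect a separate error about lags exceeding the
# history length once the time-features issue is fixed.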

model = TimeSeriesTransformerModel(config)
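# Note: TimeSeriesTransformerModel is the bare encoder-decoder and returns hidden
# states without a loss, which Trainer requires; TimeSeriesTransformerForPrediction
# wraps it with a distribution head and returns a loss when future_values is given.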

# Training configuration
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    learning_rate=1e-4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    weight_decay=0.01,
    logging_dir="./logs",
)

# Set up the trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    data_collator=default_data_collator,
)

# Training
trainer.train()

# Evaluation
predictions, labels, _ = trainer.predict(test_dataset)

# Inverse transform the predictions and labels. Note: the scaler was fit on all
# feature columns, so inverse_transform on these single-column arrays will raise a
# shape mismatch; see the sketch below for a per-column alternative.
predictions = scaler.inverse_transform(predictions)
labels = scaler.inverse_transform(y_test.numpy())
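
# A sketch for inverting only the target column (assuming it is the last column the
# scaler was fit on; MinMaxScaler maps x -> x * scale_ + min_ per column, so we
# undo that directly):
def inverse_transform_last_column(values, fitted_scaler):
    # undo the last column's min-max scaling
    return (values - fitted_scaler.min_[-1]) / fitted_scaler.scale_[-1]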

# Extract the predictions and labels for bull_bear_signal
predictions_bull_bear_signal = predictions[:, 0]
labels_bull_bear_signal = labels[:, 0]

# For bull_bear_signal, we can use accuracy as the metric
accuracy_bull_bear_signal = np.mean(predictions_bull_bear_signal.round() == labels_bull_bear_signal.round())
print(f"Accuracy for Bull Bear Signal: {accuracy_bull_bear_signal}")