When I run trainer.train(), I run into a problem. Here is my code:
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)

BASE_MODEL = "google/gemma-2-2b-it"

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, truncation=True, padding='max_length')
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# 780-way single-label classification head on top of Gemma-2
model = AutoModelForSequenceClassification.from_pretrained(
    BASE_MODEL,
    num_labels=780,
    id2label=id2label_1,
    label2id=label2id,
)
model.config.problem_type = 'single_label_classification'

trainer = Trainer(
    model=model,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['test'],
    args=TrainingArguments(
        output_dir="./content/drive/MyDrive/output_1002/",
        remove_unused_columns=True,
        learning_rate=1e-3,
        weight_decay=0.01,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        num_train_epochs=1,
        optim='paged_adamw_8bit',
        eval_strategy='steps',
        save_strategy='steps',
        logging_dir='./logs',
        logging_steps=1,
        max_steps=10,
        save_total_limit=2,
        push_to_hub=False,
        bf16=True,
        load_best_model_at_end=True,
        report_to='none',
        max_grad_norm=1.0,
    ),
    data_collator=data_collator,
    tokenizer=tokenizer,
)
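(For reference, id2label_1 and label2id map between 780 distinct label strings and integer class ids. The sketch below is only illustrative, not my exact preprocessing; unique_labels is a placeholder for my actual list of labels.)

# Illustrative sketch only: build id2label / label2id from distinct label strings.
unique_labels = ["-0.003", "0.006", "-0.002"]  # placeholder; 780 values in total
id2label_1 = {i: lab for i, lab in enumerate(unique_labels)}
label2id = {lab: i for i, lab in enumerate(unique_labels)}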
I've tried learning_rate values from 2e-7 to 1e-3, max_steps from 10 to 100000, and fp16 set to both True and False.
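(Roughly, the sweep looked like the sketch below. This is illustrative only: the value grids and output paths are placeholders, not the exact runs, and each run reloads the model so it starts from the same weights.)

for lr in (2e-7, 1e-5, 1e-3):            # placeholder grid, not the exact values tried
    for use_fp16 in (False, True):
        # Reload the model so every run starts from the same initial weights.
        model = AutoModelForSequenceClassification.from_pretrained(
            BASE_MODEL, num_labels=780, id2label=id2label_1, label2id=label2id
        )
        model.config.problem_type = 'single_label_classification'
        args = TrainingArguments(
            output_dir=f"./sweep/lr{lr}_fp16{use_fp16}",   # placeholder path
            learning_rate=lr,
            fp16=use_fp16,
            bf16=not use_fp16,            # only one of fp16/bf16 enabled at a time
            max_steps=10,
            per_device_train_batch_size=4,
            per_device_eval_batch_size=4,
            eval_strategy='steps',
            logging_steps=1,
            report_to='none',
        )
        Trainer(
            model=model,
            args=args,
            train_dataset=tokenized_datasets['train'],
            eval_dataset=tokenized_datasets['test'],
            data_collator=data_collator,
            tokenizer=tokenizer,
        ).train()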
The output of the code below is:

print(tokenized_datasets['train'][0])

{'label': 61, 'input_ids': [2, 7639, 1162, 9160, 9999, 573, 13107, 576, 3757, 4063, 235303, 24261, 611, 65707, 5158, 578, 24023], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
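(For context, the tokenized dataset was produced roughly as in the sketch below. The name raw_datasets and its text column are placeholders for my actual data, not exact code; padding is left to DataCollatorWithPadding at batch time, which is why the example above has no padding tokens.)

# Illustrative sketch (placeholder names): tokenize the text column, keep an
# integer class id in 'label', and let the collator pad each batch dynamically.
def tokenize_fn(batch):
    return tokenizer(batch['text'], truncation=True)

tokenized_datasets = raw_datasets.map(tokenize_fn, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(['text'])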
and model.config is:
Gemma2Config {
"_name_or_path": "google/gemma-2-2b-it",
"architectures": [
"Gemma2ForSequenceClassification"
],
"attention_bias": false,
"attention_dropout": 0.0,
"attn_logit_softcapping": 50.0,
"bos_token_id": 2,
"cache_implementation": "hybrid",
"eos_token_id": [
1,
107
],
"final_logit_softcapping": 30.0,
"head_dim": 256,
"hidden_act": "gelu_pytorch_tanh",
"hidden_activation": "gelu_pytorch_tanh",
"hidden_size": 2304,
"id2label": {
"0": "-0.003",
"1": "0.006",
"2": "-0.002",
"3": "0.005",
"4": "-0.021",
"5": "-0.02",
"6": "0.041",
"7": "0.035",
"8": "0.037",
"9": "0.042",
"10": "0.044",
"11": "-0.022",
"12": "0.01",
"13": "-0.008",
"14": "0.016",
"15": "0.011",
"16": "0.013",
"17": "0.004",
"18": "0.001",
"19": "-0.0",
"20": "0.0",
"21": "0.046",
"22": "0.047",
"23": "0.045",
"24": "-0.023",
"25": "0.043",
"26": "0.027",
"27": "-0.013",
"28": "0.025",
"29": "-0.012",
"30": "-0.011",
"31": "-0.007",
"32": "0.026",
"33": "0.021",
"34": "0.024",
"35": "0.023",
"36": "0.022",
"37": "0.007",
"38": "0.155",
"39": "-0.076",
"40": "-0.077",
"41": "-0.078",
"42": "-0.074",
"43": "0.157",
"44": "0.012",
"45": "-0.006",
"46": "0.009",
"47": "-0.004",
"48": "-0.005",
"49": "-0.024",
"50": "0.036",
"51": "0.017",
"52": "0.018",
"53": "0.019",
"54": "-0.009",
"55": "0.02",
"56": "-0.01",
"57": "0.015",
"58": "0.008",
"59": "0.003",
"60": "0.002",
"61": "-0.001",
"62": "-0.026",
"63": "-0.025",
"64": "0.051",
"65": "0.052",
"66": "0.033",
"67": "0.053",
"68": "0.049",
"69": "-0.017",
"70": "0.05",
"71": "-0.051",
"72": "-0.05",
"73": "0.069",
"74": "-0.049",
"75": "0.097",
"76": "-0.038",
"77": "-0.047",
"78": "-0.04",
"79": "-0.041",
"80": "0.104",
"81": "-0.048",
"82": "0.081",
"83": "-0.034",
"84": "0.107",
"85": "0.106",
"86": "-0.045",
"87": "0.085",
"88": "-0.052",
"89": "-0.053",
"90": "-0.039",
"91": "0.105",
"92": "-0.029",
"93": "-0.015",
"94": "-0.016",
"95": "0.034",
"96": "0.032",
"97": "0.031",
"98": "0.058",
"99": "-0.028",
"100": "0.057",
"101": "0.055",
"102": "-0.027",
"103": "0.014",
"104": "-0.014",
"105": "0.028",
"106": "-0.033",
"107": "0.061",
"108": "0.059",
"109": "0.067",
"110": "-0.031",
"111": "-0.032",
"112": "-0.018",
"113": "0.063",
"114": "0.054",
"115": "0.04",
"116": "-0.019",
"117": "0.195",
"118": "-0.101",
"119": "0.207",
"120": "-0.099",
"121": "-0.097",
"122": "0.175",
"123": "-0.098",
"124": "0.092",
"125": "-0.043",
"126": "-0.046",
"127": "0.068",
"128": "-0.037",
"129": "-0.042",
"130": "0.093",
"131": "0.087",
"132": "0.094",
"133": "0.091",
"134": "-0.044",
"135": "0.089",
"136": "0.126",
"137": "-0.062",
"138": "-0.063",
"139": "-0.06",
"140": "-0.058",
"141": "0.128",
"142": "-0.061",
"143": "0.115",
"144": "-0.056",
"145": "-0.057",
"146": "-0.054",
"147": "0.079",
"148": "0.095",
"149": "0.084",
"150": "0.038",
"151": "0.103",
"152": "0.102",
"153": "0.101",
"154": "0.113",
"155": "0.122",
"156": "0.111",
"157": "0.125",
"158": "0.096",
"159": "-0.055",
"160": "0.086",
"161": "0.117",
"162": "0.088",
"163": "0.09",
"164": "-0.035",
"165": "0.074",
"166": "0.098",
"167": "0.099",
"168": "0.029",
"169": "0.03",
"170": "0.066",
"171": "-0.083",
"172": "-0.084",
"173": "-0.082",
"174": "0.048",
"175": "0.064",
"176": "0.065",
"177": "-0.03",
"178": "0.039",
"179": "0.073",
"180": "0.075",
"181": "0.07",
"182": "0.078",
"183": "-0.036",
"184": "0.08",
"185": "0.056",
"186": "0.082",
"187": "0.077",
"188": "-0.073",
"189": "-0.08",
"190": "-0.081",
"191": "-0.075",
"192": "0.168",
"193": "0.174",
"194": "-0.086",
"195": "0.177",
"196": "0.171",
"197": "0.17",
"198": "0.076",
"199": "0.16",
"200": "-0.079",
"201": "0.159",
"202": "-0.064",
"203": "-0.106",
"204": "0.145",
"205": "-0.103",
"206": "-0.105",
"207": "-0.104",
"208": "0.208",
"209": "-0.07",
"210": "-0.071",
"211": "-0.069",
"212": "-0.09",
"213": "-0.091",
"214": "0.142",
"215": "0.187",
"216": "0.18",
"217": "0.169",
"218": "0.162",
"219": "-0.059",
"220": "0.138",
"221": "0.153",
"222": "-0.072",
"223": "0.15",
"224": "-0.065",
"225": "0.123",
"226": "0.12",
"227": "0.124",
"228": "0.121",
"229": "-0.113",
"230": "-0.109",
"231": "0.119",
"232": "-0.066",
"233": "0.136",
"234": "-0.067",
"235": "0.137",
"236": "0.109",
"237": "0.108",
"238": "0.163",
"239": "0.156",
"240": "-0.068",
"241": "0.135",
"242": "0.134",
"243": "0.1",
"244": "0.118",
"245": "0.129",
"246": "0.13",
"247": "0.06",
"248": "0.062",
"249": "0.143",
"250": "0.165",
"251": "0.164",
"252": "0.172",
"253": "0.167",
"254": "0.139",
"255": "0.147",
"256": "0.149",
"257": "0.14",
"258": "0.2",
"259": "0.202",
"260": "-0.088",
"261": "0.194",
"262": "0.21",
"263": "-0.108",
"264": "-0.107",
"265": "0.197",
"266": "0.213",
"267": "0.11",
"268": "-0.1",
"269": "-0.092",
"270": "0.192",
"271": "0.141",
"272": "0.112",
"273": "0.293",
"274": "-0.136",
"275": "-0.144",
"276": "-0.142",
"277": "0.146",
"278": "0.133",
"279": "0.071",
"280": "0.083",
"281": "-0.185",
"282": "-0.169",
"283": "-0.18",
"284": "0.379",
"285": "-0.143",
"286": "0.199",
"287": "-0.184",
"288": "0.148",
"289": "0.161",
"290": "0.154",
"291": "0.144",
"292": "0.116",
"293": "0.158",
"294": "0.22",
"295": "-0.111",
"296": "-0.112",
"297": "0.251",
"298": "0.259",
"299": "0.266",
"300": "0.265",
"301": "0.072",
"302": "0.114",
"303": "0.131",
"304": "0.132",
"305": "-0.093",
"306": "0.211",
"307": "-0.102",
"308": "-0.096",
"309": "-0.095",
"310": "0.219",
"311": "-0.126",
"312": "-0.133",
"313": "-0.138",
"314": "-0.137",
"315": "-0.087",
"316": "0.196",
"317": "0.32",
"318": "-0.157",
"319": "-0.158",
"320": "0.185",
"321": "0.321",
"322": "-0.094",
"323": "-0.152",
"324": "0.322",
"325": "0.318",
"326": "0.179",
"327": "-0.089",
"328": "0.183",
"329": "0.182",
"330": "0.227",
"331": "0.224",
"332": "0.191",
"333": "0.205",
"334": "0.173",
"335": "0.178",
"336": "-0.085",
"337": "0.127",
"338": "-0.145",
"339": "0.273",
"340": "-0.141",
"341": "0.218",
"342": "-0.11",
"343": "0.225",
"344": "0.189",
"345": "0.186",
"346": "-0.116",
"347": "0.232",
"348": "0.226",
"349": "0.176",
"350": "0.201",
"351": "0.235",
"352": "-0.115",
"353": "0.166",
"354": "0.248",
"355": "-0.125",
"356": "-0.121",
"357": "-0.124",
"358": "0.188",
"359": "0.231",
"360": "-0.119",
"361": "0.243",
"362": "-0.114",
"363": "-0.123",
"364": "-0.129",
"365": "-0.118",
"366": "0.253",
"367": "-0.128",
"368": "0.215",
"369": "0.214",
"370": "0.24",
"371": "0.233",
"372": "0.152",
"373": "0.212",
"374": "-0.127",
"375": "-0.117",
"376": "0.328",
"377": "-0.164",
"378": "-0.168",
"379": "-0.166",
"380": "0.257",
"381": "0.25",
"382": "0.193",
"383": "0.238",
"384": "0.223",
"385": "0.247",
"386": "0.244",
"387": "-0.153",
"388": "-0.151",
"389": "0.237",
"390": "0.221",
"391": "0.206",
"392": "-0.163",
"393": "-0.148",
"394": "-0.162",
"395": "0.334",
"396": "0.229",
"397": "0.216",
"398": "0.151",
"399": "0.649",
"400": "-0.337",
"401": "-0.301",
"402": "0.264",
"403": "0.269",
"404": "-0.131",
"405": "0.217",
"406": "0.222",
"407": "0.181",
"408": "0.278",
"409": "0.288",
"410": "0.203",
"411": "0.23",
"412": "0.236",
"413": "0.337",
"414": "0.277",
"415": "-0.13",
"416": "-0.156",
"417": "0.256",
"418": "0.252",
"419": "0.184",
"420": "0.204",
"421": "0.258",
"422": "0.291",
"423": "0.275",
"424": "-0.154",
"425": "0.261",
"426": "0.26",
"427": "-0.132",
"428": "0.28",
"429": "-0.135",
"430": "0.234",
"431": "0.19",
"432": "0.198",
"433": "-0.14",
"434": "0.299",
"435": "-0.146",
"436": "0.297",
"437": "0.289",
"438": "-0.139",
"439": "-0.12",
"440": "0.249",
"441": "0.283",
"442": "0.271",
"443": "0.228",
"444": "0.305",
"445": "-0.15",
"446": "0.302",
"447": "0.306",
"448": "0.3",
"449": "-0.147",
"450": "0.209",
"451": "-0.231",
"452": "0.414",
"453": "-0.23",
"454": "0.336",
"455": "-0.165",
"456": "-0.167",
"457": "0.281",
"458": "-0.149",
"459": "0.461",
"460": "-0.229",
"461": "0.463",
"462": "0.309",
"463": "-0.193",
"464": "-0.122",
"465": "0.241",
"466": "0.267",
"467": "0.276",
"468": "0.274",
"469": "0.239",
"470": "-0.155",
"471": "0.303",
"472": "0.242",
"473": "0.255",
"474": "0.263",
"475": "-0.256",
"476": "-0.251",
"477": "-0.255",
"478": "0.438",
"479": "0.383",
"480": "0.446",
"481": "-0.219",
"482": "0.443",
"483": "-0.217",
"484": "0.292",
"485": "0.295",
"486": "0.262",
"487": "0.245",
"488": "0.296",
"489": "0.294",
"490": "-0.134",
"491": "-0.16",
"492": "0.254",
"493": "0.284",
"494": "0.279",
"495": "0.349",
"496": "-0.171",
"497": "-0.17",
"498": "-0.247",
"499": "-0.241",
"500": "0.331",
"501": "-0.161",
"502": "0.327",
"503": "0.33",
"504": "0.329",
"505": "0.378",
"506": "-0.186",
"507": "-0.181",
"508": "0.343",
"509": "-0.182",
"510": "0.365",
"511": "0.246",
"512": "-0.199",
"513": "-0.196",
"514": "-0.198",
"515": "-0.195",
"516": "0.405",
"517": "-0.177",
"518": "-0.179",
"519": "-0.188",
"520": "0.272",
"521": "0.282",
"522": "-0.159",
"523": "-0.225",
"524": "-0.226",
"525": "0.46",
"526": "0.391",
"527": "0.34",
"528": "-0.189",
"529": "-0.194",
"530": "0.408",
"531": "-0.222",
"532": "-0.224",
"533": "0.324",
"534": "0.287",
"535": "-0.191",
"536": "-0.19",
"537": "-0.206",
"538": "-0.213",
"539": "-0.212",
"540": "0.285",
"541": "0.326",
"542": "0.335",
"543": "0.29",
"544": "0.301",
"545": "0.286",
"546": "-0.178",
"547": "0.37",
"548": "0.374",
"549": "0.362",
"550": "-0.175",
"551": "0.401",
"552": "0.403",
"553": "0.31",
"554": "0.307",
"555": "0.304",
"556": "0.308",
"557": "0.27",
"558": "0.407",
"559": "0.406",
"560": "-0.203",
"561": "0.404",
"562": "0.4",
"563": "-0.205",
"564": "0.333",
"565": "-1.134",
"566": "-0.333",
"567": "-0.33",
"568": "-0.22",
"569": "-0.218",
"570": "-0.187",
"571": "-0.173",
"572": "0.341",
"573": "0.268",
"574": "0.348",
"575": "0.347",
"576": "0.523",
"577": "-0.262",
"578": "-0.25",
"579": "0.516",
"580": "-0.174",
"581": "-0.172",
"582": "0.339",
"583": "0.357",
"584": "0.354",
"585": "0.352",
"586": "0.444",
"587": "-0.192",
"588": "0.493",
"589": "-0.237",
"590": "-0.483",
"591": "0.905",
"592": "-0.473",
"593": "-0.488",
"594": "0.325",
"595": "0.355",
"596": "-0.176",
"597": "-1.061",
"598": "0.393",
"599": "0.311",
"600": "0.381",
"601": "0.396",
"602": "0.521",
"603": "-0.489",
"604": "0.993",
"605": "0.387",
"606": "0.504",
"607": "-0.248",
"608": "-0.267",
"609": "-0.269",
"610": "0.539",
"611": "-0.263",
"612": "0.389",
"613": "-0.209",
"614": "-0.208",
"615": "0.427",
"616": "0.316",
"617": "0.429",
"618": "-0.211",
"619": "-0.242",
"620": "-0.239",
"621": "-0.243",
"622": "0.454",
"623": "0.518",
"624": "-0.233",
"625": "-0.2",
"626": "0.398",
"627": "-0.236",
"628": "-0.228",
"629": "-0.235",
"630": "0.332",
"631": "0.345",
"632": "0.344",
"633": "0.317",
"634": "1.059",
"635": "1.147",
"636": "0.323",
"637": "-0.272",
"638": "0.544",
"639": "-0.271",
"640": "0.298",
"641": "0.38",
"642": "0.367",
"643": "-0.183",
"644": "0.375",
"645": "-0.258",
"646": "-0.26",
"647": "0.482",
"648": "-0.326",
"649": "-0.325",
"650": "0.615",
"651": "-0.234",
"652": "0.488",
"653": "-0.232",
"654": "-0.238",
"655": "0.425",
"656": "0.413",
"657": "0.368",
"658": "0.361",
"659": "-0.254",
"660": "-0.253",
"661": "0.314",
"662": "0.315",
"663": "0.342",
"664": "0.358",
"665": "0.366",
"666": "-0.24",
"667": "0.487",
"668": "0.483",
"669": "0.496",
"670": "-0.244",
"671": "-0.207",
"672": "0.497",
"673": "-0.438",
"674": "-0.439",
"675": "-0.441",
"676": "-0.394",
"677": "0.474",
"678": "0.399",
"679": "-0.321",
"680": "-0.314",
"681": "-0.316",
"682": "0.376",
"683": "-0.227",
"684": "-0.204",
"685": "0.426",
"686": "-0.214",
"687": "0.39",
"688": "0.431",
"689": "-0.535",
"690": "0.371",
"691": "0.356",
"692": "0.346",
"693": "-0.223",
"694": "0.409",
"695": "0.554",
"696": "0.582",
"697": "0.595",
"698": "-0.216",
"699": "0.535",
"700": "-0.257",
"701": "0.364",
"702": "0.433",
"703": "0.441",
"704": "0.44",
"705": "-0.215",
"706": "0.35",
"707": "-0.21",
"708": "-0.201",
"709": "0.382",
"710": "-0.27",
"711": "-0.266",
"712": "0.538",
"713": "0.546",
"714": "0.532",
"715": "0.363",
"716": "0.473",
"717": "-0.202",
"718": "0.394",
"719": "-0.197",
"720": "0.372",
"721": "0.384",
"722": "0.312",
"723": "0.313",
"724": "0.319",
"725": "0.574",
"726": "-0.279",
"727": "-0.304",
"728": "-0.309",
"729": "-0.312",
"730": "0.353",
"731": "0.36",
"732": "0.485",
"733": "0.373",
"734": "0.533",
"735": "0.543",
"736": "-0.268",
"737": "-0.292",
"738": "-0.289",
"739": "0.591",
"740": "-0.221",
"741": "0.511",
"742": "0.51",
"743": "0.359",
"744": "-0.287",
"745": "-0.3",
"746": "0.377",
"747": "0.417",
"748": "-0.252",
"749": "0.519",
"750": "0.464",
"751": "0.397",
"752": "0.467",
"753": "0.369",
"754": "-0.323",
"755": "-0.327",
"756": "0.447",
"757": "0.419",
"758": "0.455",
"759": "0.385",
"760": "-0.261",
"761": "0.457",
"762": "0.428",
"763": "0.432",
"764": "0.452",
"765": "0.338",
"766": "0.351",
"767": "0.54",
"768": "0.439",
"769": "0.537",
"770": "0.416",
"771": "0.43",
"772": "0.421",
"773": "-0.276",
"774": "0.458",
"775": "0.411",
"776": "0.423",
"777": "-0.805",
"778": "-0.283",
"779": "-0.259"
},
"initializer_range": 0.02,
"intermediate_size": 9216,
"max_position_embeddings": 8192,
"model_type": "gemma2",
"num_attention_heads": 8,
"num_hidden_layers": 26,
"num_key_value_heads": 4,
"pad_token_id": 0,
"problem_type": "single_label_classification",
"query_pre_attn_scalar": 256,
"rms_norm_eps": 1e-06,
"rope_theta": 10000.0,
"sliding_window": 4096,
"torch_dtype": "float32",
"transformers_version": "4.44.2",
"use_cache": true,
"vocab_size": 256000
}
(I removed label2id from the config output above because it is too long.)
What should I do to solve this problem?