Here’s the text translated into English:
Hello, my goal is to generate tests for REST APIs. The interface takes a URL and the number of test cases to generate as input. However, the test-case generation is broken — it produces output like this:
['pm.test("Test case", function () {\n https://api.nasa.gov/mars-photos/api/v1/rovers/curiosity/photos?api_key=2kMSgESLKxbpMKgm3piy61AznbUV7QZbtRafmMY4&sol=0 _______________________________________________ Sent through the Full Disclosure mailing list https://nasa.gov/mailman/listinfo/fulldisclosure Web Archives & RSS: http://seclists.\n});',
'pm.test("Test case", function () {\n https://api.nasa.gov/mars-photos/api/v1/rovers/curiosity/photos?api_key=2kMSgESLKxbpMKgm3piy61AznbUV7QZbtRafmMY4&sol=0 _______________________________________________ Sent through the Full Disclosure mailing list https://nmap.org/mailman/listinfo/fulldisclosure Web Archives & RSS: http://seclists.\n});',
'pm.test("Test case", function () {\n https://api.nasa.gov/mars-photos/api/v1/rovers/curiosity/photos?api_key=2kMSgESLKxbpMKgm3piy61AznbUV7QZbtRafmMY4&sol=0 _______________________________________________ Sent through the Full Disclosure mailing list https://nasa.gov/mailman/listinfo/fulldisclosure Web Archives & RSS: https://seclists.\n});',
'pm.test("Test case", function () {\n https://api.nasa.gov/mars-photos/api/v1/rovers/curiosity/photos?api_key=2kMSgESLKxbpMKgm3piy61AznbUV7QZbtRafmMY4&sol=0 _______________________________________________ Sent through the Full Disclosure mailing list https://nasa.org/mailman/listinfo/fulldisclosure Web Archives & RSS: http://seclists.\n});',
'pm.test("Test case", function () {\n https://api.nasa.gov/mars-photos/api/v1/rovers/curiosity/photos?api_key=2kMSgESLKxbpMKgm3piy61AznbUV7QZbtRafmMY4&sol=0 _______________________________________________ Sent through the Full Disclosure mailing list https://nasa.gov/mailman/listinfo/fulldisclosure Web Archives & RSS http://seclists.org\n});']
Here’s my training code:
import warnings
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pad_sequence
# Silence FutureWarning noise emitted by transformers during training runs.
warnings.filterwarnings("ignore", category=FutureWarning)

# Tokenizer: GPT-2 ships without a pad token, so register one explicitly.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Training data: a JSON list of {"prompt": ..., "testcase": [...]} records.
with open('dataset.json', 'r') as f:
    dataset = json.load(f)

# Pull the raw text fields out of each record; "testcase" may be absent.
prompts = [record['prompt'] for record in dataset]
testcases = [record.get('testcase', []) for record in dataset]

# Pre-tokenize everything once so sequence lengths can be inspected below.
tokenized_prompts = [
    tokenizer(text, return_tensors='pt', padding=True, truncation=True)
    for text in prompts
]
tokenized_testcases = [
    [
        tokenizer(case['content'], return_tensors='pt', padding=True, truncation=True)
        for case in case_list
    ]
    for case_list in testcases
]
# Determine the longest tokenized sequence (prompt or any testcase) in the
# dataset so the Dataset class below can pad everything to one fixed length.
max_lengths = []
for tp, test in zip(tokenized_prompts, tokenized_testcases):
    # len() is already >= 0, so no "if > 0 else 0" conditional is needed.
    prompt_length = len(tp['input_ids'][0])
    testcase_lengths = [len(tc['input_ids'][0]) for tc in test]
    max_lengths.append(max([prompt_length] + testcase_lengths))
# BUGFIX: default=0 keeps an empty dataset from raising ValueError on max().
max_length = max(max_lengths, default=0)
# Define dataset class with the correct padding
class MyGPT2TestcaseGenerator(Dataset):
    """Dataset yielding (tokenized prompt, list of tokenized testcases)
    pairs, with every sequence padded to one common fixed length."""

    def __init__(self, data, tokenizer, max_length):
        self.data = data                # list of {"prompt": ..., "testcase": [...]}
        self.tokenizer = tokenizer      # callable HF-style tokenizer
        self.max_length = max_length    # pad/truncate target length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        record = self.data[idx]

        def encode(text):
            # Pad to max_length so every item in a batch has equal width.
            return self.tokenizer(
                text,
                truncation=True,
                padding='max_length',
                max_length=self.max_length,
                return_tensors='pt',
            )

        prompt_enc = encode(record['prompt'])
        testcase_encs = [encode(tc['content']) for tc in record.get('testcase', [])]
        return prompt_enc, testcase_encs
def collate_fn(batch):
    """Collate (prompt, testcases) pairs into padded batch tensors.

    Prompts are padded together into one (batch, seq) tensor; each
    sample's testcases are padded into their own tensor, with an empty
    tensor standing in for samples that have no testcases. Padding uses
    the module-level tokenizer's pad_token_id.
    """
    prompt_encs, testcase_encs = zip(*batch)
    pad_id = tokenizer.pad_token_id

    batched_prompts = pad_sequence(
        [enc['input_ids'].squeeze(0) for enc in prompt_encs],
        batch_first=True,
        padding_value=pad_id,
    )

    batched_testcases = []
    for sample_cases in testcase_encs:
        if not sample_cases:
            batched_testcases.append(torch.tensor([]))
            continue
        ids = [enc['input_ids'].squeeze(0) for enc in sample_cases]
        batched_testcases.append(
            pad_sequence(ids, batch_first=True, padding_value=pad_id)
        )
    return batched_prompts, batched_testcases
class MyGPT2Model(nn.Module):
    """Thin wrapper around a pretrained GPT-2 LM-head model that returns
    raw logits instead of the full model output object."""

    def __init__(self):
        super().__init__()
        self.gpt2 = GPT2LMHeadModel.from_pretrained('gpt2')

    def forward(self, input_ids):
        # Keep token ids inside the embedding table: ids introduced by
        # added special tokens (e.g. [PAD]) would otherwise index out of
        # range because the embedding matrix was never resized.
        clamped = input_ids.clamp(max=self.gpt2.config.vocab_size - 1)
        return self.gpt2(clamped).logits
if __name__ == '__main__':
    train_dataset = MyGPT2TestcaseGenerator(dataset, tokenizer, max_length)
    train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True,
                              drop_last=True, collate_fn=collate_fn)
    model = MyGPT2Model()
    # BUGFIX: the tokenizer gained a [PAD] token above, so the embedding
    # matrix must grow to cover its id. Without this, the forward clamp
    # maps pad ids onto a real vocabulary token and the model trains on
    # corrupted labels — a likely cause of the garbage generations.
    model.gpt2.resize_token_embeddings(len(tokenizer))
    # ignore_index keeps padding positions out of the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)
    optimizer = optim.Adam(model.parameters(), lr=5e-5)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    for epoch in range(10):  # Number of epochs
        total_loss = 0.0
        model.train()
        for prompts_batch, testcases_batch in train_loader:
            # NOTE(review): only the prompts are trained on here; the
            # testcases are never fed to the model, so it cannot learn a
            # prompt -> testcase mapping. Concatenating prompt + testcase
            # into one training sequence is the likely next fix — confirm
            # intent before changing the data flow.
            prompts_batch = prompts_batch.to(device)
            optimizer.zero_grad()
            logits = model(prompts_batch)
            # Standard causal-LM shift: predict token t+1 from tokens <= t.
            shift_logits = logits[..., :-1, :].contiguous()
            shift_labels = prompts_batch[..., 1:].contiguous()
            loss = criterion(shift_logits.view(-1, shift_logits.size(-1)),
                             shift_labels.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        # BUGFIX: total_loss was accumulated but never reported, making
        # training progress invisible.
        print(f"epoch {epoch + 1}: mean loss "
              f"{total_loss / max(len(train_loader), 1):.4f}")

    torch.save(model.state_dict(), 'testcase_generator_model.pth')
And my test code:
import torch
import torch.nn as nn
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from flask import Flask, render_template, request, send_file
import json
import os
import re
class MyGPT2Model(nn.Module):
    """Inference-side twin of the training wrapper: holds a pretrained
    GPT-2 LM-head model under the same 'gpt2' attribute so saved state
    dicts (keys prefixed 'gpt2.') line up on load."""

    def __init__(self):
        super().__init__()
        self.gpt2 = GPT2LMHeadModel.from_pretrained('gpt2')

    def forward(self, input_ids):
        # Return raw logits only.
        return self.gpt2(input_ids).logits
def is_valid_endpoint(url):
    """Return True when *url* looks like an http(s) endpoint with a
    dotted hostname: scheme://host.tld followed by optional path
    segments (query strings ride along inside the final segment)."""
    endpoint_pattern = re.compile(
        r'^https?://(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}(?:/[^/]*)*$'
    )
    return bool(endpoint_pattern.match(url))
def save_testcases_to_json(testcases, filename):
    """Serialize *testcases* to *filename* as pretty-printed JSON."""
    payload = json.dumps(testcases, indent=2)
    with open(filename, 'w') as out:
        out.write(payload)
def trainer(prompt_text, num):
    """Generate *num* Postman test scripts from *prompt_text* using the
    fine-tuned GPT-2 checkpoint.

    Returns a list of strings, each one a pm.test(...) script wrapping a
    generated sequence.
    """
    model = GPT2LMHeadModel.from_pretrained('gpt2')
    # BUGFIX: the checkpoint was saved from the MyGPT2Model wrapper, so
    # every key carries a 'gpt2.' prefix. Loading it directly into a bare
    # GPT2LMHeadModel with strict=False matched *no* keys and silently
    # kept the base (un-fine-tuned) weights — which is why the generated
    # output ignored the training data. Strip the prefix before loading.
    state_dict = torch.load('testcase_generator_model.pth')
    stripped = {
        (key[len('gpt2.'):] if key.startswith('gpt2.') else key): value
        for key, value in state_dict.items()
    }
    # NOTE(review): if training resized the embeddings for [PAD], resize
    # here too (model.resize_token_embeddings) before loading — confirm
    # against the checkpoint.
    model.load_state_dict(stripped, strict=False)
    model.eval()
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    num = int(num)  # form values arrive as strings; ensure num is an integer
    input_ids = tokenizer.encode(prompt_text, return_tensors='pt')
    with torch.no_grad():
        outputs = model.generate(input_ids, max_length=100,
                                 num_return_sequences=num, num_beams=5)
    generated_texts = [tokenizer.decode(output, skip_special_tokens=True)
                       for output in outputs]
    # Wrap each generation in a Postman test script.
    # BUGFIX: the original comment here was split across two physical
    # lines, leaving its second half outside the comment — a SyntaxError.
    postman_test_scripts = []
    for text in generated_texts:
        postman_test_scripts.append(
            f'pm.test("Test case", function () {{\n {text}\n}});'
        )
    return postman_test_scripts
app = Flask(__name__)

@app.route('/', methods=['GET', 'POST'])
def index():
    """Landing page. On POST, validate the submitted URL, generate test
    cases, persist them to JSON, and report success or the error."""
    if request.method != 'POST':
        return render_template('index.html')
    url = request.form.get('url')
    num = request.form.get('num')
    if not is_valid_endpoint(url):
        return render_template('index.html', error='Invalid URL provided.')
    try:
        testcases = trainer(url, num)
        save_testcases_to_json(testcases, 'generated_testcases.json')
        return render_template('index.html',
                               success='Test cases generated and saved.',
                               filename='generated_testcases.json')
    except Exception as e:
        # Surface any generation/serialization failure to the user.
        return render_template('index.html', error=str(e))
@app.route('/download/<filename>')
def download(filename):
    """Serve a previously generated file as an attachment.

    SECURITY FIX: *filename* is attacker-controlled; reduce it to its
    base name so traversal sequences (e.g. encoded '../') cannot escape
    the application directory via send_file.
    """
    safe_name = os.path.basename(filename)
    return send_file(safe_name, as_attachment=True)
if __name__ == '__main__':
    # NOTE(review): debug=True enables Flask's interactive debugger and
    # auto-reload — fine for local development, but it allows arbitrary
    # code execution and must never be enabled in production.
    app.run(debug=True)