"""Build a JSON evaluation set of log-parsing prompts from structured-log CSVs.

Each CSV row (columns ``Content`` and ``EventTemplate``) becomes one record
with an instruction, the raw log as input and the event template as output.
"""

import csv
import itertools
import json
from typing import Any, Dict, Iterable, Iterator, Optional

# Prompt wordings that are rotated over the records, one per CSV row.
# Every wording uses the same ``<*>`` wildcard token; two entries previously
# said ``<>``, which disagreed with the rest of the list.
INSTRUCTION_TEMPLATES = (
    "Parse the following log into a template format, replacing variable parts with a wildcard <*>",
    "Convert the following log into a standardized template by identifying and replacing the variable parts with a <*>",
    "Transform the raw log into a log template by replacing variable segments with <*>",
    "Given the following log, extract its common structure and replace variable parts with <*> to create a log template",
    "Create a log template from the raw log by replacing dynamic segments with <*>",
    "Identify the fixed format and variable parts in given log, replace variable parts with <*>",
    "Extract the unchanging template part from the given log, and mark the changing variable parts with <*>",
    "Conduct a structured analysis of the given log, finding template parts and variable parts identified by <*>",
    "Parse the structure of the given log, extract a template and mark all variables with <*>",
    "Analyze the given log, identify and replace variable parts with <*> to generate a template of the log",
    "Parse the provided log message into a structured log template. Whenever you identify a variable part, replace it with <*>",
    "You will be provided with a log message. Please extract the log templates and tag variables with <*> from this log message",
    "Delve into this log message, and meticulously draw out log templates and replace all the variables with <*>",
    "Pay attention to the templates and variables within the following log message. Diligently extract log templates and then tag variables with <*>",
    "You are a professional log analysis expert. Please fetch the templates in the given log and mark the variables with <*>",
)

# Inputs used when ``main`` is called without arguments.
DEFAULT_FILE_PATHS = (
    'C:/Users/cty/Desktop/loghub-master/loghub-master/BGL/BGL_2k.log_structured.csv',
    'C:/Users/cty/Desktop/loghub-master/loghub-master/Thunderbird/Thunderbird_2k.log_structured.csv',
)


def process_csv(file_path: str, instructions: Iterator[str],
                start_id: int) -> Iterator[Dict[str, Any]]:
    """Lazily turn each row of a structured-log CSV into an evaluation record.

    Args:
        file_path: CSV file with a header row containing at least the
            ``Content`` and ``EventTemplate`` columns.
        instructions: Iterator of instruction strings; exactly one is consumed
            per row. It is shared with the caller, so its position carries
            over between files.
        start_id: ``id`` assigned to the first row; later rows count up by one.

    Yields:
        Dicts with the keys ``id``, ``instruction``, ``input`` and ``output``.

    Raises:
        KeyError: if a row lacks ``Content`` or ``EventTemplate``.
    """
    with open(file_path, newline='', encoding='utf-8') as csvfile:
        rows = csv.DictReader(csvfile)
        for record_id, row in enumerate(rows, start=start_id):
            instruction = next(instructions)
            content = row['Content']
            yield {
                "id": record_id,
                # ``format`` only changes the text when an instruction carries
                # a ``{log}`` placeholder; the built-in templates have none.
                "instruction": instruction.format(log=content),
                "input": "\nlog entry:\n" + content,
                "output": row['EventTemplate'],
            }


def main(file_paths: Optional[Iterable[str]] = None,
         output_path: str = 'evaluation_set.json') -> None:
    """Convert the CSV files into one JSON evaluation set.

    Args:
        file_paths: CSV files to read, in order. Defaults to
            ``DEFAULT_FILE_PATHS``.
        output_path: Destination JSON file. Defaults to
            ``evaluation_set.json`` in the current working directory.
    """
    if file_paths is None:
        file_paths = DEFAULT_FILE_PATHS

    # One shared cycle so the rotation continues across files instead of
    # restarting at the first template for each file.
    instructions = itertools.cycle(INSTRUCTION_TEMPLATES)

    evaluation_set = []
    for file_path in file_paths:
        # Ids start at 0 and are contiguous, so the current length is the next
        # free id. Unlike reading ``evaluation_set[-1]['id']`` this also works
        # when no record has been produced yet (e.g. a header-only CSV).
        next_id = len(evaluation_set)
        evaluation_set.extend(process_csv(file_path, instructions, next_id))

    with open(output_path, 'w', encoding='utf-8') as jsonfile:
        json.dump(evaluation_set, jsonfile, indent=4)


if __name__ == "__main__":
    main()