Skip to content
代码片段 群组 项目
process.py 2.9 KB
Newer Older
openaiops's avatar
openaiops 已提交
import csv
import json
import itertools

def process_csv(file_path, instructions, start_id):
    """Lazily convert each row of a structured-log CSV into an evaluation record.

    Args:
        file_path: Path of a UTF-8 CSV file whose header includes the
            ``Content`` and ``EventTemplate`` columns.
        instructions: Iterator of instruction strings. Exactly one item is
            pulled with ``next()`` for every row read, so a shared iterator
            keeps its position across successive calls.
        start_id: Id assigned to the first record; every following record
            gets the previous id plus one.

    Yields:
        One dict per data row, in file order, with the keys ``id``,
        ``instruction``, ``input`` and ``output``.
    """
    with open(file_path, newline='', encoding='utf-8') as handle:
        for record_id, entry in enumerate(csv.DictReader(handle), start=start_id):
            # Pull the instruction only after a row was actually read, so an
            # exhausted file never consumes an extra instruction.
            prompt = next(instructions)
            raw_log = entry['Content']
            record = {
                "id": record_id,
                "instruction": prompt.format(log=raw_log),
                "input": "\nlog entry:\n" + raw_log,
                "output": entry['EventTemplate'],
            }
            yield record

def main(file_paths=None, output_path='evaluation_set.json'):
    """Build the log-parsing evaluation set and write it out as JSON.

    Every row of every input CSV becomes one record (see ``process_csv``).
    Instruction templates are handed out round-robin, and the rotation
    continues across files instead of restarting for each one. Record ids
    start at 0 and increase by one over the whole output.

    Args:
        file_paths: Iterable of structured-log CSV paths to read, in order.
            Defaults to the BGL and Thunderbird 2k samples at the original
            hard-coded locations.
        output_path: Destination of the JSON file. Defaults to
            ``evaluation_set.json`` in the current working directory.
    """
    instruction_templates = [
        "Parse the following log into a template format, replacing variable parts with a wildcard <*>",
        "Convert the following log into a standardized template by identifying and replacing the variable parts with a <*>",
        "Transform the raw log into a log template by replacing variable segments with <*>",
        "Given the following log, extract its common structure and replace variable parts with <*> to create a log template",
        "Create a log template from the raw log by replacing dynamic segments with <*>",
        "Identify the fixed format and variable parts in given log, replace variable parts with <*>",
        "Extract the unchanging template part from the given log, and mark the changing variable parts with <*>",
        "Conduct a structured analysis of the given log, finding template parts and variable parts identified by <*>",
        # The next two templates previously said "<>"; every other template
        # names the wildcard as "<*>", so they are aligned with the rest.
        "Parse the structure of the given log, extract a template and mark all variables with <*>",
        "Analyze the given log, identify and replace variable parts with <*> to generate a template of the log",
        "Parse the provided log message into a structured log template. Whenever you identify a variable part, replace it with <*>",
        "You will be provided with a log message. Please extract the log templates and tag variables with <*> from this log message",
        "Delve into this log message, and meticulously draw out log templates and replace all the variables with <*>",
        "Pay attention to the templates and variables within the following log message. Diligently extract log templates and then tag variables with <*>",
        "You are a professional log analysis expert. Please fetch the templates in the given log and mark the variables with <*>",
    ]

    # A single shared cycle, so the rotation carries over from file to file.
    instructions = itertools.cycle(instruction_templates)

    if file_paths is None:
        file_paths = [
            'C:/Users/cty/Desktop/loghub-master/loghub-master/BGL/BGL_2k.log_structured.csv',
            'C:/Users/cty/Desktop/loghub-master/loghub-master/Thunderbird/Thunderbird_2k.log_structured.csv',
        ]

    evaluation_set = []
    for file_path in file_paths:
        # Ids are contiguous from 0, so the next id equals the number of
        # records collected so far. Unlike reading evaluation_set[-1], this
        # also works when a file contributes no rows at all.
        next_id = len(evaluation_set)
        evaluation_set.extend(process_csv(file_path, instructions, next_id))

    with open(output_path, 'w', encoding='utf-8') as jsonfile:
        json.dump(evaluation_set, jsonfile, indent=4)

# Build the evaluation set only when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == "__main__":
    main()