Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import csv
import json
import itertools
def process_csv(file_path, instructions, start_id):
    """Lazily turn each row of a structured-log CSV into an evaluation record.

    Args:
        file_path: Path of a UTF-8 CSV file whose header includes the
            ``Content`` and ``EventTemplate`` columns.
        instructions: Iterator of instruction strings; exactly one is taken
            with ``next()`` for every CSV row.
        start_id: Value used as the ``id`` of the first record; every
            following record gets the previous id plus one.

    Yields:
        One dict per row with the keys ``id``, ``instruction``, ``input``
        and ``output``.
    """
    record_id = start_id
    with open(file_path, newline='', encoding='utf-8') as handle:
        for entry in csv.DictReader(handle):
            # Take the template before touching the row so the iterator is
            # advanced exactly once per row, even if a column is missing.
            template = next(instructions)
            log_text = entry['Content']
            record = {
                "id": record_id,
                # Fills a "{log}" placeholder if the template has one.
                "instruction": template.format(log=log_text),
                "input": "\nlog entry:\n" + log_text,
                "output": entry['EventTemplate'],
            }
            record_id += 1
            yield record
def main():
    """Build the log-parsing evaluation set and write it to ``evaluation_set.json``.

    Every row of each structured-log CSV listed in ``file_paths`` is turned
    into a record by ``process_csv``. Instruction templates are reused
    cyclically, and the cycle carries over from one file to the next. Record
    ids start at 0 and continue across files. The collected records are
    written as indented JSON to ``evaluation_set.json`` in the current
    working directory.
    """
    instruction_templates = [
        "Parse the following log into a template format, replacing variable parts with a wildcard <*>",
        "Convert the following log into a standardized template by identifying and replacing the variable parts with a <*>",
        "Transform the raw log into a log template by replacing variable segments with <*>",
        "Given the following log, extract its common structure and replace variable parts with <*> to create a log template",
        "Create a log template from the raw log by replacing dynamic segments with <*>",
        "Identify the fixed format and variable parts in given log, replace variable parts with <*>",
        "Extract the unchanging template part from the given log, and mark the changing variable parts with <*>",
        "Conduct a structured analysis of the given log, finding template parts and variable parts identified by <*>",
        # NOTE(review): the next two templates say "<>" while every other
        # template says "<*>" - confirm whether that is intentional.
        "Parse the structure of the given log, extract a template and mark all variables with <>",
        "Analyze the given log, identify and replace variable parts with <> to generate a template of the log",
        "Parse the provided log message into a structured log template. Whenever you identify a variable part, replace it with <*>",
        "You will be provided with a log message. Please extract the log templates and tag variables with <*> from this log message",
        "Delve into this log message, and meticulously draw out log templates and replace all the variables with <*>",
        "Pay attention to the templates and variables within the following log message. Diligently extract log templates and then tag variables with <*>",
        "You are a professional log analysis expert. Please fetch the templates in the given log and mark the variables with <*>"
    ]
    # One shared cycle, so the template rotation continues across files
    # instead of restarting with each CSV.
    instructions = itertools.cycle(instruction_templates)
    file_paths = [
        'C:/Users/cty/Desktop/loghub-master/loghub-master/BGL/BGL_2k.log_structured.csv',
        'C:/Users/cty/Desktop/loghub-master/loghub-master/Thunderbird/Thunderbird_2k.log_structured.csv'
    ]
    evaluation_set = []
    start_id = 0
    for file_path in file_paths:
        evaluation_set.extend(process_csv(file_path, instructions, start_id))
        # Continue numbering after the last record. The guard keeps
        # start_id unchanged (instead of raising IndexError) when no rows
        # have been collected yet, e.g. the first CSV has only a header.
        if evaluation_set:
            start_id = evaluation_set[-1]['id'] + 1
    with open('evaluation_set.json', 'w', encoding='utf-8') as jsonfile:
        json.dump(evaluation_set, jsonfile, indent=4)
# Run the generation only when this file is executed as a script, so that
# importing the module does not read the CSVs or write the JSON file.
if __name__ == "__main__":
    main()