-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDrain_demo.py
More file actions
19 lines (16 loc) · 781 Bytes
/
Drain_demo.py
File metadata and controls
19 lines (16 loc) · 781 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#!/usr/bin/env python
import Drain
# --- Input / output locations -------------------------------------------
# NOTE(review): input_dir is an absolute path that looks specific to the
# original author's machine -- adjust it before running elsewhere.
log_file = 'HDFS_2k.log'  # Name of the log file to parse
input_dir = '/Users/mac/Repo/ReadCode/HDFS'  # Directory holding log_file
output_dir = 'Drain_result/'  # Directory that receives the parsing results

# Layout of one HDFS log line; each <Field> names one part of the line.
log_format = '<Date> <Time> <Pid> <Level> <Component>: <Content>'

# --- Parser tuning --------------------------------------------------------
depth = 4  # Depth of all leaf nodes
st = 0.5  # Similarity threshold

# Optional preprocessing patterns handed to the parser via `rex`
# (per the original note, the default is an empty list).
regex = [
    # Block id: "blk_" followed by an optionally negative integer.
    r'blk_(|-)[0-9]+',
    # IP address: optional leading "/", dotted quad, optional ":port",
    # optional trailing ":".
    r'(/|)([0-9]+\.){3}[0-9]+(:[0-9]+|)(:|)',
    # Numbers: digits (optionally signed) bounded by non-alphanumeric
    # characters on both sides, or a run of digits ending the string.
    r'(?<=[^A-Za-z0-9])(\-?\+?\d+)(?=[^A-Za-z0-9])|[0-9]+$',
]

# Build the parser from the settings above and run it on the chosen file.
parser = Drain.LogParser(
    log_format,
    indir=input_dir,
    outdir=output_dir,
    rex=regex,
    st=st,
    depth=depth,
)
parser.parse(log_file)