167 lines
5.7 KiB
Python
167 lines
5.7 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
from __future__ import absolute_import
|
||
|
import re
|
||
|
import fractions
|
||
|
from collections import namedtuple
|
||
|
|
||
|
# Record stored per label in an iteration's 'scores' dict: three integer
# counts (match, model, ref) followed by precision, recall and f1, which are
# floats, or None when the log prints asterisks instead of numbers.
LabelScore = namedtuple(
    'LabelScore',
    'match model ref precision recall f1'
)
|
||
|
|
||
|
|
||
|
class TrainLogParser(object):
    """Incremental parser for a trainer's textual log, fed one chunk at a time.

    The parser is a small state machine.  ``state`` starts as ``None`` and
    then moves through ``'STARTING'``, ``'FEATGEN'``, ``'AFTER_FEATGEN'``,
    ``'ITERATION'`` / ``'AFTER_ITERATION'`` (alternating once per iteration)
    and ``'STORING'``.  Each state has a ``handle_<STATE>`` method which
    inspects the current line, updates the parsed attributes and optionally
    returns an event name.

    NOTE(review): the line formats matched here look like CRFsuite trainer
    output -- confirm against whatever produces the log.

    Attributes:
        state: name of the current state (``None`` before the first line).
        featgen_percent: feature generation progress; starts at -2 so that
            the first progress token brings it to 0, then grows by 2 per
            token.
        featgen_num_features: value of the "Number of features" line.
        featgen_seconds: "Seconds required" reported after feature generation.
        training_seconds: "Total seconds required for training".
        storing_seconds: "Seconds required" reported while storing the model.
        iterations: list of per-iteration dicts, in the order encountered.
        last_iteration: the dict currently being filled (also the last item
            of ``iterations``), or ``None`` before the first iteration.
        log: every line passed to :meth:`feed`, unmodified.
        events: list of ``(event, start, end)`` tuples where ``start:end``
            is the slice of ``log`` attributed to that event.
    """

    # (key, pattern, converter) for iteration lines that carry one value.
    _SCALAR_FIELDS = (
        ("loss", r"Loss: (\d+\.\d+)", float),
        ("feature_norm", r"Feature norm: (\d+\.\d+)", float),
        ("error_norm", r"Error norm: (\d+\.\d+)", float),
        ("active_features", r"Active features: (\d+)", int),
        ("linesearch_trials", r"Line search trials: (\d+)", int),
        ("linesearch_step", r"Line search step: (\d+\.\d+)", float),
        ("time", r"Seconds required for this iteration: (\d+\.\d+)", float),
    )

    # (key, pattern) for "<correct> / <total>" accuracy lines.
    _ACCURACY_FIELDS = (
        ("item_accuracy", r"Item accuracy: (\d+) / (\d+)"),
        ("instance_accuracy", r"Instance accuracy: (\d+) / (\d+)"),
    )

    def __init__(self):
        self.state = None
        self.featgen_percent = -2
        self.featgen_num_features = None
        self.featgen_seconds = None
        self.training_seconds = None
        self.storing_seconds = None

        self.iterations = []
        self.last_iteration = None
        self.log = []
        self.events = []

    def feed(self, line):
        """Consume one log line and return the event it triggered, if any.

        The line is always appended to ``log``.  The very first line puts the
        parser into the ``'STARTING'`` state and yields ``'start'``.  Every
        later line is dispatched to the handler of the current state; when
        that handler returns an event name, an ``(event, start, end)`` entry
        is appended to ``events`` and the name is returned.  Otherwise
        ``None`` is returned.
        """
        self.log.append(line)

        if self.state is None:
            self.state = 'STARTING'
            self.handle_STARTING(line)
            self.events.append(('start', 0, len(self.log)))
            return 'start'

        event = getattr(self, "handle_" + self.state)(line)
        if event is not None:
            # A new span begins where the previous event's span ended.
            start, end = self.events[-1][2], len(self.log)
            if event in ('prepared', 'optimization_end'):
                # The line that triggered these events is left out of this
                # span, so it becomes the first line of the next event's span.
                end -= 1
            self.events.append((event, start, end))
        return event

    @property
    def last_log(self):
        """Concatenated log lines belonging to the most recent event.

        Raises IndexError if no event has been recorded yet.
        """
        _event, start, end = self.events[-1]
        return ''.join(self.log[start:end])

    def handle_STARTING(self, line):
        """Wait for the 'Feature generation' line, then switch to FEATGEN."""
        if line.startswith('Feature generation'):
            self.state = 'FEATGEN'

    def handle_FEATGEN(self, line):
        """Track feature generation progress, feature count and duration.

        Returns ``'featgen_progress'`` for a progress token,
        ``'featgen_end'`` when the "Seconds required" line arrives (which
        also moves the parser to AFTER_FEATGEN), and ``None`` otherwise.
        """
        # Progress arrives as fragments of "0123456789.10" (digits and dots).
        # The explicit emptiness check matters: '' is a substring of every
        # string and must not be counted as a progress tick.
        if line and line in "0123456789.10":
            self.featgen_percent += 2
            return 'featgen_progress'

        m = re.match(r"Number of features: (\d+)", line)
        if m:
            self.featgen_num_features = int(m.group(1))
            return None

        seconds = self._seconds(line)
        if seconds is not None:
            self.featgen_seconds = seconds
            self.state = 'AFTER_FEATGEN'
            return 'featgen_end'
        return None

    def handle_AFTER_FEATGEN(self, line):
        """Wait for the first iteration header or an error message.

        Returns ``'prepared'`` when the first iteration header is seen (the
        header itself is also processed as the start of that iteration), or
        ``'prepare_error'`` when the line contains 'terminated with error'.
        """
        if self._iteration_head(line) is not None:
            self.state = 'ITERATION'
            self.handle_ITERATION(line)
            return 'prepared'

        if 'terminated with error' in line:
            self.state = 'AFTER_ITERATION'
            return 'prepare_error'
        return None

    def handle_ITERATION(self, line):
        """Parse one line of an iteration block into ``last_iteration``.

        An iteration header starts a new dict with keys ``'num'`` and
        ``'scores'``.  A bare newline ends the block: the parser moves to
        AFTER_ITERATION and ``'iteration'`` is returned.  Any other line is
        matched against the known metric formats and, on a match, stored in
        ``last_iteration``; ``None`` is returned for all such lines.
        """
        num = self._iteration_head(line)
        if num is not None:
            self.last_iteration = {
                'num': num,
                'scores': {},
            }
            self.iterations.append(self.last_iteration)
            # None of the metric patterns below can match a header line.
            return None

        if line == '\n':
            self.state = 'AFTER_ITERATION'
            return 'iteration'

        current = self.last_iteration

        for key, pattern, typ in self._SCALAR_FIELDS:
            m = re.match(pattern, line)
            if m:
                current[key] = typ(m.group(1))

        m = re.match(r"Macro-average precision, recall, F1: \((\d\.\d+), (\d\.\d+), (\d\.\d+)\)", line)
        if m:
            current['avg_precision'] = float(m.group(1))
            current['avg_recall'] = float(m.group(2))
            current['avg_f1'] = float(m.group(3))

        for key, pattern in self._ACCURACY_FIELDS:
            m = re.match(pattern, line)
            if m:
                # Keep the exact ratio and a float convenience copy.
                acc = fractions.Fraction(int(m.group(1)), int(m.group(2)))
                current[key] = acc
                current[key + '_float'] = float(acc)

        # Per-label line with numeric precision / recall / F1.
        m = re.match(r"\s{4}(.+): \((\d+), (\d+), (\d+)\) \((\d\.\d+), (\d\.\d+), (\d\.\d+)\)", line)
        if m:
            current['scores'][m.group(1)] = LabelScore(
                match=int(m.group(2)),
                model=int(m.group(3)),
                ref=int(m.group(4)),
                precision=float(m.group(5)),
                recall=float(m.group(6)),
                f1=float(m.group(7)),
            )

        # Per-label line with all-zero counts, where the metrics are printed
        # as asterisks; they are stored as None.
        m = re.match(r"\s{4}(.+): \(0, 0, 0\) \(\*{6}, \*{6}, \*{6}\)", line)
        if m:
            current['scores'][m.group(1)] = LabelScore(
                match=0,
                model=0,
                ref=0,
                precision=None,
                recall=None,
                f1=None,
            )
        return None

    def handle_AFTER_ITERATION(self, line):
        """Handle lines between iterations and after the last one.

        A new iteration header switches back to ITERATION and is processed
        there.  The total training time is recorded when present.  A line
        starting with 'Storing the model' moves the parser to STORING and
        returns ``'optimization_end'``.
        """
        if self._iteration_head(line) is not None:
            self.state = 'ITERATION'
            return self.handle_ITERATION(line)

        m = re.match(r"Total seconds required for training: (\d+\.\d+)", line)
        if m:
            self.training_seconds = float(m.group(1))

        if line.startswith('Storing the model'):
            self.state = 'STORING'
            return 'optimization_end'
        return None

    def handle_STORING(self, line):
        """Record the storing duration; return ``'end'`` on a bare newline."""
        if line == '\n':
            return 'end'

        seconds = self._seconds(line)
        # Compare against None rather than truthiness so that a reported
        # duration of 0.000 seconds is still recorded.
        if seconds is not None:
            self.storing_seconds = seconds
        return None

    def _iteration_head(self, line):
        """Return the iteration/epoch number if `line` is a header, else None."""
        m = re.match(r'\*{5} (?:Iteration|Epoch) #(\d+) \*{5}\n', line)
        if m:
            return int(m.group(1))
        return None

    def _seconds(self, line):
        """Return the float from a 'Seconds required: X.Y' line, else None."""
        m = re.match(r'Seconds required: (\d+\.\d+)', line)
        if m:
            return float(m.group(1))
        return None
|