| """ | |
| The implementation of PCA model for anomaly detection. | |
| Authors: | |
| LogPAI Team | |
| Reference: | |
| [1] Wei Xu, Ling Huang, Armando Fox, David Patterson, Michael I. Jordan. | |
| Large-Scale System Problems Detection by Mining Console Logs. ACM | |
| Symposium on Operating Systems Principles (SOSP), 2009. | |
| """ | |
| import numpy as np | |
| from ..utils import metrics | |
class PCA(object):

    def __init__(self, n_components=0.95, threshold=None, c_alpha=3.2905):
        """ The PCA model for anomaly detection

        Attributes
        ----------
            proj_C: The projection matrix for projecting a feature vector onto the abnormal space
            n_components: float/int, the number of principal components or the variance ratio they cover
            threshold: float, the anomaly detection threshold. When set to None, the threshold
                is automatically calculated using the Q-statistic
            c_alpha: float, the c_alpha parameter for calculating the anomaly detection threshold
                with the Q-statistic. The following is a lookup table for c_alpha:
                c_alpha = 1.7507; # alpha = 0.08
                c_alpha = 1.9600; # alpha = 0.05
                c_alpha = 2.5758; # alpha = 0.01
                c_alpha = 2.807;  # alpha = 0.005
                c_alpha = 2.9677; # alpha = 0.003
                c_alpha = 3.2905; # alpha = 0.001
                c_alpha = 3.4808; # alpha = 0.0005
                c_alpha = 3.8906; # alpha = 0.0001
                c_alpha = 4.4172; # alpha = 0.00001
        """
        self.proj_C = None
        self.components = None
        self.n_components = n_components
        self.threshold = threshold
        self.c_alpha = c_alpha

    def fit(self, X):
        """
        Arguments
        ---------
            X: ndarray, the event count matrix of shape num_instances-by-num_events
        """
        print('====== Model summary ======')
        num_instances, num_events = X.shape
        X_cov = np.dot(X.T, X) / float(num_instances)
        U, sigma, V = np.linalg.svd(X_cov)
        n_components = self.n_components
        if n_components < 1:
            # Interpret n_components as a variance ratio and pick the smallest
            # number of leading components that covers it.
            total_variance = np.sum(sigma)
            variance = 0
            for i in range(num_events):
                variance += sigma[i]
                if variance / total_variance >= n_components:
                    break
            n_components = i + 1

        P = U[:, :n_components]
        I = np.identity(num_events, int)
        self.components = P
        self.proj_C = I - np.dot(P, P.T)
        print('n_components: {}'.format(n_components))
        print('Project matrix shape: {}-by-{}'.format(self.proj_C.shape[0], self.proj_C.shape[1]))

        if not self.threshold:
            # Calculate the threshold using the Q-statistic. Details can be found at:
            # http://conferences.sigcomm.org/sigcomm/2004/papers/p405-lakhina111.pdf
            phi = np.zeros(3)
            for i in range(3):
                for j in range(n_components, num_events):
                    phi[i] += np.power(sigma[j], i + 1)
            h0 = 1.0 - 2 * phi[0] * phi[2] / (3.0 * phi[1] * phi[1])
            self.threshold = phi[0] * np.power(self.c_alpha * np.sqrt(2 * phi[1] * h0 * h0) / phi[0]
                                               + 1.0 + phi[1] * h0 * (h0 - 1) / (phi[0] * phi[0]),
                                               1.0 / h0)
        print('SPE threshold: {}\n'.format(self.threshold))

    def predict(self, X):
        assert self.proj_C is not None, 'PCA model needs to be trained before prediction.'
        y_pred = np.zeros(X.shape[0])
        for i in range(X.shape[0]):
            # Project the instance onto the abnormal (residual) space and flag it as
            # anomalous if its squared prediction error (SPE) exceeds the threshold.
            y_a = np.dot(self.proj_C, X[i, :])
            SPE = np.dot(y_a, y_a)
            if SPE > self.threshold:
                y_pred[i] = 1
        return y_pred

    def evaluate(self, X, y_true):
        print('====== Evaluation summary ======')
        y_pred = self.predict(X)
        precision, recall, f1 = metrics(y_pred, y_true)
        print('Precision: {:.3f}, recall: {:.3f}, F1-measure: {:.3f}\n'.format(precision, recall, f1))
        return precision, recall, f1
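

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative, not part of the original module). It
# assumes an event count matrix of shape num_instances-by-num_events, e.g. as
# produced by loglizer's feature extraction; the `x_train` name and the random
# data below are hypothetical placeholders. Kept as comments because this
# module uses a relative import and is not meant to be run as a script.
#
#   import numpy as np
#   from loglizer.models import PCA
#
#   x_train = np.random.poisson(1.0, size=(1000, 20)).astype(float)
#
#   model = PCA(n_components=0.95, c_alpha=3.2905)  # alpha = 0.001
#   model.fit(x_train)               # learns proj_C and the SPE threshold
#   y_pred = model.predict(x_train)  # 1 = anomaly, 0 = normal
# ---------------------------------------------------------------------------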