import evaluate as ev
from sklearn.metrics import ndcg_score
import datasets
_DESCRIPTION = """
Compute Normalized Discounted Cumulative Gain (nDCG).

Sums the true scores ranked in the order induced by the predicted scores,
after applying a logarithmic discount. Then divides by the best possible
score (Ideal DCG, obtained for a perfect ranking) to obtain a score between
0 and 1.

This ranking metric returns a high value if true labels are ranked high by
``predictions``.

If a value for k is given to the metric, it will only consider the k highest
scores in the ranking.

References
----------
`Wikipedia entry for Discounted Cumulative Gain
<https://en.wikipedia.org/wiki/Discounted_cumulative_gain>`_

Jarvelin, K., & Kekalainen, J. (2002).
Cumulated gain-based evaluation of IR techniques. ACM Transactions on
Information Systems (TOIS), 20(4), 422-446.

Wang, Y., Wang, L., Li, Y., He, D., Chen, W., & Liu, T. Y. (2013, May).
A theoretical analysis of NDCG ranking measures. In Proceedings of the 26th
Annual Conference on Learning Theory (COLT 2013).

McSherry, F., & Najork, M. (2008, March). Computing information retrieval
performance measures efficiently in the presence of tied scores. In
European Conference on Information Retrieval (pp. 414-421). Springer,
Berlin, Heidelberg.
"""
_KWARGS_DESCRIPTION = """
Args:
    references (`list` of `float`): True relevance values.
    predictions (`list` of `float`): Predicted relevance, probability estimates, or confidence values.
    k (`int`): If set, only the k highest scores in the ranking are considered; otherwise all outputs are used.
        Defaults to None.
    sample_weight (`list` of `float`): Sample weights. Defaults to None.
    ignore_ties (`bool`): If set to True, assumes that there are no ties in the predictions (which is likely
        when the predictions are continuous) for efficiency gains. Defaults to False.

Returns:
    normalized_discounted_cumulative_gain (`float`): The nDCG score averaged over all samples.
        Minimum possible value is 0.0, maximum possible value is 1.0.

Examples:
    Example 1 - A simple example:
        >>> nDCG_metric = evaluate.load("JP-SystemsX/nDCG")
        >>> results = nDCG_metric.compute(references=[[10, 0, 0, 1, 5]], predictions=[[.1, .2, .3, 4, 70]])
        >>> print(results)
        {'nDCG': 0.6956940443813076}

    Example 2 - The same as Example 1, except with k set to 3:
        >>> nDCG_metric = evaluate.load("JP-SystemsX/nDCG")
        >>> results = nDCG_metric.compute(references=[[10, 0, 0, 1, 5]], predictions=[[.1, .2, .3, 4, 70]], k=3)
        >>> print(results)
        {'nDCG@3': 0.4123818817534531}

    Example 3 - There is only one relevant label, but there is a tie and the model cannot decide which one it is:
        >>> nDCG_metric = evaluate.load("JP-SystemsX/nDCG")
        >>> results = nDCG_metric.compute(references=[[1, 0, 0, 0, 0]], predictions=[[1, 1, 0, 0, 0]], k=1)
        >>> print(results)
        {'nDCG@1': 0.5}
        >>> # That is, the score is calculated for both possible rankings and the two results are averaged.

    Example 4 - The same as Example 3, except ignore_ties is set to True:
        >>> nDCG_metric = evaluate.load("JP-SystemsX/nDCG")
        >>> results = nDCG_metric.compute(references=[[1, 0, 0, 0, 0]], predictions=[[1, 1, 0, 0, 0]], k=1, ignore_ties=True)
        >>> print(results)
        {'nDCG@1': 0.0}
        >>> # Alternative result: {'nDCG@1': 1.0}
        >>> # That is, one of the two tied candidates is chosen arbitrarily and the score is
        >>> # calculated for that one only, so the result may vary depending on which candidate
        >>> # was chosen.
"""
_CITATION = """
@article{scikit-learn,
    title={Scikit-learn: Machine Learning in {P}ython},
    author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
            and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
            and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
            Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
    journal={Journal of Machine Learning Research},
    volume={12},
    pages={2825--2830},
    year={2011}
}
"""


class nDCG(ev.Metric):
    def _info(self):
        return ev.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features({
                'predictions': datasets.Sequence(datasets.Value('float')),
                'references': datasets.Sequence(datasets.Value('float')),
            }),
            reference_urls=["https://scikit-learn.org/stable/modules/generated/sklearn.metrics.ndcg_score.html"],
        )

    def _compute(self, predictions, references, sample_weight=None, k=None, ignore_ties=False):
        # Delegate the actual computation to scikit-learn's implementation.
        score = ndcg_score(
            y_true=references,
            y_score=predictions,
            k=k,
            sample_weight=sample_weight,
            ignore_ties=ignore_ties,
        )
        # Name the result after the cutoff when one is given, e.g. "nDCG@3" for k=3.
        if k is not None:
            return {"nDCG@" + str(k): score}
        return {"nDCG": score}