# Source code for benchbuild.statistics
"""
Handle all statistics-related classes and methods.
"""
import logging
from benchbuild.extensions import Extension
from benchbuild.utils.schema import Session
# The import of scipy and all of its usages are commented out, since its import
# takes too much time for the buildbot. To use the statistics uncomment the
# import and the line containing the stats.ttest function of scipy.
# import scipy
LOG = logging.getLogger(__name__)
TIMEOUT = 1
class Statistics(Extension):
    """
    Extend a run to be repeated until it reaches the statistical
    significance specified by the user.

    An example on how to use this extension can be found in the Pollytest
    Experiment.
    """

    def __init__(self, project, experiment, *extensions, config=None):
        """
        Store the project and experiment and forward the rest to Extension.

        Args:
            project: The project this extension is attached to.
            experiment: The experiment this extension is attached to. If it
                provides a ``res_func`` attribute, ``__call__`` uses it to
                extract the results of a run.
            *extensions: Forwarded unchanged to the base class.
            config: Forwarded unchanged to the base class.
        """
        self.project = project
        self.experiment = experiment
        super().__init__(*extensions, config=config)

    def t_test(self, *results, significance=0.95):
        """
        Run a t-test on a given set of results.

        The scipy call is currently commented out, so ``results`` is ignored
        and both the t-statistic and the p-value are fixed at 0.

        Args:
            *results: The results to test (unused while scipy is disabled).
            significance: The significance level; the p-value is compared
                against ``1 - significance``.

        Returns:
            True if the null hypothesis that the result was not significant
            was rejected, False otherwise. With the placeholder p-value of 0
            this is False for every ``significance`` below 1.
        """
        del results  # Unused temporarily
        t_statistic = 0
        p_value = 0
        # t_statistic, p_value = scipy.stats.ttest_1samp(result, TRUE_MU)
        LOG.debug("t-statistic = %f, pvalue = %f", t_statistic, p_value)
        # NOTE(review): a t-test conventionally rejects the null hypothesis
        # when the p-value is *below* 1 - significance; this comparison looks
        # inverted relative to the docstring. Confirm the intent before
        # re-enabling the scipy call.
        return p_value >= 1 - significance

    def __call__(self, *args, timeout=TIMEOUT, **kwargs):
        """
        Run the following extensions until the timeout is reached or a run
        is significant enough to reject the null hypothesis.

        If the experiment does not define a ``res_func``, the following
        extensions are executed once and the loop stops, since there is
        nothing to test.

        Args:
            *args: Forwarded to the following extensions.
            timeout: The number of tries the user wants to give the
                experiment before it gets interrupted.
            **kwargs: Forwarded to the following extensions.

        Returns:
            The run info object returned by the last execution of the
            following extensions, or None if ``timeout`` allowed no
            execution at all.
        """
        executed_runs = 0
        # Stays None when timeout <= 0, so the final return cannot hit an
        # unbound local.
        ri_object = None
        session = Session()
        while executed_runs < timeout:
            # Get a run_info object after executing the run with its
            # extensions. Count the run right away so that the summary below
            # is correct no matter how the loop is left.
            ri_object = self.call_next(*args, **kwargs)
            executed_runs += 1

            # No need to repeat the run without a result function.
            if not hasattr(self.experiment, 'res_func'):
                break

            results = self.experiment.res_func(ri_object)
            if self.t_test(results):
                LOG.info("The run was significant.")
                break

            # Check if this was the last iteration.
            if executed_runs == timeout:
                LOG.warning(
                    "No significant run happened before the timeout!")

        # Commit the database session containing all runs.
        session.commit()
        LOG.info("Overall one command was executed %s times.", executed_runs)
        return ri_object