import numpy
def rmse(dataset1, dataset2, ignore=None):
    """
    title::
        rmse

    description::
        Compute the root-mean-squared error (RMSE) between two provided
        data sets. The RMSE is computed over the ensemble data: both
        inputs are flattened to one dimension and compared element by
        element. If the RMSE is desired for a particular slice of the
        data, the data sets provided should represent those slices.

    attributes::
        dataset1
            An array-like object containing the first data set.
        dataset2
            An array-like object containing the second data set. It must
            contain the same number of elements as dataset1.
        ignore
            A scalar value that will be ignored in the data sets. This can
            be used to mask data in the provided data sets from being
            included in the analysis. This value is looked for in both
            of the provided data sets, and only positions at which
            neither data set holds the ignore value are included in the
            computation. A floating-point NaN may be given to exclude
            NaN entries. [default is None]

    returns::
        The root-mean-squared error as a floating-point value. The
        result is 0.0 when the flattened data sets are identical, and
        NaN when the ignore value leaves no positions to compare.

    raises::
        ValueError
            If the two data sets do not contain the same number of
            elements.

    author::
        Carl Salvaggio

    copyright::
        Copyright (C) 2015, Rochester Institute of Technology

    license::
        GPL

    version::
        1.0.0

    disclaimer::
        This source code is provided "as is" and without warranties as to
        performance or merchantability. The author and/or distributors of
        this source code may have made statements about this source code.
        Any such statements do not constitute warranties and shall not be
        relied on by the user in deciding whether to use this source code.

        This source code is provided without any express or implied
        warranties whatsoever. Because of the diversity of conditions and
        hardware under which this source code may be used, no warranty of
        fitness for a particular purpose is offered. The user is advised
        to test the source code thoroughly before relying on it. The user
        must assume the entire risk of using the source code.
    """
    # Normalize both inputs to plain one-dimensional ndarrays.
    # numpy.asarray() also strips ndarray subclasses (e.g. numpy.matrix)
    # whose flatten() would otherwise remain two-dimensional and break
    # the element selection performed below.
    d1 = numpy.asarray(dataset1).ravel()
    d2 = numpy.asarray(dataset2).ravel()

    # Make sure that the provided data sets are the same size
    if d1.size != d2.size:
        raise ValueError('Provided datasets must have the same size/shape')

    # Identical data sets have no error; skip the arithmetic entirely
    if numpy.array_equal(d1, d2):
        return 0.0

    # If specified, drop every position at which either data set holds
    # the value to ignore
    if ignore is not None:
        if isinstance(ignore, (float, numpy.floating)) and numpy.isnan(ignore):
            # NaN never compares equal to anything (itself included), so
            # a "!=" test cannot locate it; use isnan() on float data
            d1 = d1.astype(numpy.float64)
            d2 = d2.astype(numpy.float64)
            keep = ~(numpy.isnan(d1) | numpy.isnan(d2))
        else:
            keep = (d1 != ignore) & (d2 != ignore)
        d1 = d1[keep]
        d2 = d2[keep]

    # Compute the element-wise difference in double precision
    error = d1.astype(numpy.float64) - d2.astype(numpy.float64)

    # Nothing left to compare: the RMSE is undefined. Return NaN directly
    # rather than dividing by zero (which yields NaN plus a warning).
    if error.size == 0:
        return float('nan')

    # Compute the mean-squared error
    mean_squared_error = numpy.sum(error**2) / error.size

    # Return the root of the mean-squared error
    return numpy.sqrt(mean_squared_error)