import numpy

def rmse(dataset1, dataset2, ignore=None):
   """
   title::
      rmse

   description::
      This method will compute the root-mean-squared error (RMSE) between
      two provided data sets.  The RMSE will be computed for the ensemble
      data, i.e. both data sets are flattened and compared element by
      element.  If the RMSE is desired for a particular slice of the
      provided data, then the data sets provided should represent those
      slices.

   attributes::
      dataset1
         An array-like object containing the first data set.
      dataset2
         An array-like object containing the second data set.  It must
         contain the same number of elements as dataset1.
      ignore
         A scalar value that will be ignored in the data sets.  This can
         be used to mask data in the provided data set from being
         included in the analysis. This value will be looked for in both
         of the provided data sets, and only an intersection of positions
         in the two data sets will be included in the computation (a
         position is used only if neither data set holds the ignore value
         there).  NaN is supported as an ignore value. [default is None]

   returns::
      The root-mean-squared error as a floating-point value.  0.0 is
      returned if the two data sets are element-wise identical.  NaN is
      returned if the data sets differ but every position is excluded by
      the ignore value, since no elements remain to average over.

   raises::
      ValueError
         If the two data sets do not contain the same number of elements.

   author::
      Carl Salvaggio

   copyright::
      Copyright (C) 2015, Rochester Institute of Technology

   license::
      GPL

   version::
      1.0.0

   disclaimer::
      This source code is provided "as is" and without warranties as to
      performance or merchantability. The author and/or distributors of
      this source code may have made statements about this source code.
      Any such statements do not constitute warranties and shall not be
      relied on by the user in deciding whether to use this source code.

      This source code is provided without any express or implied warranties
      whatsoever. Because of the diversity of conditions and hardware under
      which this source code may be used, no warranty of fitness for a
      particular purpose is offered. The user is advised to test the source
      code thoroughly before relying on it. The user must assume the entire
      risk of using the source code.
   """

   # Coerce both inputs to plain, flat ndarrays.  numpy.asarray is a no-op
   # for ndarrays and also strips ndarray subclasses (e.g. numpy.matrix)
   # whose flatten()/indexing semantics would otherwise stay 2-D.
   d1 = numpy.asarray(dataset1).ravel()
   d2 = numpy.asarray(dataset2).ravel()

   # Make sure that the provided data sets are the same size
   if d1.size != d2.size:
      raise ValueError('Provided datasets must have the same size/shape')

   # Check if the provided data sets are identical, and if so, return 0
   # for the root-mean-squared error (as a float, like every other path)
   if numpy.array_equal(d1, d2):
      return 0.0

   # Work in double precision to avoid integer overflow/wrap-around in the
   # difference and the squares
   values1 = d1.astype(numpy.float64)
   values2 = d2.astype(numpy.float64)

   # If specified, remove the values to ignore from the analysis
   if ignore is not None:
      # NaN never compares equal to anything (including itself), so a NaN
      # ignore value has to be detected explicitly rather than with "!="
      try:
         ignore_is_nan = bool(numpy.isnan(ignore))
      except (TypeError, ValueError):
         ignore_is_nan = False

      if ignore_is_nan:
         keep = ~(numpy.isnan(values1) | numpy.isnan(values2))
      else:
         # Compare in the original dtype so matching is unaffected by the
         # conversion to float64
         keep = (d1 != ignore) & (d2 != ignore)

      values1 = values1[keep]
      values2 = values2[keep]

   # Compute the element-wise difference between the data sets
   error = values1 - values2

   # Nothing left to average once the ignored positions are removed; the
   # RMSE is undefined, so report NaN explicitly instead of dividing by 0
   if error.size == 0:
      return float('nan')

   # Compute the mean-squared error
   mean_squared_error = numpy.mean(error ** 2)

   # Return the root of the mean-square error
   return numpy.sqrt(mean_squared_error)