Content of statistical module

xquery version "3.0";

(:
 : Copyright 2006-2009 The FLWOR Foundation.
 :
 : Licensed under the Apache License, Version 2.0 (the "License");
 : you may not use this file except in compliance with the License.
 : You may obtain a copy of the License at
 :
 : http://www.apache.org/licenses/LICENSE-2.0
 :
 : Unless required by applicable law or agreed to in writing, software
 : distributed under the License is distributed on an "AS IS" BASIS,
 : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 : See the License for the specific language governing permissions and
 : limitations under the License.
 :)

(:~
 : This is a library module offering a part of the set of statistical functions
 : defined by Microsoft Excel 2003.
 :
 : @see <a href="http://office.microsoft.com/en-us/excel/CH062528311033.aspx"
 : target="_blank">Excel 2003 Documentation: Statistical Functions</a>
 :
 : @author Daniel Turcanu
 : @project Zorba/Excel/Statistical
 :
 :)
module namespace  excel = "http://zorba.io/modules/excel/statistical" ;

(:~
 : Import excel-math module functions.
 :)
import module namespace excel-math="http://zorba.io/modules/excel/math";

declare namespace excel-err = "http://zorba.io/modules/excel/errors";

declare namespace ver = "http://zorba.io/options/versioning";
declare option ver:module-version "1.0";

(:~
 : Function for average function.
 : This function should not be used outside this module.
 : It counts all non-empty arguments from the sequence.
 : The string value of every argument is used for checking.
 : 
 : @param $numbers The sequence of values.
 : @return The count of non-empty string values.
 :)
declare %private function excel:count-non-empty($numbers as xs:anyAtomicType*) as xs:integer
{
  if (fn:empty($numbers)) then
    0
  else if (fn:string($numbers[1]) = "") then
    excel:count-non-empty(fn:subsequence($numbers, 2))
  else
    excel:count-non-empty(fn:subsequence($numbers, 2)) + 1
};

(:~
 : Returns the average (arithmetic mean) of the arguments.
 : Arguments can be empty values, otherwise must be castable to numeric.
 : If sequence is empty then zero is returned.
 : The sequence can be of any length.
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052089941033.aspx
 : @param $numbers The sequence of numbers or empty values.
 : @return The sum of all numbers divided by the number of non-empty values.
 : @error excel-err:Value if the parameters cannot be casted to numeric type.
 : @example test/Queries/excel/statistical/average1.xq
 : @example test/Queries/excel/statistical/average2.xq
 : @example test/Queries/excel/statistical/average3.xq
 :)
declare function excel:average($numbers as xs:anyAtomicType*) as xs:anyAtomicType
{
  let $count := excel:count-non-empty($numbers)
  return
    if ($count gt 0) then
      excel-math:sum((for $n in $numbers where fn:string($n) != "" return $n)) div $count
    else
      0 
};

(:~
 : Counts the number of cells that contain numbers or values castable to numeric.
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052090261033.aspx
 : @param $numbers The sequence of values, of any length.
 : @return The count of numbers.
 : @example test/Queries/excel/statistical/count1.xq
 :)
declare function excel:count( $numbers as xs:anyAtomicType* )  as xs:integer
{
  if (fn:empty($numbers)) then
    0
  else
    if (excel-math:is-a-number($numbers[1])) then
      excel:count(fn:subsequence($numbers, 2)) + 1
    else
      excel:count(fn:subsequence($numbers, 2))
};

(:~
 : Counts the empty values in a sequence.
 : The empty values are the ones with string value "".
 : The value 0 is not counted.
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052090281033.aspx
 : @param $cells the sequence of values, of any length
 : @return The count
 : @example test/Queries/excel/statistical/countblank1.xq
:)
declare function excel:countblank( $cells as xs:anyAtomicType* ) as xs:integer
{
  if (fn:empty($cells)) then
    0
  else
    if (fn:string($cells[1]) = "") then
      excel:countblank(fn:subsequence($cells, 2)) + 1
    else
      excel:countblank(fn:subsequence($cells, 2))
};

(:~
 : Returns the largest number in a sequence.
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052091701033.aspx
 : @param $numbers the sequence of numbers or values castable to numeric.
 :           The sequence can be of any length.
 : @return The max
 : @error excel-err:Value if the parameters cannot be casted to numeric type 
 : @example test/Queries/excel/statistical/max1.xq
 : @example test/Queries/excel/statistical/max2.xq
 : @example test/Queries/excel/statistical/max3.xq
:)
declare function excel:max ( $numbers as xs:anyAtomicType* ) as xs:anyAtomicType
{
  fn:max( 
   (for $n_at in $numbers
    let $n := excel-math:cast-as-numeric($n_at)
    return $n
   )
   )
};

(:~
 : Returns the smallest number in a sequence.
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052091761033.aspx
 : @param $numbers the sequence of numbers or values castable to numeric.
 :           The sequence can be of any length.
 : @return The min
 : @error excel-err:Value if the parameters cannot be casted to numeric type 
 : @example test/Queries/excel/statistical/min1.xq
 : @example test/Queries/excel/statistical/min2.xq
:)
declare function excel:min ( $numbers as xs:anyAtomicType* ) as xs:anyAtomicType
{
  fn:min( 
   (for $n_at in $numbers
    let $n := excel-math:cast-as-numeric($n_at)
    return $n
   )
   )
};

(:~
 : Returns the median of the given numbers. 
 : The median is the number in the middle of a set of numbers.
 : Half the numbers have values that are greater than the median, 
 : and half the numbers have values that are less than the median. 
 : 
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052091741033.aspx
 : @param $numbers the sequence of numbers, of any length
 : @return for odd count of numbers return the number in the middle of the sorted sequence.
 :       For even count of numbers return the average of the two numbers in the middle.
 : @error excel-err:Value if the parameters cannot be casted to numeric type 
 : @example test/Queries/excel/statistical/median1.xq
 : @example test/Queries/excel/statistical/median2.xq
:)
declare function excel:median( $numbers as xs:anyAtomicType* ) as xs:anyAtomicType
{
  let $number_count := excel:count( $numbers )
  let $sorted_numbers := excel-math:sort-numbers( $numbers ) return
  if ($number_count mod 2 != 0) then
    $sorted_numbers[$number_count idiv 2 + 1]
  else
    if ($number_count = 0) then
      0
    else
      ($sorted_numbers[$number_count idiv 2] + $sorted_numbers[$number_count idiv 2 + 1] ) div 2
};

(:~
 : Returns the most frequently occurring, or repetitive, value in a sequence.
 : Arguments must be castable to numeric.
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052091831033.aspx
 : @param $numbers the sequence of numbers, of any length
 : @return The most occuring number
 : @error excel-err:Value if the parameters cannot be casted to numeric type
 : @error fn:QName("http://zorba.io/modules/excel/errors", "excel-err:NA") if there are no duplicate numbers
 : @example test/Queries/excel/statistical/mode1.xq
 : @example test/Queries/excel/statistical/mode2.xq
 : @example test/Queries/excel/statistical/mode3.xq
:)
declare function excel:mode( $numbers as xs:anyAtomicType* ) as xs:anyAtomicType
{
  if ( fn:empty($numbers)) then
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:NA"), "Mode function: empty sequence")
  else
  let $result := 
  ( for $n_at in fn:distinct-values($numbers) 
    let $n := excel-math:cast-as-numeric($n_at)
    let $count := fn:count( (for $d in $numbers where fn:string($d) = fn:string($n) return $d) )
    where $count > 1 
    order by $count descending
    return $n
  ) return 
  if (fn:empty($result)) then
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:NA"), "Mode function: no duplicate elements")
  else
    $result[1]
};

(:~
 : Returns the k-th percentile of values in a sequence.
 : If k is not a multiple of 1/(n - 1), 
 :   PERCENTILE interpolates to determine the value at the k-th percentile.  
 : The function is computed by (max-min)*k + min
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052092111033.aspx
 : @param $numbers the sequence of numbers, of any length
 : @param $k_at the percentile, with value between 0 .. 1 inclusive
 : @return The computed percentile
 : @error excel-err:Value if the parameters cannot be casted to numeric type
 : @error excel-err:Num if percentile is not between 0 .. 1
 : @example test/Queries/excel/statistical/percentile1.xq
 : @example test/Queries/excel/statistical/percentile2.xq
 : @example test/Queries/excel/statistical/percentile3.xq
:)
declare function excel:percentile( $numbers as xs:anyAtomicType*, $k_at as xs:anyAtomicType) as xs:anyAtomicType
{
  let $k := excel-math:cast-as-numeric($k_at) return
  if ($k < 0 or $k > 1) then
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:Num"), "Percentile function: k must be a value between 0 and 1 inclusive")
  else
    let $max := excel:max($numbers)
    let $min := excel:min($numbers) return
    ($max - $min) * $k + $min
};



(:~
 : Function for AVEDEV.
 : This function should not be used outside this module.
 : Computes formula sum(abs(x - average)) for every x in $numbers
 :
 : @param $numbers The sequence of numbers or values castable to numeric.
 :        Sequence can be of any length.
 : @param $average The average of all numbers, computed with function AVERAGE.
 : @return The result of the formula.
 : @error excel-err:Value if the parameters cannot be casted to numeric type.
 :)
declare %private function excel:sum-deviations(
  $numbers as xs:anyAtomicType*,
  $average as xs:anyAtomicType) as xs:anyAtomicType
{
  if (fn:empty($numbers)) then
    0
  else
    fn:abs(excel-math:cast-as-numeric($numbers[1]) - $average) + excel:sum-deviations(fn:subsequence($numbers, 2), $average)
};

(:~
 : Returns the average of the absolute deviations of data points from their mean.
 : The formula is sum(abs(x - average_x))/n, where n is the count of x in the sequence.
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052089931033.aspx
 : @param $numbers the sequence of numbers or values castable to numeric.
 :     Sequence can be of any length from 1 up.
 : @return The formula result
 : @error excel-err:Value if the parameters cannot be casted to numeric type
 : @example test/Queries/excel/statistical/priority1/avedev1.xq
:)
declare function excel:avedev($numbers as xs:anyAtomicType+) as xs:anyAtomicType
{
  let $average := excel:average($numbers) return
  excel:sum-deviations($numbers, $average) div excel:count($numbers)
};

(:~
 : Function for AVERAGEA.
 : This function should not be used outside this module.
 : This function adds all values that are castable to numeric.
 :
 : @param $numbers A sequence of any values, any length.
 : @return The sum of numbers.
 :)
declare %private function excel:add-all-cells($numbers as xs:anyAtomicType*) as xs:anyAtomicType
{
  if (fn:empty($numbers)) then
    0
  else if (excel-math:is-a-number($numbers[1])) then
    excel-math:cast-as-numeric($numbers[1]) + excel:add-all-cells(fn:subsequence($numbers, 2))
  else (: if (fn:string($numbers[1]) = "") then :)
    excel:add-all-cells(fn:subsequence($numbers, 2))
(:  else
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:Value"), "Provided value is not a number or empty cell ", $numbers[1])
:)
};

(:~
 : Calculates the average (arithmetic mean) of the values in the sequence of arguments.
 : Arguments can be of any type.
 : The numbers are added, and the sum is divided by the size of entire sequence.
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052089951033.aspx
 : @param $numbers the sequence of values of any type.
 :    The sequence can be of any length, from 1 up.
 : @return The result
 : @example test/Queries/excel/statistical/priority1/averagea1.xq
:)
declare function excel:averagea($numbers as xs:anyAtomicType+) as xs:anyAtomicType
{
  excel:add-all-cells($numbers) div fn:count($numbers)
};

(:~
 : Counts the number of values that are not empty.
 : Empty values are the one with string value "".
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052090271033.aspx
 : @param $numbers the sequence of values of any type, any length
 : @return The count of non-empty values
 : @example test/Queries/excel/statistical/priority1/counta1.xq
:)
declare function excel:counta($numbers as xs:anyAtomicType*) as xs:integer
{
  if (fn:empty($numbers)) then
    0
  else
  if (fn:string($numbers[1]) != "") then
    1 + excel:counta(fn:subsequence($numbers, 2))
  else
    excel:counta(fn:subsequence($numbers, 2))
};

(:~
 : Returns the k-th largest value in a data set. 
 : If n is the number of data points in a range, 
 :   then LARGE(array,1) returns the largest value, 
 :   and LARGE(array,n) returns the smallest value.
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052091511033.aspx
 : @param $numbers the sequence of numbers or values castable to numeric.
 :           The sequence can be of any length, from 1 up.
 : @param $k the position of largest value, with value from 1 to count of values
 : @return The k-th largest value as numeric type
 : @error excel-err:Value if the parameters cannot be casted to numeric type
 : @error excel-err:Num if the sequence is empty
 : @error excel-err:Num if k is not a value between 1 and the sequence size
 : @example test/Queries/excel/statistical/priority1/large1.xq
 : @example test/Queries/excel/statistical/priority1/large2.xq
 : @example test/Queries/excel/statistical/priority1/large3.xq
:)
declare function excel:large($numbers as xs:anyAtomicType*, $k as xs:integer) as xs:anyAtomicType
{
  if (fn:empty($numbers)) then
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:Num"), "Large function: value list must not be empty")  
  else if ($k > fn:count($numbers) or $k le 0) then
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:Num"), "Large function: k must be between 1 and the count of numbers ", $k)
  else
    let $ordered_numbers :=
      (for $n in $numbers 
       let $nn := excel-math:cast-as-numeric($n)
       order by $nn descending
       return $nn
      ) return
     $ordered_numbers[$k]
};

(:~
 : Returns the largest value in a list of arguments.
 : In this implementation there is no difference between MAX and MAXA.
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052091711033.aspx
 : @param $numbers the sequence of numbers or values castable to numeric
 :           The sequence can be of any length.
 : @return The max
 : @error excel-err:Value if the parameters cannot be casted to numeric type 
 : @example test/Queries/excel/statistical/priority1/maxa1.xq
:)
declare function excel:maxa($numbers as xs:anyAtomicType*) as xs:anyAtomicType
{
  excel:max($numbers)
};

(:~
 : Returns the smallest value in a list of arguments.
 : In this implementation there is no difference between MAX and MAXA.
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052091771033.aspx
 : @param $numbers the sequence of numbers or values castable to numeric
 :           The sequence can be of any length.
 : @return The min
 : @error excel-err:Value if the parameters cannot be casted to numeric type 
 : @example test/Queries/excel/statistical/priority1/mina1.xq
:)
declare function excel:mina($numbers as xs:anyAtomicType*) as xs:anyAtomicType
{
  excel:min($numbers)
};

(:~
 : Returns the rank of a number in a list of numbers. 
 : The rank of a number is its size relative to other values in a list. 
 : (If you were to sort the list, the rank of the number would be its position.)
 : RANK gives duplicate numbers the same rank.
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052092311033.aspx
 : @param $x The number whose rank you want to find.
 : @param $numbers The sequence of numbers or values castable to numbers.
 :        The sequence can be of any length.
 : @param $order_ascending <dl>A boolean having the meaning:
 :        <dt>false</dt><dd>then rank the number as if the sequence was sorted in descending order.</dd>
 :        <dt>true</dt> <dd>then rank the number as if the sequence was sorted in ascending order.</dd></dl>
 : @return The rank of $x.
 : @error excel-err:Value if the parameters cannot be casted to numeric type.
 : @example test/Queries/excel/statistical/priority1/rank1.xq
 : @example test/Queries/excel/statistical/priority1/rank2.xq
 : @example test/Queries/excel/statistical/priority1/rank4.xq
 :)
declare function excel:rank(
  $x                as xs:anyAtomicType, 
  $numbers          as xs:anyAtomicType*, 
  $order_ascending  as xs:boolean) as xs:decimal
{
  let $ordered_numbers :=
    if ($order_ascending) then (
      for $n in $numbers 
      let $nn := excel-math:cast-as-numeric($n)
      order by $nn ascending
      return $nn
    ) else (
      for $n in $numbers 
      let $nn := excel-math:cast-as-numeric($n)
      order by $nn descending
      return $nn
    )
  let $xnum := excel-math:cast-as-numeric($x) 
  let $rank :=
    (
      for $i at $pos in $ordered_numbers
      where $xnum = $i or $order_ascending and $xnum < $i
                     or fn:not($order_ascending) and $xnum > $i
      return 
        if ($xnum = $i) then
          $pos
        else if ($pos = 1) then
          0
        else
          ($pos - 1) + ($xnum - $ordered_numbers[$pos - 1]) div ($ordered_numbers[$pos] - $ordered_numbers[$pos - 1])
    )
  return 
    if (fn:empty($rank)) then
      fn:count($numbers)
    else
      $rank[1]
};

(:~
 : This RANK function is same as the above, only that $order_ascending is set by default to false.
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052092311033.aspx
 : @param $x The number whose rank you want to find.
 : @param $numbers the sequence of numbers or values castable to numbers.
 :        The sequence can be of any length.
 : @return The rank of $x.
 : @error excel-err:Value if the parameters cannot be casted to numeric type.
 : @example test/Queries/excel/statistical/priority1/rank3.xq
 : @example test/Queries/excel/statistical/priority1/rank5.xq
:)
declare function excel:rank(
  $x        as xs:anyAtomicType, 
  $numbers  as xs:anyAtomicType*) as xs:decimal
{
  excel:rank($x, $numbers, fn:false())
};

(:~
 : Returns the rank of a value in a data set as a percentage of the data set.
 : If x does not match one of the values in array, 
 :   PERCENTRANK interpolates to return the correct percentage rank. <p/>
 : The formula is uses: (RANK - 1) / (size - 1) .
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052092121033.aspx
 : @param $numbers the sequence of numbers or values castable to numbers.
 :    The sequence can be of any length, from 1 up.
 : @param $x is the value for which you want to know the rank
 : @return The percentage of rank. 
 : @error excel-err:Value if the parameters cannot be casted to numeric type 
 : @error excel-err:Num if the sequence is zero length
 : @example test/Queries/excel/statistical/priority1/percentrank1.xq
 : @example test/Queries/excel/statistical/priority1/percentrank2.xq
 : @example test/Queries/excel/statistical/priority1/percentrank3.xq
 : @example test/Queries/excel/statistical/priority1/percentrank4.xq
 : @example test/Queries/excel/statistical/priority1/percentrank5.xq
:)
declare function excel:percentrank($numbers as xs:anyAtomicType*, $x as xs:anyAtomicType) as xs:decimal
{
  if (fn:empty($numbers)) then
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:Num"), "Percentrank function: value list must not be empty")
  else  
    let $rank := excel:rank($x, $numbers, fn:true()) return
    if ($rank = 0) then
      0
    else
      ($rank - 1) div (fn:count($numbers) - 1)
};

(:~
 : Returns the quartile of a data set. 
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052092261033.aspx
 : @param $numbers sequence of numbers or values castable to numbers.
 :      The sequence can be of any length, from 1 up.
 : @param $quart <dl>one of the values 0, 1, 2, 3, 4 with meaning:
 :     <dt>0</dt> <dd> compute minimum value</dd>
 :     <dt>1</dt> <dd> compute first quartile (25th percentile)</dd>
 :     <dt>2</dt> <dd> compute median value (50th percentile)</dd>
 :     <dt>3</dt> <dd> compute third quartile (75th percentile)</dd>
 :     <dt>4</dt> <dd> compute maximum value</dd></dl>
 :  @return the computed quartile, as numeric type
 : @error excel-err:Value if the parameters cannot be casted to numeric type 
 : @error excel-err:Num if the sequence is zero length
 : @error excel-err:Num if $quart is not one of the values 0, 1, 2, 3, 4
 : @example test/Queries/excel/statistical/priority1/quartile1.xq
 : @example test/Queries/excel/statistical/priority1/quartile2.xq
 : @example test/Queries/excel/statistical/priority1/quartile3.xq
 : @example test/Queries/excel/statistical/priority1/quartile4.xq
 : @example test/Queries/excel/statistical/priority1/quartile5.xq
:)
declare function excel:quartile($numbers as xs:anyAtomicType*, $quart as xs:integer) as xs:anyAtomicType
{
  if (fn:empty($numbers)) then
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:Num"), "Quartile function: value list must not be empty")
  else  
  if ($quart = 0) then
    excel:min($numbers)
  else
  if ($quart = 1) then
    let $r := (fn:count($numbers) + 3) div 4
    let $rint := xs:integer($r)
    let $rrem := $r - $rint 
    let $sorted_numbers := excel-math:sort-numbers( $numbers ) return
      ($numbers[$rint + 1] - $numbers[$rint]) * $rrem + $numbers[$rint] 
  else
  if ($quart = 2) then
    excel:median($numbers)
  else
  if ($quart = 3) then
    let $r := (3 * fn:count($numbers) + 1) div 4
    let $rint := xs:integer($r)
    let $rrem := $r - $rint 
    let $sorted_numbers := excel-math:sort-numbers( $numbers ) return
      ($numbers[$rint + 1] - $numbers[$rint]) * $rrem + $numbers[$rint] 
  else
  if ($quart = 4) then
    excel:max($numbers)
  else
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:Num"), "Quartile function: quart should be between 0 and 4 :", $quart)
};

(:~
 : This function computes the k-th smallest value in a data set. 
 : Use this function to return values with a particular relative standing in a data set.
 : If n is the number of data points in array, SMALL(array,1) equals the smallest value, 
 :   and SMALL(array,n) equals the largest value. 
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052092661033.aspx
 : @param $numbers A sequence of numbers or values castable to numeric.
 :        The sequence can be of any length, from 1 up.
 : @param $k The position (from the smallest) in the sequence of data to return.
 :        Must have value between 1 and size of sequence.
 : @return The k-th smallest value of $numbers.
 : @error excel-err:Value if the parameters cannot be casted to numeric type.
 : @error excel-err:Num if the sequence is zero length.
 : @error excel-err:Num if $k is not a value between 1 and the size of sequence.
 : @example test/Queries/excel/statistical/priority1/small1.xq
 : @example test/Queries/excel/statistical/priority1/small2.xq
:)
declare function excel:small($numbers as xs:anyAtomicType*, $k as xs:integer) as xs:anyAtomicType
{
  if (fn:empty($numbers)) then
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:Num"), "Small function: value list must not be empty")
  else if ($k gt fn:count($numbers) or $k le 0) then
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:Num"), "Small function: k must be between 1 and the count of numbers ", $k)
  else
    let $ordered_numbers := (
        for $n in $numbers 
        let $nn := excel-math:cast-as-numeric($n)
        order by $nn ascending
        return $nn
      )
    return
      $ordered_numbers[$k]
};


(:~
 : Function for VAR, VARA, VARP, VARPA and SLOPE.
 : This function should not be used outside this module.
 : It computes formula sum((x - average_x)^2) for all x in $numbers.
 :
 : @param $numbers the sequence of numbers or values castable to numbers.
 :        The sequence can be of any length.
 : @param $average The precomputed average over the sequence.
 : @return The result as numeric type.
 : @error excel-err:Value if the parameters cannot be casted to numeric type.
 :)
declare %private function excel:sumsq-deviations($numbers as xs:anyAtomicType*, $average as xs:anyAtomicType) as xs:anyAtomicType
{
  if (fn:empty($numbers)) then
    0
  else
    let $val := excel-math:cast-as-numeric($numbers[1]) - $average
    return
      $val * $val + excel:sumsq-deviations(fn:subsequence($numbers, 2), $average)
};

(:~
 : Estimates variance based on a sample.<p/>
 : The formula is sum(x - average_x)^2 / (n - 1).<p/>
 : average_x is computed with AVERAGE function.<p/>
 : n is the count of numbers from the sequence, excluding empty values.
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052093301033.aspx
 : @param $numbers the sequence of numbers or values castable to numeric.
 :       The sequence can be of any length, from 1 up.
 : @return The variance, as numeric type
 : @error excel-err:Value if the parameters cannot be casted to numeric type
 : @example test/Queries/excel/statistical/priority1/var1.xq
 :)
declare function excel:var($numbers as xs:anyAtomicType+) as xs:anyAtomicType
{
  let $average := excel:average($numbers)
  return
    excel:sumsq-deviations($numbers, $average) div (excel:count($numbers) - 1)
};

(:~
 : Estimates variance based on a sample.<p/>
 : The formula is sum(x - average_x)^2 / (n - 1).<p/>
 : average_x is computed with AVERAGE function.<p/>
 : n is the size of sequence, including empty values.<p/>
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052093311033.aspx
 : @param $numbers the sequence of numbers or values castable to numeric.
 :       The sequence can be of any length, from 1 up.
 : @return The variance, as numeric type
 : @error excel-err:Value if the parameters cannot be casted to numeric type
 : @example test/Queries/excel/statistical/priority1/vara1.xq
:)
declare function excel:vara($numbers as xs:anyAtomicType+) as xs:anyAtomicType
{
  let $average := excel:average($numbers) return
  excel:sumsq-deviations($numbers, $average) div (fn:count($numbers) - 1)
};

(:~
 : Calculates variance based on the entire population.<p/>
 : The formula is sum(x - average_x)^2 / n.<p/>
 : average_x is computed with AVERAGE function.<p/>
 : n is the count of numbers from the sequence, excluding empty values.<p/>
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052093321033.aspx
 : @param $numbers the sequence of numbers or values castable to numeric.
 :       The sequence can be of any length, from 1 up.
 : @return The variance, as numeric type
 : @error excel-err:Value if the parameters cannot be casted to numeric type
 : @example test/Queries/excel/statistical/priority1/varp1.xq
:)
declare function excel:varp($numbers as xs:anyAtomicType+) as xs:anyAtomicType
{
  let $average := excel:average($numbers) return
  excel:sumsq-deviations($numbers, $average) div excel:count($numbers)
};

(:~
 : Calculates variance based on the entire population.<p/>
 : The formula is sum(x - average_x)^2 / n.<p/>
 : average_x is computed with AVERAGE function.<p/>
 : n is the size of sequence, including empty values.<p/>
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052093321033.aspx
 : @param $numbers the sequence of numbers or values castable to numeric.
 :       The sequence can be of any length, from 1 up.
 : @return The variance, as numeric type
 : @error excel-err:Value if the parameters cannot be casted to numeric type
 : @example test/Queries/excel/statistical/priority1/varpa1.xq
:)
declare function excel:varpa($numbers as xs:anyAtomicType+) as xs:anyAtomicType
{
  let $average := excel:average($numbers) return
  excel:sumsq-deviations($numbers, $average) div fn:count($numbers)
};

(:~
 : Function for PROB function.
 : This function should not be used outside this module.
 : Computes the sum over a sequence of numbers.
 : Checks if the values are between 0 and 1.
 :
 : @param $prob_range The sequence of probabilities.
 : @return The sum of probabilities. This should be 1.
 : @error excel-err:Num if any probability is not between 0 and 1.
 : @error excel-err:Value if any parameter is not castable to numeric.
:)
declare %private function excel:sum-prob($prob_range as xs:anyAtomicType*) as xs:anyAtomicType
{
  if (fn:empty($prob_range)) then
    0
  else
    let $prob_num := excel-math:cast-as-numeric($prob_range[1])
  return
    if ($prob_num < 0 or $prob_num > 1) then
      fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:Num"), "Prob function: prob values should be between 0 and 1 ", $prob_num)
    else
      $prob_num + excel:sum-prob(fn:subsequence($prob_range, 2))
};

(:~
 : Function for PROB function.
 : This function should not be used outside this module.
 : Checks the prob range and x range if they have the same number of values.
 : Adds all probabilities corresponding to values between range_lower_limit and upper_limit.
 :
 : @param $x_range The sequence of x values.
 : @param $prob_range The sequence of probabilies associated to x values.
 : @param $range_lower_limit The lower limit of the range to compute the probability.
 : @param $upper_limit The upper limit of the range to compute the probability.
 : @return The sum of probabilities.
 : @error excel-err:Value if any parameter is not castable to numeric.
 : @error excel-err:Num if x_range and prob_range do not have the same number of values.
 :)
declare %private function excel:sum-prob-x(
  $x_range            as xs:anyAtomicType*,
  $prob_range         as xs:anyAtomicType*,
  $range_lower_limit  as xs:anyAtomicType,
  $upper_limit        as xs:anyAtomicType) as xs:anyAtomicType
{
  if (fn:empty($x_range) and fn:not(fn:empty($prob_range))) then
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:Num"), "Prob function: x range and prob range should have the same number of elements")
  else if (fn:empty($prob_range) and fn:not(fn:empty($x_range))) then
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:Num"), "Prob function: x range and prob range should have the same number of elements")
  else if (fn:empty($prob_range) and fn:empty($x_range)) then
    0
  else
    let $x := excel-math:cast-as-numeric($x_range[1])
    let $this_prob :=
      if ($x ge $range_lower_limit and $x le $upper_limit) then
        excel-math:cast-as-numeric($prob_range[1])
      else
        0 
    return
      $this_prob + excel:sum-prob-x(
        fn:subsequence($x_range, 2),
        fn:subsequence($prob_range, 2),
        $range_lower_limit,
        $upper_limit)
};

(:~
 : Returns the probability that values in a range are between two limits.
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052092221033.aspx
 : @param $x_range is the range of numeric values of x with which there are associated probabilities.
 :       This does not need to be ordered.
 : @param $prob_range is a set of probabilities associated with values in x_range.
 : @param $range_lower_limit is the lower bound on the value for which you want a probability.
 : @param $upper_limit  is the upper bound on the value for which you want a probability.
 : @return The probability of the entire range
 : @error excel-err:Num if any probability is not between 0 and 1
 : @error excel-err:Num if the sum of probabilities is not equal to 1
 : @error excel-err:Value if any parameter is not castable to numeric
 : @error excel-err:Num if x_range and prob_range do not have the same number of values
 : @example test/Queries/excel/statistical/priority2/prob2.xq
:)
declare function excel:prob($x_range as xs:anyAtomicType+,
                            $prob_range as xs:anyAtomicType+,
                            $range_lower_limit as xs:anyAtomicType,
                            $upper_limit as xs:anyAtomicType) as xs:anyAtomicType
{
  let $prob_sum := excel:sum-prob($prob_range) return
  if ($prob_sum != 1) then
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:Num"), "Prob function: prob sum should equal 1")
  else
    excel:sum-prob-x($x_range, $prob_range, 
                    excel-math:cast-as-numeric($range_lower_limit), 
                    excel-math:cast-as-numeric($upper_limit))
};

(:~
 : This is the same as above, only that upper_limit is not specified.
 : The probability is computed only for range_lower_limit.
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052092221033.aspx
 : @param $x_range is the range of numeric values of x with which there are associated probabilities.
 :       This does not need to be ordered.
 : @param $prob_range is a set of probabilities associated with values in x_range.
 : @param $range_lower_limit is the value for which you want a probability.
 : @return The probability of the range_lower_limit value
 : @error excel-err:Num if any probability is not between 0 and 1
 : @error excel-err:Num if the sum of probabilities is not equal to 1
 : @error excel-err:Value if any parameter is not castable to numeric
 : @error excel-err:Num if x_range and prob_range do not have the same number of values
 : @example test/Queries/excel/statistical/priority2/prob1.xq
 :)
declare function excel:prob($x_range as xs:anyAtomicType+,
                            $prob_range as xs:anyAtomicType+,
                            $range_lower_limit as xs:anyAtomicType) as xs:anyAtomicType
{
  excel:prob($x_range, $prob_range, $range_lower_limit, $range_lower_limit)
};

(:~
 : Function for SLOPE function.
 : This function should not be used outside this module.
 : It computes the formula:<p/>
 : sum((x - average_x)(y - average_y)) <p/>
 : where average_x and average_y are computed with AVERAGE function.
 :
 : @param $x_numbers The sequence of x numbers.
 : @param $x_average The precomputed AVERAGE over the x_numbers.
 : @param $y_numbers The sequence of y numbers.
 : @param $y_average The precomputed AVERAGE over the y_numbers.
 : @return The formula result, as numeric type.
 : @error excel-err:Value if any parameter cannot be casted to numeric.
 : @error fn:QName("http://zorba.io/modules/excel/errors", "excel-err:NA") if there are different numbers of x's and y's.
 :)
declare %private function excel:sum-x-y-deviations(
  $x_numbers as xs:anyAtomicType*, 
  $x_average as xs:anyAtomicType,
  $y_numbers as xs:anyAtomicType*, 
  $y_average as xs:anyAtomicType) as xs:anyAtomicType
{
  if (fn:empty($x_numbers) and fn:not(fn:empty($y_numbers))) then
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:NA"), "Slope function: different number of x's and y's")
  else if (fn:empty($y_numbers) and fn:not(fn:empty($x_numbers))) then
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:NA"), "Slope function: different number of x's and y's")
  else if (fn:empty($x_numbers) and fn:empty($y_numbers)) then
    0
  else
    (excel-math:cast-as-numeric($x_numbers[1]) - $x_average) * 
    (excel-math:cast-as-numeric($y_numbers[1]) - $y_average) + 
    excel:sum-x-y-deviations(
      fn:subsequence($x_numbers, 2),$x_average,
      fn:subsequence($y_numbers, 2),$y_average)
};

(:~
 : Returns the slope of the linear regression line through data points in known_y's and known_x's.
 : The slope is the vertical distance divided by the horizontal distance between 
 :   any two points on the line, which is the rate of change along the regression line.
 : It computes the formula:<p/>
 : sum((x - average_x)(y - average_y)) / sum((x - average_x)^2)  <p/>
 : where average_x and average_y are computed with AVERAGE function.
 : 
 : @see http://office.microsoft.com/en-us/excel/HP052092641033.aspx
 : @param $known_y the sequence of y numbers.
 :    The sequence can be of any length, from 1 up.  
 : @param $known_x the sequence of x numbers.
 :    The sequence can be of any length, from 1 up.  
 : @return The slope value, as numeric type
 : @error excel-err:Value if any parameter cannot be casted to numeric
 : @error fn:QName("http://zorba.io/modules/excel/errors", "excel-err:NA") if there are different numbers of x's and y's
 : @error fn:QName("http://zorba.io/modules/excel/errors", "excel-err:NA") if any sequence is empty
 : @error excel-err:Div0 if all x's are equal
 : @example test/Queries/excel/statistical/priority2/slope1.xq
:)
declare function excel:slope($known_y as xs:anyAtomicType+,
                       $known_x as xs:anyAtomicType+) as xs:anyAtomicType
{
  if (fn:empty($known_y) or fn:empty($known_x)) then
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:NA"), "Slope function: known_x and known_y cannot be empty sequences")
  else
  let $x_average := excel:average($known_x) 
  let $y_average := excel:average($known_y) 
  let $xsq_dev := excel:sumsq-deviations($known_x, $x_average) return
  if ($xsq_dev = 0) then
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:Div0"), "Slope function: all x's are equal")
  else
  let $x_y_dev := excel:sum-x-y-deviations($known_x, $x_average, $known_y, $y_average) return
  $x_y_dev div $xsq_dev
};

(:~
 : Returns a normalized value from a distribution characterized by mean and standard_dev.<p/>
 : The formula is (x - mean) / standard_dev .
 :
 : @see http://office.microsoft.com/en-us/excel/HP052092731033.aspx
 : @param $x is the value you want to normalize
 : @param $mean  is the arithmetic mean of the distribution.
 : @param $standard_dev is the standard deviation of the distribution.
 : @return The normalized x, as numeric type
 : @error excel-err:Value if any parameter cannot be casted to numeric
 : @error excel-err:Num if standard_dev is a value smaller than zero or equal
 : @example test/Queries/excel/statistical/priority2/standardize1.xq
:)
declare function excel:standardize($x as xs:anyAtomicType,
                                   $mean as xs:anyAtomicType,
                                   $standard_dev as xs:anyAtomicType) as xs:double
{
  if ($standard_dev le 0) then
    fn:error(fn:QName("http://zorba.io/modules/excel/errors", "excel-err:Num"), "Standardize function: standard_dev must be positive ", $standard_dev)
  else
    (excel-math:cast-as-numeric($x) - excel-math:cast-as-numeric($mean)) div excel-math:cast-as-numeric($standard_dev)
};