## Module statistics.py
##
## Copyright (c) 2013 Steven D'Aprano.
##
## Permission is hereby granted, free of charge, to any person obtaining
## a copy of this software and associated documentation files (the
## "Software"), to deal in the Software without restriction, including
## without limitation the rights to use, copy, modify, merge, publish,
## distribute, sublicense, and/or sell copies of the Software, and to
## permit persons to whom the Software is furnished to do so, subject to
## the following conditions:
##
## The above copyright notice and this permission notice shall be
## included in all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
## EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
## MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
## IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
## CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
## TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
## SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
Statistics module for Python 3.3 and better.
Calculate statistics of data.
This module provides the following functions and classes:
Summary:
================== =============================================
Function Description
================== =============================================
add_partial Utility for performing high-precision sums.
mean Arithmetic mean (average) of data.
median Median (middle value) of data.
mode Mode (most common value) of data.
pstdev Population standard deviation of data.
pvariance Population variance of data.
StatisticsError Exception for statistics errors.
stdev Sample standard deviation of data.
sum High-precision sum of data.
variance Sample variance of data.
================== =============================================
Examples
--------
>>> mean([-1.0, 2.5, 3.25, 5.75])
2.625
>>> stdev([2.5, 3.25, 5.5, 11.25, 11.75]) #doctest: +ELLIPSIS
4.38961843444...
"""
# Module metadata.
__version__ = "0.1a"
__date__ = "2013-07-31"
__author__ = "Steven D'Aprano"
__author_email__ = "steve+python@pearwood.info"
__all__ = [ 'add_partial', 'sum', 'StatisticsError',
'pstdev', 'pvariance', 'stdev', 'variance',
'mean', 'median', 'mode',
]
import collections
import math
import numbers
import operator
from builtins import sum as _sum
# === Exceptions ===
class StatisticsError(ValueError):
    """Exception raised by functions in this module for statistics-related
    errors, such as requesting the mean, median or mode of empty data.
    """
    pass
# === Public utilities ===
# Thanks to Raymond Hettinger for his recipe:
# http://code.activestate.com/recipes/393090/
def add_partial(x, partials):
    """Helper function for full-precision summation of binary floats.

    Add float x in place to the list partials, keeping the sum exact with no
    rounding error.

    Arguments
    ---------
    x
        Must be a float.
    partials
        A list containing the partial sums.

    Description
    -----------
    Initialise partials to be an empty list. Then for each float value ``x``
    you wish to add, call ``add_partial(x, partials)``.
    When you are done, call the built-in ``sum(partials)`` to round the
    result to the standard float precision.
    If any x is not a float, or partials is not initialised to an empty
    list, results are undefined.

    Examples
    --------
    >>> partials = []
    >>> for x in (0.125, 1e100, 1e-50, 0.125, 1e100):
    ...     add_partial(x, partials)
    >>> partials
    [0.0, 1e-50, 0.25, 2e+100]
    """
    # Keep these as assertions so they can be optimized away.
    assert isinstance(x, float) and isinstance(partials, list)
    if not partials:
        partials.append(0.0)  # Holder for NAN/INF values.
    if not math.isfinite(x):
        # NANs and INFs cannot take part in the exact hi/lo summation
        # below, so accumulate them in the reserved slot at index 0.
        partials[0] += x
        return
    # Rounded x+y stored in hi with the round-off stored in lo. Together
    # hi+lo are exactly equal to x+y. The inner loop applies hi/lo summation
    # to each partial so that the list of partial sums remains exact.
    # Depends on IEEE-754 arithmetic guarantees. See proof of correctness at:
    # www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps
    i = 1  # Index 0 is reserved for NAN/INF, so real partials start at 1.
    for y in partials[1:]:
        if abs(x) < abs(y):
            # Swap so that |x| >= |y|; the round-off formula requires it.
            x, y = y, x
        hi = x + y
        lo = y - (hi - x)  # Exact rounding error of the addition above.
        if lo:
            # Keep only non-zero round-off terms as surviving partials.
            partials[i] = lo
            i += 1
        x = hi
    assert i > 0
    # Discard any stale entries past i and store the final high-order term.
    partials[i:] = [x]
def sum(data, start=0):
    """sum(data [, start]) -> value

    Return a high-precision sum of the given numeric data. If optional
    argument ``start`` is given, it is added to the total. If ``data`` is
    empty, ``start`` (defaulting to 0) is returned.

    Examples
    --------
    >>> sum([3, 2.25, 4.5, -0.5, 1.0], 0.75)
    11.0

    Float sums are calculated using high-precision floating point arithmetic
    that can avoid some sources of round-off error:

    >>> sum([1e50, 1, -1e50] * 1000) # Built-in sum returns zero.
    1000.0

    Fractions and Decimals are also supported:

    >>> from fractions import Fraction as F
    >>> sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)])
    Fraction(63, 20)

    Decimal sums honour the context:

    >>> import decimal
    >>> D = decimal.Decimal
    >>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")]
    >>> sum(data)
    Decimal('0.6963')
    >>> with decimal.localcontext(
    ...     decimal.Context(prec=2, rounding=decimal.ROUND_DOWN)):
    ...     sum(data)
    Decimal('0.68')

    Limitations
    -----------
    ``sum`` supports mixed arithmetic with the following limitations:

    - mixing Fractions and Decimals raises TypeError;
    - mixing floats with either Fractions or Decimals coerces to float,
      which may lose precision;
    - complex numbers are not supported.

    These limitations may change without notice in future versions.
    """
    if not isinstance(start, numbers.Number):
        raise TypeError('sum only accepts numbers')
    total = start
    data = iter(data)
    # x remembers the first float found in the non-float loop below (if
    # any); None means no such float has been seen yet.
    x = None
    if not isinstance(total, float):
        # Non-float sum. If we find a float, we exit this loop and continue
        # with the float code below. Until that happens, we keep adding.
        for x in data:
            if isinstance(x, float):
                # Convert running total to a float. See comment below for
                # why we do it this way.
                total = type(total).__float__(total)
                break
            total += x
        else:
            # No break, so we're done.
            return total
    # High-precision float sum.
    assert isinstance(total, float)
    partials = []
    add_partial(total, partials)
    if x is not None:
        # x is the float that terminated the non-float loop above; it was
        # never added to total, so fold it into the partials now.
        add_partial(x, partials)
    for x in data:
        try:
            # Don't call float() directly, as that converts strings and we
            # don't want that. Also, like all dunder methods, we should call
            # __float__ on the class, not the instance.
            x = type(x).__float__(x)
        except OverflowError:
            # Values too large for a float (e.g. huge ints) become the
            # appropriately-signed infinity.
            x = float('inf') if x > 0 else float('-inf')
        add_partial(x, partials)
    # Round the exact partial sums back down to standard float precision.
    return _sum(partials)
# === Private utilities ===
class _countiter:
"""Iterator that counts how many elements it has seen.
>>> c = _countiter(['a', 1, None, 'c'])
>>> _ = list(c)
>>> c.count
4
"""
def __init__(self, iterable):
self.it = iter(iterable)
self.count = 0
def __next__(self):
x = next(self.it) # This must occur before incrementing the count.
self.count += 1
return x
def __iter__(self):
return self
def _welford(data):
    """Welford's one-pass method for calculating variance and mean.

    Expects ``data`` to be a _countiter, and returns a three-tuple of

    - sum of square deviations from the mean;
    - the mean;
    - the number of items.
    """
    assert type(data) is _countiter
    num = 0
    avg = 0
    sqdev = 0
    for value in data:
        num += 1
        diff = value - avg
        # Update the running mean first; the correction term below must
        # use the *new* mean for the recurrence to be numerically stable.
        avg += diff/num
        sqdev += diff*(value - avg)
    assert num == data.count
    return (sqdev, avg, num)
def _direct(data, m, n):
"""Direct method for calculating variance (compensated version).
Expects ``data`` to be a sequence, and ``m`` to be the mean of the data.
``n`` should be the number of items, or None. Returns the sum of squared
deviations from the mean.
"""
assert m is not None
ss = sum((x-m)**2 for x in data)
if n:
# The following sum should mathematically equal zero, but
# due to rounding error may not.
ss -= sum((x-m) for x in data)**2/n
return ss
def _var_helper(data, m):
    """Return (sum of square deviations, mean, count) of data."""
    # Under no circumstances use the so-called "computational formula for
    # variance", as that is only suitable for hand calculations with a small
    # amount of low-precision data. It has terrible numeric properties.
    #
    # See a comparison of three computational methods here:
    # http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/
    try:
        n = len(data)
    except TypeError:
        # No len(), so data is an iterator; wrap it to count consumed items.
        n = None
        data = _countiter(data)
    if m is None and n is None:
        # Iterator with unknown mean: Welford's single-pass method gives
        # us the sum of squares, mean and count all at once.
        ss, m, n = _welford(data)
    elif m is None:
        # Sequence with unknown mean: two passes, computing the mean first.
        m = mean(data)
        ss = _direct(data, m, n)
    else:
        # Mean supplied by the caller.
        ss = _direct(data, m, n)
        if n is None:
            n = data.count
    assert not ss < 0, 'sum of square deviations is negative'
    return (ss, m, n)
# === Measures of central tendency (averages) ===
def mean(data):
    """mean(data) -> arithmetic mean of data

    Return the sample arithmetic mean of ``data``, a sequence or iterator
    of real-valued numbers.

    The arithmetic mean is the sum of the data divided by the number of
    data points. It is commonly called "the average", although it is only
    one of many different mathematical averages. It is a measure of the
    central location of the data.

    Examples
    --------
    >>> mean([1, 2, 3, 4, 4])
    2.8

    >>> from fractions import Fraction as F
    >>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)])
    Fraction(13, 21)

    >>> from decimal import Decimal as D
    >>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")])
    Decimal('0.5625')

    Errors
    ------
    If ``data`` is empty, StatisticsError will be raised.

    Additional Information
    ----------------------
    The mean is strongly affected by outliers and is not a robust estimator
    for central location: the mean is not necessarily a typical example of
    the data points. For more robust, although less efficient, measures
    of central location, see ``median`` and ``mode``.

    The sample mean gives an unbiased estimate of the true population mean,
    which means that on average, ``mean(sample)`` will equal the mean of
    the entire population. If you call ``mean`` with the entire population,
    the result returned is the population mean \N{GREEK SMALL LETTER MU}.
    """
    try:
        n = len(data)
    except TypeError:
        # No len(): data is an iterator, so count the items as the
        # summation below consumes them.
        data = _countiter(data)
        n = None
    total = sum(data)
    if n is None:
        n = data.count
    if not n:
        raise StatisticsError('mean of empty data is not defined')
    return total/n
class median:
    """Return the median (middle value) of numeric data.

    There are (at least) four different methods for calculating median,
    depending on whether or not you allow interpolation between data points:

    1. The most common method returns the middle value if there are an
       odd number of values, or the average of the two middle values
       when there are an even number of values.
    2. The "low median" returns the middle value, or the smaller of the
       two middle values.
    3. The "high median" returns the middle value, or the larger of the
       two middle values.
    4. For grouped continuous data, it is common to treat the median as
       the 50th percentile and interpolate when there are duplicate
       values.

    Examples
    --------
    To get the regular median (#1 above), call ``median`` directly:

    >>> median([2, 3, 4, 5])
    3.5

    This is best suited when your data is discrete, and you don't mind that
    the median may not be an actual data point.

    The other calculation methods are provided as methods on ``median``:

    >>> median.low([2, 3, 4, 5])
    3
    >>> median.high([2, 3, 4, 5])
    4
    >>> median.grouped([2, 2, 3, 3, 3, 4]) #doctest: +ELLIPSIS
    2.8333333333...

    For further details, see the individual methods.
    """
    # If you think that having four definitions of median is annoying, you
    # ought to see the FIFTEEN definitions for quartiles!

    # Shared exception raised by all four calculation methods on empty data.
    _ERROR = StatisticsError("no median for empty data")

    # FIXME: investigate ways to calculate medians without sorting?
    def __new__(cls, data):
        """Return median of data using the common mean-of-middle-two method.

        If the number of data points is odd, the middle data point is
        returned:

        >>> median([1, 3, 5])
        3

        When the number of data points is even, the average of the two
        middle values is returned:

        >>> median([1, 3, 5, 7])
        4.0
        """
        data = sorted(data)
        n = len(data)
        if n == 0:
            raise cls._ERROR
        if n%2 == 1:
            # Odd number of points: the middle one is the median.
            return data[n//2]
        else:
            # Even number of points: average the two middle values.
            i = n//2
            return (data[i - 1] + data[i])/2

    @classmethod
    def low(cls, data):
        """Return the low median of data.

        The low median is always a member of the data set. When the number
        of data points is odd, the middle value is returned. When it is
        even, the smaller of the two middle values is returned.

        >>> median.low([1, 3, 5])
        3
        >>> median.low([1, 3, 5, 7])
        3
        """
        data = sorted(data)
        n = len(data)
        if n == 0:
            raise cls._ERROR
        if n%2 == 1:
            return data[n//2]
        else:
            return data[n//2 - 1]

    @classmethod
    def high(cls, data):
        """Return the high median of data.

        The high median is always a member of the data set. When the number
        of data points is odd, the middle value is returned. When it is
        even, the larger of the two middle values is returned.

        >>> median.high([1, 3, 5])
        3
        >>> median.high([1, 3, 5, 7])
        5
        """
        data = sorted(data)
        n = len(data)
        if n == 0:
            raise cls._ERROR
        # For both odd and even n, index n//2 is the (high) middle value.
        return data[n//2]

    @classmethod
    def grouped(cls, data, interval=1):
        """Return the median of grouped continuous data.

        >>> median.grouped([1, 2, 2, 3, 4, 4, 4, 4, 4, 5])
        3.7

        This calculates the median as the 50th percentile, and should be
        used when your data is continuous and grouped. In the above example,
        the values 1, 2, 3, etc. actually represent the midpoint of classes
        0.5-1.5, 1.5-2.5, 2.5-3.5, etc. The middle value falls somewhere in
        class 3.5-4.5, and interpolation is used to estimate it.

        Optional argument ``interval`` represents the class interval, and
        defaults to 1.

        >>> median.grouped([52, 52, 53, 54])
        52.5
        >>> median.grouped([1, 3, 3, 5, 7])
        3.25
        >>> median.grouped([1, 3, 3, 5, 7], 2)
        3.5

        This function does not check whether the data points are at least
        ``interval`` apart, and is equivalent to the Gnumeric spreadsheet
        function "ssmedian".
        """
        # References:
        # http://www.ualberta.ca/~opscan/median.html
        # https://mail.gnome.org/archives/gnumeric-list/2011-April/msg00018.html
        # https://projects.gnome.org/gnumeric/doc/gnumeric-function-SSMEDIAN.shtml
        data = sorted(data)
        n = len(data)
        if n == 0:
            raise cls._ERROR
        elif n == 1:
            return data[0]
        # Find the value at the midpoint. Remember this corresponds to the
        # centre of the class interval.
        x = data[n//2]
        L = x - interval/2  # The lower limit of the median interval.
        cf = data.index(x)  # Number of values below the median interval.
        # FIXME The following line could be more efficient for big lists.
        f = data.count(x)  # Number of data points in the median interval.
        return L + interval*(n/2 - cf)/f
class mode:
    """mode(data [, window [, max_modes [, delta]]]) -> mode(s)

    Return the most common data point, or points, from ``data``. The mode
    (when it exists) is the most typical value, and is a robust measure of
    central location.

    Arguments
    ---------
    data
        Non-empty iterable of data points, not necessarily numeric.
    window
        Optional window size for estimating the mode when data is
        numeric and continuous. For discrete data (numeric or not),
        use the default value of 0. Otherwise, ``window`` must be an
        integer 3 or larger. See ``mode.collate`` for more details.
    max_modes
        The maximum number of modes to return. Defaults to 1.
    delta
        None or a number specifying the difference in scores that
        distinguishes a mode from a non-mode. Defaults to 0. See
        ``mode.extract`` for more details.

    Examples
    --------
    By default, mode assumes discrete data, and returns a single value. This
    is the standard treatment of the mode as commonly taught in schools:

    >>> mode([1, 1, 2, 3, 3, 3, 3, 4])
    3

    This also works with nominal (non-numeric) data:

    >>> mode(["red", "blue", "blue", "red", "green", "red", "red"])
    'red'

    If your data is continuous (and not grouped), then we expect most values
    will be unique, and ``mode`` to raise an exception:

    >>> data = [1.1, 1.8, 2.4, 3.3, 3.4, 3.5, 4.6]
    >>> mode(data) #doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
        ...
    StatisticsError: no mode

    In this case, the mode represents the peak in the distribution, and we
    can estimate it by looking at multiple values at once, using a sliding
    window. Pass a non-zero int value, 3 or higher, for the ``window``
    argument:

    >>> mode(data, window=3)
    3.4

    If you suspect that your data has more than one mode, pass a positive
    int as the ``max_modes`` argument, and no more than that many modes will
    be returned as a list:

    >>> mode([5, 3, 2, 1, 5, 4, 2, 2, 5], max_modes=3)
    [2, 5]

    By default, peaks must have exactly the same height to count as multiple
    modes. To relax that restriction, supply argument ``delta``. See the
    ``mode.extract`` method for details.

    Additional Methods
    ------------------
    ``mode`` provides two methods to assist in determining how many peaks
    a data set actually has:

    mode.collate
        Returns the data set collated into sorted (value, frequency)
        pairs.
    mode.extract
        Decide which value or values should be considered a peak of
        the sample, and return only those values.

    See the individual methods for more details.

    Errors
    ------
    If your data is empty, or if it has more modes than you specified as
    ``max_modes`` (default of 1), then ``mode`` will raise StatisticsError.
    If you specify ``window`` less than 3, or greater than the number of
    data points, ``mode`` will raise ValueError.

    Additional Information
    ----------------------
    The mode is the only measure of location available for nominal data.

    Smaller window sizes have better resolution, and a chance to find
    narrow but tall peaks, but also increase the chance of missing the
    true mode and finding a chance fluctuation in the data. In general
    you should pick the largest window size your data will stand.

    The mode is a robust measure of the centre of a distribution even
    when data contains outliers.
    """
    def __new__(cls, data, window=0, max_modes=1, delta=0):
        c = cls.collate(data, window)
        if not c:
            raise StatisticsError('data must not be empty (no mode)')
        elif c[0][1] == 1 and window == 0:
            # Discrete data where even the most frequent value occurs only
            # once: every item is unique, so there is no mode. (Note that
            # collate never produces window == 1; valid values are 0 or >= 3.)
            raise StatisticsError('data has no mode')
        modes = cls.extract(c, window, delta)
        if len(modes) > max_modes:
            raise StatisticsError('too many modes: %d' % len(modes))
        elif max_modes == 1:
            assert len(modes) == 1
            return modes[0]
        return modes

    @classmethod
    def collate(cls, data, window):
        """Collate values from data using window-sized intervals.

        Arguments
        ---------
        data
            Non-empty iterable of data; must be numeric unless ``window``
            is zero.
        window
            for discrete or nominal data, 0; otherwise for continuous
            data, an int equal to or greater than 3.

        Returns a list of (value, score), sorted into descending order by
        score, where the values are:

        - data points, for discrete and nominal data;
        - an interpolated mid-point of the sliding window into the data,
          for continuous data (``window`` != 0).
        """
        if window == 0:
            # Calculate scores for discrete data by counting.
            return collections.Counter(data).most_common()
        # Otherwise we estimate scores for continuous data using a
        # technique called "Estimating the rate of an inhomogeneous
        # Poisson process from Jth waiting times", using the algorithm
        # from "Numerical Recipes In Pascal", Press et al.,
        # Cambridge University Press, 1992, p.508.
        if window < 3:
            raise ValueError('window size must be at least 3')
        data = sorted(data)
        n = len(data)
        if window > n:
            raise ValueError('too few data points for window size')
        collation = []
        for i in range(n-window+1):
            # Score each window by its density: window size over the span
            # it covers. Identical endpoints score as infinitely dense.
            a, b = data[i], data[i+window-1]
            x = (a+b)/2
            score = window/(n*(b-a)) if b != a else float('inf')
            collation.append((x, score))
        collation.sort(key=operator.itemgetter(1), reverse=True)
        return collation

    @classmethod
    def extract(cls, collation, window, delta):
        """Extract modal values from collated (value, score) pairs.

        ``extract`` takes a sorted, collated list, and determines which
        elements should be considered modes by comparing the score of each
        element with the score of the first, then returns those values in
        a list in ascending order.

        Scores can be interpreted in two ways:

        - for discrete data, the score is the frequency of the value;
        - for continuous data, the score is an estimate of the
          frequency within the given window size.

        Arguments
        ---------
        collation
            List of (value, score) pairs, sorted in order of
            decreasing frequency, as generated by the ``collate``
            method.
        window
            0, or window size as used by ``mode.collate``.
        delta
            None, or a numeric difference in score required to
            distinguish modes from non-modes in your data.

        If ``delta`` is a non-zero number, then two scores must differ by
        at least that amount to be distinguished. If ``delta`` is zero,
        (the default), then scores are compared using the ``!=`` operator.
        If ``delta`` is None, and ``window`` is zero (i.e. as used for
        discrete data) then scores are compared using ``!=``. Otherwise
        scores are distinguished if the relative error between them is
        greater than twice the square root of the window size.
        """
        if delta is None:
            if window == 0:
                diff = operator.ne
            else:
                # See Press et al. above.
                k = math.sqrt(window)
                # Factor of 2 below gives a 95% confidence; use 1* for
                # a 68% confidence or 3* for a 98.5% confidence (approx).
                diff = lambda a, b: k*abs(a - b) >= 2*abs(max(a, b))
        elif delta == 0:
            diff = operator.ne
        else:
            diff = lambda a, b: abs(a - b) >= delta
        a = collation[0][1]
        for i in range(1, len(collation)):
            b = collation[i][1]
            if diff(a, b):
                # Everything from position i onward scores distinguishably
                # lower than the top score, so it is not a mode.
                collation = collation[:i]
                break
        # Return the modal values in ascending order, as documented.
        return sorted(t[0] for t in collation)
# === Measures of spread ===
# See http://mathworld.wolfram.com/Variance.html
# http://mathworld.wolfram.com/SampleVariance.html
# http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
def variance(data, m=None):
    """variance(data [, m]) -> sample variance of numeric data

    Return the sample variance of ``data``, a sequence or iterator of
    real-valued numbers.

    Variance is a measure of the variability (spread or dispersion) of
    data. A large variance indicates that the data is spread out; a small
    variance indicates it is clustered closely around the central location.

    Arguments
    ---------
    data
        iterable of numeric (non-complex) data with at least two values.
    m
        (optional) mean of data, or None.

    Examples
    --------
    >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5]
    >>> variance(data)
    1.3720238095238095

    If you have already calculated the mean of your data, you can pass it as
    the optional second argument ``m`` to avoid recalculating it:

    >>> m = mean(data)
    >>> variance(data, m)
    1.3720238095238095

    .. CAUTION:: Using arbitrary values for ``m`` which are not the
       actual mean may lead to invalid or impossible results.

    Decimals and Fractions are supported:

    >>> from decimal import Decimal as D
    >>> variance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")])
    Decimal('31.01875')

    >>> from fractions import Fraction as F
    >>> variance([F(1, 6), F(1, 2), F(5, 3)])
    Fraction(67, 108)

    Additional Information
    ----------------------
    This is the unbiased sample variance s\N{SUPERSCRIPT TWO} with Bessel's
    correction, also known as variance with N-1 degrees of freedom. If you
    know the true population mean \N{GREEK SMALL LETTER MU} you should use
    the ``pvariance`` function instead.
    """
    ss, m, n = _var_helper(data, m)
    if n < 2:
        # Sample variance with Bessel's correction divides by n-1, so it
        # is undefined for fewer than two data points.
        raise StatisticsError('variance requires at least two data points')
    return ss/(n-1)
def pvariance(data, m=None):
    """pvariance(data [, m]) -> population variance of numeric data

    Return the population variance of ``data``, a sequence or iterator
    of real-valued numbers.

    Variance is a measure of the variability (spread or dispersion) of
    data. A large variance indicates that the data is spread out; a small
    variance indicates it is clustered closely around the central location.

    Arguments
    ---------
    data
        non-empty iterable of numeric (non-complex) data.
    m
        (optional) mean of data, or None.

    If your data represents the entire population, you should use this
    function; otherwise you should normally use ``variance`` instead.

    Examples
    --------
    >>> data = [0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25]
    >>> pvariance(data)
    1.25

    If you have already calculated the mean of your data, you can pass it as
    the optional second argument ``m`` to avoid recalculating it:

    >>> m = mean(data)
    >>> pvariance(data, m)
    1.25

    .. CAUTION:: Using arbitrary values for ``m`` which are not the
       actual mean may lead to invalid or impossible results.

    Decimals and Fractions are supported:

    >>> from decimal import Decimal as D
    >>> pvariance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")])
    Decimal('24.8150')

    >>> from fractions import Fraction as F
    >>> pvariance([F(1, 4), F(5, 4), F(1, 2)])
    Fraction(13, 72)

    Additional Information
    ----------------------
    When called with the entire population, this gives the population variance
    \N{GREEK SMALL LETTER SIGMA}\N{SUPERSCRIPT TWO}. When called on a sample
    instead, this is the biased sample variance s\N{SUPERSCRIPT TWO}, also
    known as variance with N degrees of freedom.

    If you somehow know the true population mean \N{GREEK SMALL LETTER MU},
    you should use this function to calculate the sample variance instead of
    the ``variance`` function, giving the known population mean as argument
    ``m``. In that case, the result will be an unbiased estimate of the
    population variance.
    """
    ss, m, n = _var_helper(data, m)
    if n < 1:
        # Population variance divides by n, so it needs at least one point.
        raise StatisticsError('pvariance requires at least one data point')
    return ss/n
def stdev(data, m=None):
    """stdev(data [, m]) -> sample standard deviation of numeric data

    Return the square root of the sample variance. See ``variance`` for
    arguments and other details.

    >>> stdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
    1.0810874155219827
    """
    v = variance(data, m)
    try:
        # Prefer the value's own sqrt method (e.g. Decimal), which keeps
        # the result in the same numeric type as the variance.
        return v.sqrt()
    except AttributeError:
        # Floats, ints and Fractions have no sqrt method of their own.
        return math.sqrt(v)
def pstdev(data, m=None):
    """pstdev(data [, m]) -> population standard deviation of numeric data

    Return the square root of the population variance. See ``pvariance`` for
    arguments and other details.

    >>> pstdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
    0.986893273527251
    """
    v = pvariance(data, m)
    try:
        # Prefer the value's own sqrt method (e.g. Decimal), which keeps
        # the result in the same numeric type as the variance.
        return v.sqrt()
    except AttributeError:
        # Floats, ints and Fractions have no sqrt method of their own.
        return math.sqrt(v)