diff options
Diffstat (limited to 'modules/language/python/module')
-rw-r--r-- | modules/language/python/module/_python.scm | 46 | ||||
-rw-r--r-- | modules/language/python/module/abc.scm | 9 | ||||
-rw-r--r-- | modules/language/python/module/fractions.py | 645 | ||||
-rw-r--r-- | modules/language/python/module/numbers.py | 393 | ||||
-rw-r--r-- | modules/language/python/module/operator.scm | 34 | ||||
-rw-r--r-- | modules/language/python/module/statistics.py | 672 |
6 files changed, 1776 insertions, 23 deletions
diff --git a/modules/language/python/module/_python.scm b/modules/language/python/module/_python.scm index 3998b19..6db20a2 100644 --- a/modules/language/python/module/_python.scm +++ b/modules/language/python/module/_python.scm @@ -122,7 +122,7 @@ (define-method (issubclass x y) #f) (define-method (issubclass (sub <p>) (cls <p>)) (aif it (ref cls '__subclasscheck__) - (it cls sub) + (it sub) (if (eq? sub cls) #t (if (memq cls (ref sub '__mro__)) @@ -141,6 +141,14 @@ (lambda () (is-a? x y)) (lambda x #f))))) +(define-method (isinstance (i <integer>) (y <p>)) + (cond + ((ref y '__instancecheck__) => + (lambda (it) + (it i))) + (else + (next-method)))) + (define-method (isinstance (i <integer>) y) (if (issubclass y int) #t @@ -149,6 +157,14 @@ (isinstance i (cdr y))) (is-a? i y)))) +(define-method (isinstance (i <real>) (y <p>)) + (cond + ((ref y '__instancecheck__) => + (lambda (it) + (it i))) + (else + (next-method)))) + (define-method (isinstance (i <real>) y) (if (issubclass y float) #t @@ -157,6 +173,14 @@ (isinstance i (cdr y))) (is-a? i y)))) +(define-method (isinstance (i <pair>) (y <p>)) + (cond + ((ref y '__instancecheck__) => + (lambda (it) + (it i))) + (else + (next-method)))) + (define-method (isinstance (i <pair>) y) (if (issubclass y tuple) #t @@ -165,14 +189,30 @@ (isinstance i (cdr y))) (is-a? i y)))) +(define-method (isinstance (i <string>) (y <p>)) + (cond + ((ref y '__instancecheck__) => + (lambda (it) + (it i))) + (else + (next-method)))) + (define-method (isinstance (i <string>) y) (if (issubclass y str) #t - (if (pair? y) - (or (isinstance i (car y)) + (if (pair? y) + (or (isinstance i (car y)) (isinstance i (cdr y))) (is-a? 
i y)))) +(define-method (isinstance (i <bytevector>) (y <p>)) + (cond + ((ref y '__instancecheck__) => + (lambda (it) + (it i))) + (else + (next-method)))) + (define-method (isinstance (i <bytevector>) y) (if (issubclass y bytes) #t diff --git a/modules/language/python/module/abc.scm b/modules/language/python/module/abc.scm index f0b8442..928ef5b 100644 --- a/modules/language/python/module/abc.scm +++ b/modules/language/python/module/abc.scm @@ -1,6 +1,7 @@ (define-module (language python module abc) #:use-module (language python module weakref) #:use-module (oop pf-objects) + #:use-module (oop goops) #:use-module (ice-9 control) #:use-module (language python for) #:use-module (language python try) @@ -75,8 +76,10 @@ (define register (lambda (cls subclass) - (if (not (isinstance subclass type)) - (raise TypeError "Can only register classes")) + (if (not (or (isinstance subclass type) + (and (not (is-a? subclass <p>)) + ((@@ (oop goops) class?) subclass)))) + (raise (TypeError "Can only register classes"))) (if (issubclass subclass cls) subclass @@ -151,7 +154,7 @@ (ret #t))) (aif it (ref cls '__subclasses__) - (for ((scls : (it))) () + (for ((scls : (it cls))) () (when (issubclass subclass scls) ((ref (ref cls '_abc_cache) 'add) subclass) (ret #t))) diff --git a/modules/language/python/module/fractions.py b/modules/language/python/module/fractions.py new file mode 100644 index 0000000..82d561c --- /dev/null +++ b/modules/language/python/module/fractions.py @@ -0,0 +1,645 @@ +module(fractions) + +# Originally contributed by Sjoerd Mullender. +# Significantly modified by Jeffrey Yasskin <jyasskin at gmail.com>. + +"""Fraction, infinite-precision, real numbers.""" + +from decimal import Decimal +import math +import numbers +import operator +import re +import sys + +__all__ = ['Fraction', 'gcd'] + + + +def gcd(a, b): + """Calculate the Greatest Common Divisor of a and b. 
+ + Unless b==0, the result will have the same sign as b (so that when + b is divided by it, the result comes out positive). + """ + import warnings + warnings.warn('fractions.gcd() is deprecated. Use math.gcd() instead.', + DeprecationWarning, 2) + if type(a) is int is type(b): + if (b or a) < 0: + return -math.gcd(a, b) + return math.gcd(a, b) + return _gcd(a, b) + +def _gcd(a, b): + # Supports non-integers for backward compatibility. + while b: + a, b = b, a%b + return a + +# Constants related to the hash implementation; hash(x) is based +# on the reduction of x modulo the prime _PyHASH_MODULUS. +_PyHASH_MODULUS = sys.hash_info.modulus +# Value to be used for rationals that reduce to infinity modulo +# _PyHASH_MODULUS. +_PyHASH_INF = sys.hash_info.inf + +_RATIONAL_FORMAT = re.compile(r""" + \A\s* # optional whitespace at the start, then + (?P<sign>[-+]?) # an optional sign, then + (?=\d|\.\d) # lookahead for digit or .digit + (?P<num>\d*) # numerator (possibly empty) + (?: # followed by + (?:/(?P<denom>\d+))? # an optional denominator + | # or + (?:\.(?P<decimal>\d*))? # an optional fractional part + (?:E(?P<exp>[-+]?\d+))? # and optional exponent + ) + \s*\Z # and optional whitespace to finish +""", re.VERBOSE | re.IGNORECASE) + + +class Fraction(numbers.Rational): + """This class implements rational numbers. + + In the two-argument form of the constructor, Fraction(8, 6) will + produce a rational number equivalent to 4/3. Both arguments must + be Rational. The numerator defaults to 0 and the denominator + defaults to 1 so that Fraction(3) == 3 and Fraction() == 0. 
+ + Fractions can also be constructed from: + + - numeric strings similar to those accepted by the + float constructor (for example, '-2.3' or '1e10') + + - strings of the form '123/456' + + - float and Decimal instances + + - other Rational instances (including integers) + + """ + + __slots__ = ('_numerator', '_denominator') + + # We're immutable, so use __new__ not __init__ + def __new__(cls, numerator=0, denominator=None, *, _normalize=True): + """Constructs a Rational. + + Takes a string like '3/2' or '1.5', another Rational instance, a + numerator/denominator pair, or a float. + + Examples + -------- + + >>> Fraction(10, -8) + Fraction(-5, 4) + >>> Fraction(Fraction(1, 7), 5) + Fraction(1, 35) + >>> Fraction(Fraction(1, 7), Fraction(2, 3)) + Fraction(3, 14) + >>> Fraction('314') + Fraction(314, 1) + >>> Fraction('-35/4') + Fraction(-35, 4) + >>> Fraction('3.1415') # conversion from numeric string + Fraction(6283, 2000) + >>> Fraction('-47e-2') # string may include a decimal exponent + Fraction(-47, 100) + >>> Fraction(1.47) # direct construction from float (exact conversion) + Fraction(6620291452234629, 4503599627370496) + >>> Fraction(2.25) + Fraction(9, 4) + >>> Fraction(Decimal('1.47')) + Fraction(147, 100) + + """ + self = super(Fraction, cls).__new__(cls) + + if denominator is None: + if type(numerator) is int: + self._numerator = numerator + self._denominator = 1 + return self + + elif isinstance(numerator, numbers.Rational): + self._numerator = numerator.numerator + self._denominator = numerator.denominator + return self + + elif isinstance(numerator, (float, Decimal)): + # Exact conversion + self._numerator, self._denominator = numerator.as_integer_ratio() + return self + + elif isinstance(numerator, str): + # Handle construction from strings. 
+ m = _RATIONAL_FORMAT.match(numerator) + if m is None: + raise ValueError('Invalid literal for Fraction: %r' % + numerator) + numerator = int(m.group('num') or '0') + denom = m.group('denom') + if denom: + denominator = int(denom) + else: + denominator = 1 + decimal = m.group('decimal') + if decimal: + scale = 10**len(decimal) + numerator = numerator * scale + int(decimal) + denominator *= scale + exp = m.group('exp') + if exp: + exp = int(exp) + if exp >= 0: + numerator *= 10**exp + else: + denominator *= 10**-exp + if m.group('sign') == '-': + numerator = -numerator + + else: + raise TypeError("argument should be a string " + "or a Rational instance") + + elif type(numerator) is int is type(denominator): + pass # *very* normal case + + elif (isinstance(numerator, numbers.Rational) and + isinstance(denominator, numbers.Rational)): + numerator, denominator = ( + numerator.numerator * denominator.denominator, + denominator.numerator * numerator.denominator + ) + else: + raise TypeError("both arguments should be " + "Rational instances") + + if denominator == 0: + raise ZeroDivisionError('Fraction(%s, 0)' % numerator) + if _normalize: + if type(numerator) is int is type(denominator): + # *very* normal case + g = math.gcd(numerator, denominator) + if denominator < 0: + g = -g + else: + g = _gcd(numerator, denominator) + numerator //= g + denominator //= g + self._numerator = numerator + self._denominator = denominator + return self + + @classmethod + def from_float(cls, f): + """Converts a finite float to a rational number, exactly. + + Beware that Fraction.from_float(0.3) != Fraction(3, 10). 
+ + """ + if isinstance(f, numbers.Integral): + return cls(f) + elif not isinstance(f, float): + raise TypeError("%s.from_float() only takes floats, not %r (%s)" % + (cls.__name__, f, type(f).__name__)) + return cls(*f.as_integer_ratio()) + + @classmethod + def from_decimal(cls, dec): + """Converts a finite Decimal instance to a rational number, exactly.""" + from decimal import Decimal + if isinstance(dec, numbers.Integral): + dec = Decimal(int(dec)) + elif not isinstance(dec, Decimal): + raise TypeError( + "%s.from_decimal() only takes Decimals, not %r (%s)" % + (cls.__name__, dec, type(dec).__name__)) + return cls(*dec.as_integer_ratio()) + + def limit_denominator(self, max_denominator=1000000): + """Closest Fraction to self with denominator at most max_denominator. + + >>> Fraction('3.141592653589793').limit_denominator(10) + Fraction(22, 7) + >>> Fraction('3.141592653589793').limit_denominator(100) + Fraction(311, 99) + >>> Fraction(4321, 8765).limit_denominator(10000) + Fraction(4321, 8765) + + """ + # Algorithm notes: For any real number x, define a *best upper + # approximation* to x to be a rational number p/q such that: + # + # (1) p/q >= x, and + # (2) if p/q > r/s >= x then s > q, for any rational r/s. + # + # Define *best lower approximation* similarly. Then it can be + # proved that a rational number is a best upper or lower + # approximation to x if, and only if, it is a convergent or + # semiconvergent of the (unique shortest) continued fraction + # associated to x. + # + # To find a best rational approximation with denominator <= M, + # we find the best upper and lower approximations with + # denominator <= M and take whichever of these is closer to x. + # In the event of a tie, the bound with smaller denominator is + # chosen. If both denominators are equal (which can happen + # only when max_denominator == 1 and self is midway between + # two integers) the lower bound---i.e., the floor of self, is + # taken. 
+ + if max_denominator < 1: + raise ValueError("max_denominator should be at least 1") + if self._denominator <= max_denominator: + return Fraction(self) + + p0, q0, p1, q1 = 0, 1, 1, 0 + n, d = self._numerator, self._denominator + while True: + a = n//d + q2 = q0+a*q1 + if q2 > max_denominator: + break + p0, q0, p1, q1 = p1, q1, p0+a*p1, q2 + n, d = d, n-a*d + + k = (max_denominator-q0)//q1 + bound1 = Fraction(p0+k*p1, q0+k*q1) + bound2 = Fraction(p1, q1) + if abs(bound2 - self) <= abs(bound1-self): + return bound2 + else: + return bound1 + + @property + def numerator(a): + return a._numerator + + @property + def denominator(a): + return a._denominator + + def __repr__(self): + """repr(self)""" + return '%s(%s, %s)' % (self.__class__.__name__, + self._numerator, self._denominator) + + def __str__(self): + """str(self)""" + if self._denominator == 1: + return str(self._numerator) + else: + return '%s/%s' % (self._numerator, self._denominator) + + def _operator_fallbacks(monomorphic_operator, fallback_operator): + """Generates forward and reverse operators given a purely-rational + operator and a function from the operator module. + + Use this like: + __op__, __rop__ = _operator_fallbacks(just_rational_op, operator.op) + + In general, we want to implement the arithmetic operations so + that mixed-mode operations either call an implementation whose + author knew about the types of both arguments, or convert both + to the nearest built in type and do the operation there. In + Fraction, that means that we define __add__ and __radd__ as: + + def __add__(self, other): + # Both types have numerators/denominator attributes, + # so do the operation directly + if isinstance(other, (int, Fraction)): + return Fraction(self.numerator * other.denominator + + other.numerator * self.denominator, + self.denominator * other.denominator) + # float and complex don't have those operations, but we + # know about those types, so special case them. 
+ elif isinstance(other, float): + return float(self) + other + elif isinstance(other, complex): + return complex(self) + other + # Let the other type take over. + return NotImplemented + + def __radd__(self, other): + # radd handles more types than add because there's + # nothing left to fall back to. + if isinstance(other, numbers.Rational): + return Fraction(self.numerator * other.denominator + + other.numerator * self.denominator, + self.denominator * other.denominator) + elif isinstance(other, Real): + return float(other) + float(self) + elif isinstance(other, Complex): + return complex(other) + complex(self) + return NotImplemented + + + There are 5 different cases for a mixed-type addition on + Fraction. I'll refer to all of the above code that doesn't + refer to Fraction, float, or complex as "boilerplate". 'r' + will be an instance of Fraction, which is a subtype of + Rational (r : Fraction <: Rational), and b : B <: + Complex. The first three involve 'r + b': + + 1. If B <: Fraction, int, float, or complex, we handle + that specially, and all is well. + 2. If Fraction falls back to the boilerplate code, and it + were to return a value from __add__, we'd miss the + possibility that B defines a more intelligent __radd__, + so the boilerplate should return NotImplemented from + __add__. In particular, we don't handle Rational + here, even though we could get an exact answer, in case + the other type wants to do something special. + 3. If B <: Fraction, Python tries B.__radd__ before + Fraction.__add__. This is ok, because it was + implemented with knowledge of Fraction, so it can + handle those instances before delegating to Real or + Complex. + + The next two situations describe 'b + r'. We assume that b + didn't know about Fraction in its implementation, and that it + uses similar boilerplate code: + + 4. If B <: Rational, then __radd_ converts both to the + builtin rational type (hey look, that's us) and + proceeds. + 5. 
Otherwise, __radd__ tries to find the nearest common + base ABC, and fall back to its builtin type. Since this + class doesn't subclass a concrete type, there's no + implementation to fall back to, so we need to try as + hard as possible to return an actual value, or the user + will get a TypeError. + + """ + def forward(a, b): + if isinstance(b, (int, Fraction)): + return monomorphic_operator(a, b) + elif isinstance(b, float): + return fallback_operator(float(a), b) + elif isinstance(b, complex): + return fallback_operator(complex(a), b) + else: + return NotImplemented + forward.__name__ = '__' + fallback_operator.__name__ + '__' + forward.__doc__ = monomorphic_operator.__doc__ + + def reverse(b, a): + if isinstance(a, numbers.Rational): + # Includes ints. + return monomorphic_operator(a, b) + elif isinstance(a, numbers.Real): + return fallback_operator(float(a), float(b)) + elif isinstance(a, numbers.Complex): + return fallback_operator(complex(a), complex(b)) + else: + return NotImplemented + reverse.__name__ = '__r' + fallback_operator.__name__ + '__' + reverse.__doc__ = monomorphic_operator.__doc__ + + return forward, reverse + + def _add(a, b): + """a + b""" + da, db = a.denominator, b.denominator + return Fraction(a.numerator * db + b.numerator * da, + da * db) + + __add__, __radd__ = _operator_fallbacks(_add, operator.add) + + def _sub(a, b): + """a - b""" + da, db = a.denominator, b.denominator + return Fraction(a.numerator * db - b.numerator * da, + da * db) + + __sub__, __rsub__ = _operator_fallbacks(_sub, operator.sub) + + def _mul(a, b): + """a * b""" + return Fraction(a.numerator * b.numerator, a.denominator * b.denominator) + + __mul__, __rmul__ = _operator_fallbacks(_mul, operator.mul) + + def _div(a, b): + """a / b""" + return Fraction(a.numerator * b.denominator, + a.denominator * b.numerator) + + __truediv__, __rtruediv__ = _operator_fallbacks(_div, operator.truediv) + + def __floordiv__(a, b): + """a // b""" + return math.floor(a / b) + + def 
__rfloordiv__(b, a): + """a // b""" + return math.floor(a / b) + + def __mod__(a, b): + """a % b""" + div = a // b + return a - b * div + + def __rmod__(b, a): + """a % b""" + div = a // b + return a - b * div + + def __pow__(a, b): + """a ** b + + If b is not an integer, the result will be a float or complex + since roots are generally irrational. If b is an integer, the + result will be rational. + + """ + if isinstance(b, numbers.Rational): + if b.denominator == 1: + power = b.numerator + if power >= 0: + return Fraction(a._numerator ** power, + a._denominator ** power, + _normalize=False) + elif a._numerator >= 0: + return Fraction(a._denominator ** -power, + a._numerator ** -power, + _normalize=False) + else: + return Fraction((-a._denominator) ** -power, + (-a._numerator) ** -power, + _normalize=False) + else: + # A fractional power will generally produce an + # irrational number. + return float(a) ** float(b) + else: + return float(a) ** b + + def __rpow__(b, a): + """a ** b""" + if b._denominator == 1 and b._numerator >= 0: + # If a is an int, keep it that way if possible. 
+ return a ** b._numerator + + if isinstance(a, numbers.Rational): + return Fraction(a.numerator, a.denominator) ** b + + if b._denominator == 1: + return a ** b._numerator + + return a ** float(b) + + def __pos__(a): + """+a: Coerces a subclass instance to Fraction""" + return Fraction(a._numerator, a._denominator, _normalize=False) + + def __neg__(a): + """-a""" + return Fraction(-a._numerator, a._denominator, _normalize=False) + + def __abs__(a): + """abs(a)""" + return Fraction(abs(a._numerator), a._denominator, _normalize=False) + + def __trunc__(a): + """trunc(a)""" + if a._numerator < 0: + return -(-a._numerator // a._denominator) + else: + return a._numerator // a._denominator + + def __floor__(a): + """Will be math.floor(a) in 3.0.""" + return a.numerator // a.denominator + + def __ceil__(a): + """Will be math.ceil(a) in 3.0.""" + # The negations cleverly convince floordiv to return the ceiling. + return -(-a.numerator // a.denominator) + + def __round__(self, ndigits=None): + """Will be round(self, ndigits) in 3.0. + + Rounds half toward even. + """ + if ndigits is None: + floor, remainder = divmod(self.numerator, self.denominator) + if remainder * 2 < self.denominator: + return floor + elif remainder * 2 > self.denominator: + return floor + 1 + # Deal with the half case: + elif floor % 2 == 0: + return floor + else: + return floor + 1 + shift = 10**abs(ndigits) + # See _operator_fallbacks.forward to check that the results of + # these operations will always be Fraction and therefore have + # round(). + if ndigits > 0: + return Fraction(round(self * shift), shift) + else: + return Fraction(round(self / shift) * shift) + + def __hash__(self): + """hash(self)""" + + # XXX since this method is expensive, consider caching the result + + # In order to make sure that the hash of a Fraction agrees + # with the hash of a numerically equal integer, float or + # Decimal instance, we follow the rules for numeric hashes + # outlined in the documentation. 
(See library docs, 'Built-in + # Types'). + + # dinv is the inverse of self._denominator modulo the prime + # _PyHASH_MODULUS, or 0 if self._denominator is divisible by + # _PyHASH_MODULUS. + dinv = pow(self._denominator, _PyHASH_MODULUS - 2, _PyHASH_MODULUS) + if not dinv: + hash_ = _PyHASH_INF + else: + hash_ = abs(self._numerator) * dinv % _PyHASH_MODULUS + result = hash_ if self >= 0 else -hash_ + return -2 if result == -1 else result + + def __eq__(a, b): + """a == b""" + if type(b) is int: + return a._numerator == b and a._denominator == 1 + if isinstance(b, numbers.Rational): + return (a._numerator == b.numerator and + a._denominator == b.denominator) + if isinstance(b, numbers.Complex) and b.imag == 0: + b = b.real + if isinstance(b, float): + if math.isnan(b) or math.isinf(b): + # comparisons with an infinity or nan should behave in + # the same way for any finite a, so treat a as zero. + return 0.0 == b + else: + return a == a.from_float(b) + else: + # Since a doesn't know how to compare with b, let's give b + # a chance to compare itself with a. + return NotImplemented + + def _richcmp(self, other, op): + """Helper for comparison operators, for internal use only. + + Implement comparison between a Rational instance `self`, and + either another Rational instance or a float `other`. If + `other` is not a Rational instance or a float, return + NotImplemented. `op` should be one of the six standard + comparison operators. + + """ + # convert other to a Rational instance where reasonable. 
+ if isinstance(other, numbers.Rational): + return op(self._numerator * other.denominator, + self._denominator * other.numerator) + if isinstance(other, float): + if math.isnan(other) or math.isinf(other): + return op(0.0, other) + else: + return op(self, self.from_float(other)) + else: + return NotImplemented + + def __lt__(a, b): + """a < b""" + return a._richcmp(b, operator.lt) + + def __gt__(a, b): + """a > b""" + return a._richcmp(b, operator.gt) + + def __le__(a, b): + """a <= b""" + return a._richcmp(b, operator.le) + + def __ge__(a, b): + """a >= b""" + return a._richcmp(b, operator.ge) + + def __bool__(a): + """a != 0""" + return a._numerator != 0 + + # support for pickling, copy, and deepcopy + + def __reduce__(self): + return (self.__class__, (str(self),)) + + def __copy__(self): + if type(self) == Fraction: + return self # I'm immutable; therefore I am my own clone + return self.__class__(self._numerator, self._denominator) + + def __deepcopy__(self, memo): + if type(self) == Fraction: + return self # My components are also immutable + return self.__class__(self._numerator, self._denominator) diff --git a/modules/language/python/module/numbers.py b/modules/language/python/module/numbers.py new file mode 100644 index 0000000..1b221c8 --- /dev/null +++ b/modules/language/python/module/numbers.py @@ -0,0 +1,393 @@ +module(numbers) + +# Copyright 2007 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Abstract Base Classes (ABCs) for numbers, according to PEP 3141. + +TODO: Fill out more detailed documentation on the operators.""" + +from abc import ABCMeta, abstractmethod + +__all__ = ["Number", "Complex", "Real", "Rational", "Integral"] + +class Number(metaclass=ABCMeta): + """All numbers inherit from this class. + + If you just want to check if an argument x is a number, without + caring what kind, use isinstance(x, Number). 
+ """ + __slots__ = () + + # Concrete numeric types must provide their own hash implementation + __hash__ = None + + +## Notes on Decimal +## ---------------- +## Decimal has all of the methods specified by the Real abc, but it should +## not be registered as a Real because decimals do not interoperate with +## binary floats (i.e. Decimal('3.14') + 2.71828 is undefined). But, +## abstract reals are expected to interoperate (i.e. R1 + R2 should be +## expected to work if R1 and R2 are both Reals). + +class Complex(Number): + """Complex defines the operations that work on the builtin complex type. + + In short, those are: a conversion to complex, .real, .imag, +, -, + *, /, abs(), .conjugate, ==, and !=. + + If it is given heterogenous arguments, and doesn't have special + knowledge about them, it should fall back to the builtin complex + type as described below. + """ + + __slots__ = () + + @abstractmethod + def __complex__(self): + """Return a builtin complex instance. Called for complex(self).""" + + def __bool__(self): + """True if self != 0. Called for bool(self).""" + return self != 0 + + @property + @abstractmethod + def real(self): + """Retrieve the real component of this number. + + This should subclass Real. + """ + raise NotImplementedError + + @property + @abstractmethod + def imag(self): + """Retrieve the imaginary component of this number. + + This should subclass Real. 
+ """ + raise NotImplementedError + + @abstractmethod + def __add__(self, other): + """self + other""" + raise NotImplementedError + + @abstractmethod + def __radd__(self, other): + """other + self""" + raise NotImplementedError + + @abstractmethod + def __neg__(self): + """-self""" + raise NotImplementedError + + @abstractmethod + def __pos__(self): + """+self""" + raise NotImplementedError + + def __sub__(self, other): + """self - other""" + return self + -other + + def __rsub__(self, other): + """other - self""" + return -self + other + + @abstractmethod + def __mul__(self, other): + """self * other""" + raise NotImplementedError + + @abstractmethod + def __rmul__(self, other): + """other * self""" + raise NotImplementedError + + @abstractmethod + def __truediv__(self, other): + """self / other: Should promote to float when necessary.""" + raise NotImplementedError + + @abstractmethod + def __rtruediv__(self, other): + """other / self""" + raise NotImplementedError + + @abstractmethod + def __pow__(self, exponent): + """self**exponent; should promote to float or complex when necessary.""" + raise NotImplementedError + + @abstractmethod + def __rpow__(self, base): + """base ** self""" + raise NotImplementedError + + @abstractmethod + def __abs__(self): + """Returns the Real distance from 0. Called for abs(self).""" + raise NotImplementedError + + @abstractmethod + def conjugate(self): + """(x+y*i).conjugate() returns (x-y*i).""" + raise NotImplementedError + + @abstractmethod + def __eq__(self, other): + """self == other""" + raise NotImplementedError + +Complex.register(complex) +Complex.register(|<complex>|) + +class Real(Complex): + """To Complex, Real adds the operations that work on real numbers. + + In short, those are: a conversion to float, trunc(), divmod, + %, <, <=, >, and >=. + + Real also provides defaults for the derived operations. 
+ """ + + __slots__ = () + + @abstractmethod + def __float__(self): + """Any Real can be converted to a native float object. + + Called for float(self).""" + raise NotImplementedError + + @abstractmethod + def __trunc__(self): + """trunc(self): Truncates self to an Integral. + + Returns an Integral i such that: + * i>0 iff self>0; + * abs(i) <= abs(self); + * for any Integral j satisfying the first two conditions, + abs(i) >= abs(j) [i.e. i has "maximal" abs among those]. + i.e. "truncate towards 0". + """ + raise NotImplementedError + + @abstractmethod + def __floor__(self): + """Finds the greatest Integral <= self.""" + raise NotImplementedError + + @abstractmethod + def __ceil__(self): + """Finds the least Integral >= self.""" + raise NotImplementedError + + @abstractmethod + def __round__(self, ndigits=None): + """Rounds self to ndigits decimal places, defaulting to 0. + + If ndigits is omitted or None, returns an Integral, otherwise + returns a Real. Rounds half toward even. + """ + raise NotImplementedError + + def __divmod__(self, other): + """divmod(self, other): The pair (self // other, self % other). + + Sometimes this can be computed faster than the pair of + operations. + """ + return (self // other, self % other) + + def __rdivmod__(self, other): + """divmod(other, self): The pair (self // other, self % other). + + Sometimes this can be computed faster than the pair of + operations. 
+ """ + return (other // self, other % self) + + @abstractmethod + def __floordiv__(self, other): + """self // other: The floor() of self/other.""" + raise NotImplementedError + + @abstractmethod + def __rfloordiv__(self, other): + """other // self: The floor() of other/self.""" + raise NotImplementedError + + @abstractmethod + def __mod__(self, other): + """self % other""" + raise NotImplementedError + + @abstractmethod + def __rmod__(self, other): + """other % self""" + raise NotImplementedError + + @abstractmethod + def __lt__(self, other): + """self < other + + < on Reals defines a total ordering, except perhaps for NaN.""" + raise NotImplementedError + + @abstractmethod + def __le__(self, other): + """self <= other""" + raise NotImplementedError + + # Concrete implementations of Complex abstract methods. + def __complex__(self): + """complex(self) == complex(float(self), 0)""" + return complex(float(self)) + + @property + def real(self): + """Real numbers are their real component.""" + return +self + + @property + def imag(self): + """Real numbers have no imaginary component.""" + return 0 + + def conjugate(self): + """Conjugate is a no-op for Reals.""" + return +self + +Real.register(float) +Real.register(|<real>|) + +class Rational(Real): + """.numerator and .denominator should be in lowest terms.""" + + __slots__ = () + + @property + @abstractmethod + def numerator(self): + raise NotImplementedError + + @property + @abstractmethod + def denominator(self): + raise NotImplementedError + + # Concrete implementation of Real's conversion to float. + def __float__(self): + """float(self) = self.numerator / self.denominator + + It's important that this conversion use the integer's "true" + division rather than casting one side to float before dividing + so that ratios of huge integers convert without overflowing. 
+ + """ + return self.numerator / self.denominator + +Rational.register(|<fraction>|) + +class Integral(Rational): + """Integral adds a conversion to int and the bit-string operations.""" + + __slots__ = () + + @abstractmethod + def __int__(self): + """int(self)""" + raise NotImplementedError + + def __index__(self): + """Called whenever an index is needed, such as in slicing""" + return int(self) + + @abstractmethod + def __pow__(self, exponent, modulus=None): + """self ** exponent % modulus, but maybe faster. + + Accept the modulus argument if you want to support the + 3-argument version of pow(). Raise a TypeError if exponent < 0 + or any argument isn't Integral. Otherwise, just implement the + 2-argument version described in Complex. + """ + raise NotImplementedError + + @abstractmethod + def __lshift__(self, other): + """self << other""" + raise NotImplementedError + + @abstractmethod + def __rlshift__(self, other): + """other << self""" + raise NotImplementedError + + @abstractmethod + def __rshift__(self, other): + """self >> other""" + raise NotImplementedError + + @abstractmethod + def __rrshift__(self, other): + """other >> self""" + raise NotImplementedError + + @abstractmethod + def __and__(self, other): + """self & other""" + raise NotImplementedError + + @abstractmethod + def __rand__(self, other): + """other & self""" + raise NotImplementedError + + @abstractmethod + def __xor__(self, other): + """self ^ other""" + raise NotImplementedError + + @abstractmethod + def __rxor__(self, other): + """other ^ self""" + raise NotImplementedError + + @abstractmethod + def __or__(self, other): + """self | other""" + raise NotImplementedError + + @abstractmethod + def __ror__(self, other): + """other | self""" + raise NotImplementedError + + @abstractmethod + def __invert__(self): + """~self""" + raise NotImplementedError + + # Concrete implementations of Rational and Real abstract methods. 
+ def __float__(self): + """float(self) == float(int(self))""" + return float(int(self)) + + @property + def numerator(self): + """Integers are their own numerators.""" + return +self + + @property + def denominator(self): + """Integers have a denominator of 1.""" + return 1 + +Integral.register(int) +Integral.register(|<integer>|) diff --git a/modules/language/python/module/operator.scm b/modules/language/python/module/operator.scm index e506817..2500563 100644 --- a/modules/language/python/module/operator.scm +++ b/modules/language/python/module/operator.scm @@ -46,22 +46,22 @@ ;; Comparison Operations -(define-inlinable (lt a b) (< a b)) -(define-inlinable (le a b) (<= a b)) -(define-inlinable (eq a b) (equal? a b)) -(define-inlinable (ne a b) (not (equal? a b))) -(define-inlinable (ge a b) (>= a b)) -(define-inlinable (gt a b) (> a b)) +(define (lt a b) (< a b)) +(define (le a b) (<= a b)) +(define (eq a b) (equal? a b)) +(define (ne a b) (not (equal? a b))) +(define (ge a b) (>= a b)) +(define (gt a b) (> a b)) ;; Logical Operations -(define-inlinable (not_ a) (not a)) -(define-inlinable (truth a) (if a #t #f)) -(define-inlinable (is_ a b) (eq? a b)) -(define-inlinable (is_not a b) (not (eq? a b))) +(define (not_ a) (not a)) +(define (truth a) (if a #t #f)) +(define (is_ a b) (eq? a b)) +(define (is_not a b) (not (eq? 
a b))) ;; Mathematical/Bitwise Operations (define abs (@ (guile) abs)) -(define-inlinable (add a b) (+ a b)) +(define (add a b) (+ a b)) (define and_ py-logand) (define floordiv py-floordiv) (define index py-index) @@ -69,20 +69,20 @@ (define invert inv) (define lshift py-lshift) (define mod py-mod) -(define-inlinable (mul x y) (* x y)) +(define (mul x y) (* x y)) (define matmul py-matmul) -(define-inlinable (neg x) (- x)) +(define (neg x) (- x)) (define or_ py-logior) -(define-inlinable (pos a) (+ a)) +(define (pos a) (+ a)) (define pow expt) (define rshift py-rshift) -(define-inlinable (sub a b) (- a b)) +(define (sub a b) (- a b)) (define truediv py-/) (define xor py-logxor) ;; Sequence Operations -(define-inlinable (concat a b) (+ a b)) -(define-inlinable (contains a b) (in b a)) +(define (concat a b) (+ a b)) +(define (contains a b) (in b a)) (define (countOf a b) (for ((x : a)) ((c 0)) (if (equal? x b) diff --git a/modules/language/python/module/statistics.py b/modules/language/python/module/statistics.py new file mode 100644 index 0000000..9368a93 --- /dev/null +++ b/modules/language/python/module/statistics.py @@ -0,0 +1,672 @@ +module(statistics) + +""" +Basic statistics module. + +This module provides functions for calculating statistics of data, including +averages, variance, and standard deviation. + +Calculating averages +-------------------- + +================== ============================================= +Function Description +================== ============================================= +mean Arithmetic mean (average) of data. +harmonic_mean Harmonic mean of data. +median Median (middle value) of data. +median_low Low median of data. +median_high High median of data. +median_grouped Median, or 50th percentile, of grouped data. +mode Mode (most common value) of data. 
+================== ============================================= + +Calculate the arithmetic mean ("the average") of data: + +>>> mean([-1.0, 2.5, 3.25, 5.75]) +2.625 + + +Calculate the standard median of discrete data: + +>>> median([2, 3, 4, 5]) +3.5 + + +Calculate the median, or 50th percentile, of data grouped into class intervals +centred on the data values provided. E.g. if your data points are rounded to +the nearest whole number: + +>>> median_grouped([2, 2, 3, 3, 3, 4]) #doctest: +ELLIPSIS +2.8333333333... + +This should be interpreted in this way: you have two data points in the class +interval 1.5-2.5, three data points in the class interval 2.5-3.5, and one in +the class interval 3.5-4.5. The median of these data points is 2.8333... + + +Calculating variability or spread +--------------------------------- + +================== ============================================= +Function Description +================== ============================================= +pvariance Population variance of data. +variance Sample variance of data. +pstdev Population standard deviation of data. +stdev Sample standard deviation of data. +================== ============================================= + +Calculate the standard deviation of sample data: + +>>> stdev([2.5, 3.25, 5.5, 11.25, 11.75]) #doctest: +ELLIPSIS +4.38961843444... + +If you have previously calculated the mean, you can pass it as the optional +second argument to the four "spread" functions to avoid recalculating it: + +>>> data = [1, 2, 2, 4, 4, 4, 5, 6] +>>> mu = mean(data) +>>> pvariance(data, mu) +2.5 + + +Exceptions +---------- + +A single exception is defined: StatisticsError is a subclass of ValueError. 
# === Exceptions ===

class StatisticsError(ValueError):
    """Raised when data cannot be summarised (for example, empty input)."""


# === Private utilities ===

def _sum(data, start=0):
    """Return ``(type, total, count)`` for an exact sum of ``data``.

    Each value is converted to an exact (numerator, denominator) pair and
    the numerators are accumulated per denominator, so nothing is rounded
    while summing.  ``type`` is the common type the inputs coerce to,
    ``total`` is a Fraction (or a non-finite value when a NAN or INF is
    present), and ``count`` is the number of items taken from ``data``.
    ``start`` is added to the total but is not counted.

    >>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75)
    (<class 'float'>, Fraction(11, 1), 5)

    >>> _sum([1e50, 1, -1e50] * 1000)   # builtin sum() would give zero
    (<class 'float'>, Fraction(1000, 1), 3000)

    Mixed types raise TypeError, except that int mixes with anything.
    """
    num, den = _exact_ratio(start)
    by_denominator = {den: num}
    result_type = _coerce(int, type(start))
    count = 0
    for typ, run in groupby(data, type):
        result_type = _coerce(result_type, typ)  # or raise TypeError
        for num, den in map(_exact_ratio, run):
            count += 1
            by_denominator[den] = by_denominator.get(den, 0) + num
    if None in by_denominator:
        # A denominator of None marks a NAN or INF; the finite partial
        # sums cannot change the outcome, so only that entry matters.
        total = by_denominator[None]
        assert not _isfinite(total)
    else:
        total = sum(Fraction(n, d) for d, n in sorted(by_denominator.items()))
    return (result_type, total, count)


def _isfinite(x):
    """Return True if ``x`` is finite.

    Uses ``x.is_finite()`` when available (as on Decimal) and falls back
    to ``math.isfinite`` otherwise.
    """
    try:
        finite = x.is_finite()
    except AttributeError:
        finite = math.isfinite(x)
    return finite


def _coerce(T, S):
    """Return the common type for ``T`` and ``S``, or raise TypeError.

    Rules, in order: identical types stay as they are; int and bool give
    way to the other type; a subclass wins over its base class; int
    subclasses give way to the other type; Fraction mixed with float
    yields the float type.  Anything else is rejected.
    """
    # See http://bugs.python.org/issue24068.
    assert T is not bool, "initial type T is bool"
    # Cheapest and most common outcome first: nothing to coerce.
    if T is S or S is int or S is bool:
        return T
    if T is int or issubclass(S, T):
        return S
    if issubclass(T, S):
        return T
    if issubclass(T, int):
        return S
    if issubclass(S, int):
        return T
    if issubclass(T, Fraction) and issubclass(S, float):
        return S
    if issubclass(T, float) and issubclass(S, Fraction):
        return T
    msg = "don't know how to coerce %s and %s"
    raise TypeError(msg % (T.__name__, S.__name__))


def _exact_ratio(x):
    """Return ``x`` as an exact ``(numerator, denominator)`` pair.

    >>> _exact_ratio(0.25)
    (1, 4)

    A NAN or INF is returned as ``(x, None)``.  Raises TypeError when ``x``
    offers neither ``numerator``/``denominator`` nor ``as_integer_ratio``.
    """
    try:
        kind = type(x)
        # Fast path for the types expected to be most common.
        if kind is float or kind is Decimal:
            return x.as_integer_ratio()
        try:
            # int, Fraction, or anything following the Integral ABC.
            return (x.numerator, x.denominator)
        except AttributeError:
            pass
        try:
            # Subclasses of float or Decimal.
            return x.as_integer_ratio()
        except AttributeError:
            pass
    except (OverflowError, ValueError):
        # as_integer_ratio() refuses NAN and INF.
        assert not _isfinite(x)
        return (x, None)
    msg = "can't convert type '{}' to numerator/denominator"
    raise TypeError(msg.format(type(x).__name__))


def _convert(value, T):
    """Convert ``value`` to the numeric type ``T``.

    A value that already has type ``T`` is returned unchanged.  When ``T``
    is an int type but ``value`` has a denominator other than 1, float is
    used instead.  Decimal types that reject the value directly are built
    by dividing numerator by denominator.
    """
    if type(value) is T:
        # Covers T being Fraction, and NAN/INF values (Decimal or float).
        return value
    if issubclass(T, int) and value.denominator != 1:
        T = float
    try:
        # FIXME: what do we do if this overflows?
        return T(value)
    except TypeError:
        if not issubclass(T, Decimal):
            raise
        return T(value.numerator)/T(value.denominator)


def _counts(data):
    """Return the ``(value, frequency)`` pairs sharing the highest frequency.

    The result is empty when ``data`` is empty.
    """
    ranked = collections.Counter(iter(data)).most_common()
    if not ranked:
        return ranked
    # most_common() sorts by descending frequency, so the entries matching
    # the first frequency are exactly the leading run of the table.
    top = ranked[0][1]
    return [pair for pair in ranked if pair[1] == top]


def _find_lteq(a, x):
    """Return the index of the leftmost item of sorted ``a`` equal to ``x``.

    Raises ValueError when ``x`` does not occur in ``a``.
    """
    i = bisect_left(a, x)
    if i != len(a) and a[i] == x:
        return i
    raise ValueError


def _find_rteq(a, l, x):
    """Return the index of the rightmost item of sorted ``a[l:]`` equal to ``x``.

    Raises ValueError when ``x`` does not occur at or after index ``l``.
    """
    i = bisect_right(a, x, lo=l)
    # bisect_right() returns a value in l..len(a); only i > l means that at
    # least one element of a[l:] is <= x.  Without this guard a[i-1] would
    # wrap around to a[-1] (or fail on an empty list) when i == 0, and could
    # report a match lying before ``l``.
    if i > l and a[i-1] == x:
        return i-1
    raise ValueError


def _fail_neg(values, errmsg='negative value'):
    """Yield each of ``values``, raising StatisticsError at the first one
    that is less than zero."""
    for x in values:
        if x < 0:
            raise StatisticsError(errmsg)
        yield x


# === Measures of central tendency (averages) ===

def mean(data):
    """Return the sample arithmetic mean of ``data``.

    >>> mean([1, 2, 3, 4, 4])
    2.8

    >>> from fractions import Fraction as F
    >>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)])
    Fraction(13, 21)

    >>> from decimal import Decimal as D
    >>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")])
    Decimal('0.5625')

    Raises StatisticsError if ``data`` is empty.
    """
    if iter(data) is data:
        # An iterator has no len(); materialise it first.
        data = list(data)
    n = len(data)
    if n < 1:
        raise StatisticsError('mean requires at least one data point')
    T, total, count = _sum(data)
    assert count == n
    return _convert(total/n, T)


def harmonic_mean(data):
    """Return the harmonic mean of ``data``.

    The harmonic mean is the reciprocal of the arithmetic mean of the
    reciprocals of the data.  It suits averages of rates or ratios, such
    as speeds or price/earnings ratios:

    >>> harmonic_mean([2.5, 3, 10])  # For an equal investment portfolio.
    3.6

    Returns 0 when any data point is zero.  Raises StatisticsError if
    ``data`` is empty or contains a value less than zero, and TypeError
    if a single data point is neither a Real number nor a Decimal.
    """
    # For a justification for using harmonic mean for P/E ratios, see
    # http://fixthepitch.pellucid.com/comps-analysis-the-missing-harmony-of-summary-statistics/
    # http://papers.ssrn.com/sol3/papers.cfm?abstract_id=2621087
    if iter(data) is data:
        data = list(data)
    errmsg = 'harmonic mean does not support negative values'
    n = len(data)
    if n < 1:
        raise StatisticsError('harmonic_mean requires at least one data point')
    if n == 1:
        # A single value is its own harmonic mean; return it unconverted.
        x = data[0]
        if not isinstance(x, (numbers.Real, Decimal)):
            raise TypeError('unsupported type')
        if x < 0:
            raise StatisticsError(errmsg)
        return x
    try:
        T, total, count = _sum(1/x for x in _fail_neg(data, errmsg))
    except ZeroDivisionError:
        # A zero in the data drives the harmonic mean to zero.
        return 0
    assert count == n
    return _convert(n/total, T)


# FIXME: investigate ways to calculate medians without sorting? Quickselect?
def median(data):
    """Return the median (middle value) of numeric ``data``.

    For an odd number of points the middle point is returned; for an even
    number the two middle values are averaged:

    >>> median([1, 3, 5])
    3
    >>> median([1, 3, 5, 7])
    4.0

    Raises StatisticsError if ``data`` is empty.
    """
    ordered = sorted(data)
    n = len(ordered)
    if n == 0:
        raise StatisticsError("no median for empty data")
    mid = n//2
    if n%2 == 1:
        return ordered[mid]
    return (ordered[mid - 1] + ordered[mid])/2


def median_low(data):
    """Return the low median of numeric ``data``.

    For an odd number of points the middle value is returned; for an even
    number the smaller of the two middle values is returned:

    >>> median_low([1, 3, 5])
    3
    >>> median_low([1, 3, 5, 7])
    3

    Raises StatisticsError if ``data`` is empty.
    """
    ordered = sorted(data)
    n = len(ordered)
    if n == 0:
        raise StatisticsError("no median for empty data")
    mid = n//2
    return ordered[mid] if n%2 == 1 else ordered[mid - 1]


def median_high(data):
    """Return the high median of ``data``.

    For an odd number of points the middle value is returned; for an even
    number the larger of the two middle values is returned:

    >>> median_high([1, 3, 5])
    3
    >>> median_high([1, 3, 5, 7])
    5

    Raises StatisticsError if ``data`` is empty.
    """
    ordered = sorted(data)
    n = len(ordered)
    if n == 0:
        raise StatisticsError("no median for empty data")
    return ordered[n//2]


def median_grouped(data, interval=1):
    """Return the 50th percentile (median) of grouped continuous data.

    >>> median_grouped([1, 2, 2, 3, 4, 4, 4, 4, 4, 5])
    3.7
    >>> median_grouped([52, 52, 53, 54])
    52.5

    Each data value is treated as the midpoint of a class of width
    ``interval`` (default 1), and the median is interpolated inside the
    class that holds the middle value:

    >>> median_grouped([1, 3, 3, 5, 7], interval=1)
    3.25
    >>> median_grouped([1, 3, 3, 5, 7], interval=2)
    3.5

    No check is made that the data points are at least ``interval``
    apart.  Raises StatisticsError if ``data`` is empty, and TypeError if
    the middle value or ``interval`` is a str or bytes.
    """
    data = sorted(data)
    n = len(data)
    if n == 0:
        raise StatisticsError("no median for empty data")
    elif n == 1:
        return data[0]
    # The value at the midpoint is the centre of the median class.
    x = data[n//2]
    for obj in (x, interval):
        if isinstance(obj, (str, bytes)):
            raise TypeError('expected number but got %r' % obj)
    try:
        L = x - interval/2  # The lower limit of the median interval.
    except TypeError:
        # Mixed type. For now we just coerce to float.
        L = float(x) - float(interval)/2

    # Bisection locates the run of values equal to x in the sorted data.
    l1 = _find_lteq(data, x)        # leftmost occurrence of x
    l2 = _find_rteq(data, l1, x)    # rightmost occurrence, searched from l1
    cf = l1                         # number of values below the median class
    f = l2 - l1 + 1                 # number of values in the median class
    return L + interval*(n/2 - cf)/f


def mode(data):
    """Return the most common data point from discrete or nominal data.

    >>> mode([1, 1, 2, 3, 3, 3, 3, 4])
    3

    Non-numeric data works as well:

    >>> mode(["red", "blue", "blue", "red", "green", "red", "red"])
    'red'

    Raises StatisticsError if ``data`` is empty or if there is not exactly
    one most common value.
    """
    table = _counts(data)
    if len(table) == 1:
        return table[0][0]
    if not table:
        raise StatisticsError('no mode for empty data')
    raise StatisticsError(
            'no unique mode; found %d equally common values' % len(table)
            )


# === Measures of spread ===

# See http://mathworld.wolfram.com/Variance.html
#     http://mathworld.wolfram.com/SampleVariance.html
#     http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
#
# Under no circumstances use the so-called "computational formula for
# variance", as that is only suitable for hand calculations with a small
# amount of low-precision data. It has terrible numeric properties.
#
# See a comparison of three computational methods here:
# http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/

def _ss(data, c=None):
    """Return ``(type, total)`` for the sum of squared deviations of ``data``.

    Deviations are measured from ``c``; when ``c`` is None the mean of
    ``data`` is computed first and used instead.  Passing a ``c`` that is
    not the true mean can produce garbage results.  ``data`` is iterated
    more than once and its length is taken.
    """
    centre = mean(data) if c is None else c
    sq_type, sq_total, sq_count = _sum((x-centre)**2 for x in data)
    # The plain deviations should sum to zero mathematically; whatever is
    # left over is rounding error, which is used to correct the total.
    dev_type, dev_total, dev_count = _sum((x-centre) for x in data)
    assert sq_type == dev_type and sq_count == dev_count
    total = sq_total - dev_total**2/len(data)
    assert not total < 0, 'negative sum of square deviations: %f' % total
    return (sq_type, total)


def variance(data, xbar=None):
    """Return the sample variance of ``data``.

    ``data`` should hold at least two Real-valued numbers.  ``xbar``, when
    given, should be the mean of the data; when missing or None the mean
    is calculated automatically.  No check is made that ``xbar`` really is
    the mean, and arbitrary values may give invalid or impossible results.

    Use this for a sample drawn from a population; for a whole population
    see ``pvariance``.

    >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5]
    >>> variance(data)
    1.3720238095238095
    >>> variance(data, mean(data))
    1.3720238095238095

    Decimals and Fractions are supported:

    >>> from decimal import Decimal as D
    >>> variance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")])
    Decimal('31.01875')

    >>> from fractions import Fraction as F
    >>> variance([F(1, 6), F(1, 2), F(5, 3)])
    Fraction(67, 108)

    Raises StatisticsError if there are fewer than two data points.
    """
    if iter(data) is data:
        # Iterators must be materialised before len() and repeated passes.
        data = list(data)
    size = len(data)
    if size < 2:
        raise StatisticsError('variance requires at least two data points')
    kind, squares = _ss(data, xbar)
    return _convert(squares/(size-1), kind)


def pvariance(data, mu=None):
    """Return the population variance of ``data``.

    ``data`` should hold at least one Real-valued number.  ``mu``, when
    given, should be the mean of the data; when missing or None the mean
    is calculated automatically.  No check is made that ``mu`` really is
    the mean, and arbitrary values may give invalid or impossible results.

    Use this for an entire population; to estimate the variance from a
    sample, ``variance`` is usually the better choice.

    >>> data = [0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25]
    >>> pvariance(data)
    1.25
    >>> pvariance(data, mean(data))
    1.25

    Decimals and Fractions are supported:

    >>> from decimal import Decimal as D
    >>> pvariance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")])
    Decimal('24.815')

    >>> from fractions import Fraction as F
    >>> pvariance([F(1, 4), F(5, 4), F(1, 2)])
    Fraction(13, 72)

    Raises StatisticsError if ``data`` is empty.
    """
    if iter(data) is data:
        data = list(data)
    size = len(data)
    if size < 1:
        raise StatisticsError('pvariance requires at least one data point')
    kind, squares = _ss(data, mu)
    return _convert(squares/size, kind)


def stdev(data, xbar=None):
    """Return the square root of the sample variance.

    Arguments are passed straight to ``variance``; see it for details.

    >>> stdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
    1.0810874155219827
    """
    var = variance(data, xbar)
    try:
        # Prefer the value's own sqrt() method when it has one.
        root = var.sqrt()
    except AttributeError:
        root = math.sqrt(var)
    return root


def pstdev(data, mu=None):
    """Return the square root of the population variance.

    Arguments are passed straight to ``pvariance``; see it for details.

    >>> pstdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
    0.986893273527251
    """
    var = pvariance(data, mu)
    try:
        # Prefer the value's own sqrt() method when it has one.
        root = var.sqrt()
    except AttributeError:
        root = math.sqrt(var)
    return root