Source code for sympy.statistics.distributions

from __future__ import print_function, division

from sympy.core import sympify, Lambda, Dummy, Integer, Rational, oo, Float, pi
from sympy.core.compatibility import xrange
from sympy.functions import sqrt, exp, erf
from sympy.printing import sstr
from sympy.utilities import default_sort_key

import random


[docs]class Sample(tuple):
    """
    Sample([x1, x2, x3, ...]) represents a collection of samples.
    Sample parameters like mean, variance and stddev can be accessed as
    properties.
    The sample will be sorted.

    Examples
    ========

        >>> from sympy.statistics.distributions import Sample
        >>> Sample([0, 1, 2, 3])
        Sample([0, 1, 2, 3])
        >>> Sample([8, 3, 2, 4, 1, 6, 9, 2])
        Sample([1, 2, 2, 3, 4, 6, 8, 9])
        >>> s = Sample([1, 2, 3, 4, 5])
        >>> s.mean
        3
        >>> s.stddev
        sqrt(2)
        >>> s.median
        3
        >>> s.variance
        2

    """
    def __new__(cls, sample):
        s = tuple.__new__(cls, sorted(sample, key=default_sort_key))
        s.mean = mean = sum(s) / Integer(len(s))
        s.variance = sum([(x - mean)**2 for x in s]) / Integer(len(s))
        s.stddev = sqrt(s.variance)
        if len(s) % 2:
            s.median = s[len(s)//2]
        else:
            s.median = sum(s[len(s)//2 - 1:len(s)//2 + 1]) / Integer(2)
        return s

    def __repr__(self):
        return sstr(self)

    def __str__(self):
        return sstr(self)


[docs]class ContinuousProbability(object):
    """Base class for continuous probability distributions"""

[docs]    def probability(s, a, b):
        """
        Calculate the probability that a random number x generated
        from the distribution satisfies a <= x <= b

        Examples
        ========

            >>> from sympy.statistics import Normal
            >>> from sympy.core import oo
            >>> Normal(0, 1).probability(-1, 1)
            erf(sqrt(2)/2)
            >>> Normal(0, 1).probability(1, oo)
            -erf(sqrt(2)/2)/2 + 1/2

        """
        return s.cdf(b) - s.cdf(a)

[docs]    def random(s, n=None):
        """
        random() -- generate a random number from the distribution.
        random(n) -- generate a Sample of n random numbers.

        Examples
        ========

            >>> from sympy.statistics import Uniform
            >>> x = Uniform(1, 5).random()
            >>> x < 5 and x > 1
            True
            >>> x = Uniform(-4, 2).random()
            >>> x < 2 and x > -4
            True

        """
        if n is None:
            return s._random()
        else:
            return Sample([s._random() for i in xrange(n)])

    def __repr__(self):
        return sstr(self)

    def __str__(self):
        return sstr(self)


[docs]class Normal(ContinuousProbability):
    """
    Normal(mu, sigma) represents the normal or Gaussian distribution
    with mean value mu and standard deviation sigma.

    Examples
    ========

        >>> from sympy.statistics import Normal
        >>> from sympy import oo
        >>> N = Normal(1, 2)
        >>> N.mean
        1
        >>> N.variance
        4
        >>> N.probability(-oo, 1)   # probability on an interval
        1/2
        >>> N.probability(1, oo)
        1/2
        >>> N.probability(-oo, oo)
        1
        >>> N.probability(-1, 3)
        erf(sqrt(2)/2)
        >>> _.evalf()
        0.682689492137086

    """
    def __init__(self, mu, sigma):
        self.mu = sympify(mu)
        self.sigma = sympify(sigma)

    mean = property(lambda s: s.mu)
    median = property(lambda s: s.mu)
    mode = property(lambda s: s.mu)
    stddev = property(lambda s: s.sigma)
    variance = property(lambda s: s.sigma**2)

[docs]    def pdf(s, x):
        """
        Return the probability density function as an expression in x

        Examples
        ========

            >>> from sympy.statistics import Normal
            >>> Normal(1, 2).pdf(0)
            sqrt(2)*exp(-1/8)/(4*sqrt(pi))
            >>> from sympy.abc import x
            >>> Normal(1, 2).pdf(x)
            sqrt(2)*exp(-(x - 1)**2/8)/(4*sqrt(pi))

        """
        x = sympify(x)
        return 1/(s.sigma*sqrt(2*pi)) * exp(-(x - s.mu)**2 / (2*s.sigma**2))

[docs]    def cdf(s, x):
        """
        Return the cumulative density function as an expression in x

        Examples
        ========

            >>> from sympy.statistics import Normal
            >>> Normal(1, 2).cdf(0)
            -erf(sqrt(2)/4)/2 + 1/2
            >>> from sympy.abc import x
            >>> Normal(1, 2).cdf(x)
            erf(sqrt(2)*(x - 1)/4)/2 + 1/2

        """
        x = sympify(x)
        return (1 + erf((x - s.mu)/(s.sigma*sqrt(2))))/2

    def _random(s):
        return random.gauss(float(s.mu), float(s.sigma))

[docs]    def confidence(s, p):
        """Return a symmetric (p*100)% confidence interval. For example,
        p=0.95 gives a 95% confidence interval. Currently this function
        only handles numerical values except in the trivial case p=1.

        For example, one standard deviation:

            >>> from sympy.statistics import Normal
            >>> N = Normal(0, 1)
            >>> N.confidence(0.68)
            (-0.994457883209753, 0.994457883209753)
            >>> N.probability(*_).evalf()
            0.680000000000000

        Two standard deviations:

            >>> N = Normal(0, 1)
            >>> N.confidence(0.95)
            (-1.95996398454005, 1.95996398454005)
            >>> N.probability(*_).evalf()
            0.950000000000000

        """

        if p == 1:
            return (-oo, oo)

        if p > 1:
            raise ValueError("p cannot be greater than 1")

        # In terms of n*sigma, we have n = sqrt(2)*ierf(p). The inverse
        # error function is not yet implemented in SymPy but can easily be
        # computed numerically

        from sympy.mpmath import mpf, erfinv

        # calculate y = ierf(p) by solving erf(y) - p = 0
        y = erfinv(mpf(p))
        t = Float(str(mpf(float(s.sigma)) * mpf(2)**0.5 * y))
        mu = s.mu.evalf()
        return (mu - t, mu + t)

    @staticmethod
[docs]    def fit(sample):
        """
        Create a normal distribution fit to the mean and standard
        deviation of the given distribution or sample.

        Examples
        ========

            >>> from sympy.statistics import Normal
            >>> Normal.fit([1,2,3,4,5])
            Normal(3, sqrt(2))
            >>> from sympy.abc import x, y
            >>> Normal.fit([x, y])
            Normal(x/2 + y/2, sqrt((-x/2 + y/2)**2/2 + (x/2 - y/2)**2/2))

        """
        if not hasattr(sample, "stddev"):
            sample = Sample(sample)
        return Normal(sample.mean, sample.stddev)


[docs]class Uniform(ContinuousProbability):
    """
    Uniform(a, b) represents a probability distribution with uniform
    probability density on the interval [a, b] and zero density
    everywhere else.
    """
    def __init__(self, a, b):
        self.a = sympify(a)
        self.b = sympify(b)

    mean = property(lambda s: (s.a + s.b)/2)
    median = property(lambda s: (s.a + s.b)/2)
    mode = property(lambda s: (s.a + s.b)/2)  # arbitrary
    variance = property(lambda s: (s.b - s.a)**2 / 12)
    stddev = property(lambda s: sqrt(s.variance))

[docs]    def pdf(s, x):
        """
        Return the probability density function as an expression in x

        Examples
        ========

            >>> from sympy.statistics import Uniform
            >>> Uniform(1, 5).pdf(1)
            1/4
            >>> Uniform(2, 4).pdf(2)
            1/2

        """
        x = sympify(x)
        if not x.is_Number:
            raise NotImplementedError("SymPy does not yet support "
                "piecewise functions")
        if x < s.a or x > s.b:
            return Rational(0)
        return 1/(s.b - s.a)

[docs]    def cdf(s, x):
        """
        Return the cumulative density function as an expression in x

        Examples
        ========

            >>> from sympy.statistics import Uniform
            >>> Uniform(1, 5).cdf(2)
            1/4
            >>> Uniform(1, 5).cdf(4)
            3/4

        """
        x = sympify(x)
        if not x.is_Number:
            raise NotImplementedError("SymPy does not yet support "
                "piecewise functions")
        if x <= s.a:
            return Rational(0)
        if x >= s.b:
            return Rational(1)
        return (x - s.a)/(s.b - s.a)

    def _random(s):
        return Float(random.uniform(float(s.a), float(s.b)))

[docs]    def confidence(s, p):
        """Generate a symmetric (p*100)% confidence interval.

        >>> from sympy import Rational
        >>> from sympy.statistics import Uniform
        >>> U = Uniform(1, 2)
        >>> U.confidence(1)
        (1, 2)
        >>> U.confidence(Rational(1,2))
        (5/4, 7/4)

        """
        p = sympify(p)
        if p > 1:
            raise ValueError("p cannot be greater than 1")

        d = (s.b - s.a)*p / 2
        return (s.mean - d, s.mean + d)

    @staticmethod
[docs]    def fit(sample):
        """
        Create a uniform distribution fit to the mean and standard
        deviation of the given distribution or sample.

        Examples
        ========

            >>> from sympy.statistics import Uniform
            >>> Uniform.fit([1, 2, 3, 4, 5])
            Uniform(-sqrt(6) + 3, sqrt(6) + 3)
            >>> Uniform.fit([1, 2])
            Uniform(-sqrt(3)/2 + 3/2, sqrt(3)/2 + 3/2)

        """
        if not hasattr(sample, "stddev"):
            sample = Sample(sample)
        m = sample.mean
        d = sqrt(12*sample.variance)/2
        return Uniform(m - d, m + d)


[docs]class PDF(ContinuousProbability):
    """
    PDF(func, (x, a, b)) represents continuous probability distribution
    with probability distribution function func(x) on interval (a, b)

    If func is not normalized so that integrate(func, (x, a, b)) == 1,
    it can be normalized using PDF.normalize() method

    Examples
    ========

        >>> from sympy import Symbol, exp, oo
        >>> from sympy.statistics.distributions import PDF
        >>> from sympy.abc import x
        >>> a = Symbol('a', positive=True)

        >>> exponential = PDF(exp(-x/a)/a, (x,0,oo))
        >>> exponential.pdf(x)
        exp(-x/a)/a
        >>> exponential.cdf(x)
        1 - exp(-x/a)
        >>> exponential.mean
        a
        >>> exponential.variance
        a**2

    """

    def __init__(self, pdf, var):
        #XXX maybe add some checking of parameters
        if isinstance(var, (tuple, list)):
            self.pdf = Lambda(var[0], pdf)
            self.domain = tuple(var[1:])
        else:
            self.pdf = Lambda(var, pdf)
            self.domain = (-oo, oo)
        self._cdf = None
        self._mean = None
        self._variance = None
        self._stddev = None

[docs]    def normalize(self):
        """
        Normalize the probability distribution function so that
        integrate(self.pdf(x), (x, a, b)) == 1

        Examples
        ========

            >>> from sympy import Symbol, exp, oo
            >>> from sympy.statistics.distributions import PDF
            >>> from sympy.abc import x
            >>> a = Symbol('a', positive=True)

            >>> exponential = PDF(exp(-x/a), (x,0,oo))
            >>> exponential.normalize().pdf(x)
            exp(-x/a)/a

        """

        norm = self.probability(*self.domain)
        if norm != 1:
            w = Dummy('w', real=True)
            return self.__class__(self.pdf(w)/norm, (w, self.domain[0], self.domain[1]))
            #self._cdf = Lambda(w, (self.cdf(w) - self.cdf(self.domain[0]))/norm)
            #if self._mean is not None:
            #    self._mean /= norm
            #if self._variance is not None:
            #    self._variance = (self._variance + (self._mean*norm)**2)/norm - self.mean**2
            #if self._stddev is not None:
            #    self._stddev = sqrt(self._variance)
        else:
            return self

[docs]    def cdf(self, x):
        """
        Return the cumulative density function as an expression in x

        Examples
        ========

            >>> from sympy.statistics.distributions import PDF
            >>> from sympy import exp, oo
            >>> from sympy.abc import x, y
            >>> PDF(exp(-x/y), (x,0,oo)).cdf(4)
            y - y*exp(-4/y)
            >>> PDF(2*x + y, (x, 10, oo)).cdf(0)
            -10*y - 100

        """
        x = sympify(x)
        if self._cdf is not None:
            return self._cdf(x)
        else:
            from sympy import integrate
            w = Dummy('w', real=True)
            self._cdf = integrate(self.pdf(w), w)
            self._cdf = Lambda(
                w, self._cdf - self._cdf.subs(w, self.domain[0]))
            return self._cdf(x)

    def _get_mean(self):
        if self._mean is not None:
            return self._mean
        else:
            from sympy import integrate
            w = Dummy('w', real=True)
            self._mean = integrate(
                self.pdf(w)*w, (w, self.domain[0], self.domain[1]))
            return self._mean

    def _get_variance(self):
        if self._variance is not None:
            return self._variance
        else:
            from sympy import integrate, simplify
            w = Dummy('w', real=True)
            self._variance = integrate(self.pdf(
                w)*w**2, (w, self.domain[0], self.domain[1])) - self.mean**2
            self._variance = simplify(self._variance)
            return self._variance

    def _get_stddev(self):
        if self._stddev is not None:
            return self._stddev
        else:
            self._stddev = sqrt(self.variance)
            return self._stddev

    mean = property(_get_mean)
    variance = property(_get_variance)
    stddev = property(_get_stddev)

    def _random(s):
        raise NotImplementedError

[docs]    def transform(self, func, var):
        """
        Return a probability distribution of random variable func(x)
        currently only some simple injective functions are supported

        Examples
        ========

            >>> from sympy.statistics.distributions import PDF
            >>> from sympy import oo
            >>> from sympy.abc import x, y
            >>> PDF(2*x + y, (x, 10, oo)).transform(x, y)
            PDF(0, ((_w,), x, x))

        """

        w = Dummy('w', real=True)

        from sympy import solve
        from sympy import S
        inverse = solve(func - w, var)
        newPdf = S.Zero
        funcdiff = func.diff(var)
        #TODO check if x is in domain
        for x in inverse:
            # this assignment holds only for x in domain
            # in general it would require implementing
            # piecewise defined functions in sympy
            newPdf += (self.pdf(var)/abs(funcdiff)).subs(var, x)

        return PDF(newPdf, (w, func.subs(var, self.domain[0]), func.subs(var, self.domain[1])))
Navigation

Source code for sympy.statistics.distributions

Quick search

Navigation