You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
nvim_config/typings/seaborn/_statistics.pyi

133 lines
5.8 KiB

"""
This type stub file was generated by pyright.
"""
"""Statistical transformations for visualization.
This module is currently private, but is being written to eventually form part
of the public API.
The classes should behave roughly in the style of scikit-learn.
- All data-independent parameters should be passed to the class constructor.
- Each class should implement a default transformation that is exposed through
__call__. These are currently written for vector arguments, but I think
consuming a whole `plot_data` DataFrame and returning it with transformed
variables would make more sense.
- Some classes have data-dependent preprocessing that should be cached and used
multiple times (think defining histogram bins off all data and then counting
observations within each bin multiple times, once per data subset). These
currently have unique names, but it would be good to have a common name. Not
quite `fit`, but something similar.
- Alternatively, the transform interface could take some information about grouping
variables and do a groupby internally.
- Some classes should define alternate transforms that might make the most sense
with a different function. For example, KDE usually evaluates the distribution
on a regular grid, but it would be useful for it to transform at the actual
datapoints. Then again, this could be controlled by a parameter at the time of
class instantiation.
"""
class KDE:
    """Univariate and bivariate kernel density estimator."""

    def __init__(
        self,
        *,
        bw_method=...,
        bw_adjust=...,
        gridsize=...,
        cut=...,
        clip=...,
        cumulative=...,
    ) -> None:
        """Initialize the estimator with its parameters.

        All parameters are keyword-only.

        Parameters
        ----------
        bw_method : string, scalar, or callable, optional
            Method for determining the smoothing bandwidth to use; passed to
            :class:`scipy.stats.gaussian_kde`.
        bw_adjust : number, optional
            Factor that multiplicatively scales the value chosen using
            ``bw_method``. Increasing will make the curve smoother.
        gridsize : int, optional
            Number of points on each dimension of the evaluation grid.
        cut : number, optional
            Factor, multiplied by the smoothing bandwidth, that determines how
            far the evaluation grid extends past the extreme datapoints. When
            set to 0, truncate the curve at the data limits.
        clip : pair of numbers or None, or a pair of such pairs
            Do not evaluate the density outside of these limits.
        cumulative : bool, optional
            If True, estimate a cumulative distribution function.

        """
        ...

    def define_support(self, x1, x2=..., weights=..., cache=...):  # -> NDArray[floating[Any]] | tuple[NDArray[floating[Any]], NDArray[floating[Any]]]:
        """Create the evaluation grid for a given data set."""
        ...

    def __call__(self, x1, x2=..., weights=...):  # -> tuple[NDArray[Unknown] | Unknown, NDArray[floating[Any]] | tuple[NDArray[floating[Any]], NDArray[floating[Any]]]] | tuple[NDArray[float64] | Unknown, NDArray[floating[Any]] | tuple[NDArray[floating[Any]], NDArray[floating[Any]]]]:
        """Fit and evaluate on univariate or bivariate data."""
        ...
class Histogram:
    """Univariate and bivariate histogram estimator."""

    def __init__(
        self,
        stat=...,
        bins=...,
        binwidth=...,
        binrange=...,
        discrete=...,
        cumulative=...,
    ) -> None:
        """Initialize the estimator with its parameters.

        Parameters
        ----------
        stat : {"count", "frequency", "density", "probability"}
            Aggregate statistic to compute in each bin.

            - ``count`` shows the number of observations
            - ``frequency`` shows the number of observations divided by the bin width
            - ``density`` normalizes counts so that the area of the histogram is 1
            - ``probability`` normalizes counts so that the sum of the bar heights is 1

        bins : str, number, vector, or a pair of such values
            Generic bin parameter that can be the name of a reference rule,
            the number of bins, or the breaks of the bins.
            Passed to :func:`numpy.histogram_bin_edges`.
        binwidth : number or pair of numbers
            Width of each bin, overrides ``bins`` but can be used with
            ``binrange``.
        binrange : pair of numbers or a pair of pairs
            Lowest and highest value for bin edges; can be used either
            with ``bins`` or ``binwidth``. Defaults to data extremes.
        discrete : bool or pair of bools
            If True, set ``binwidth`` and ``binrange`` such that bin
            edges cover integer values in the dataset.
        cumulative : bool
            If True, return the cumulative statistic.

        """
        ...

    def define_bin_edges(self, x1, x2=..., weights=..., cache=...):  # -> NDArray[Any] | tuple[Unknown, ...]:
        """Given data, return the edges of the histogram bins."""
        ...

    def __call__(self, x1, x2=..., weights=...):  # -> tuple[Any | ndarray[Any, Any] | NDArray[float64] | NDArray[Any], Unknown | NDArray[Any] | tuple[Unknown, ...]] | tuple[Unknown, Unknown | NDArray[Any] | tuple[Unknown, ...]]:
        """Count the occurrences in each bin, maybe normalize."""
        ...
class ECDF:
    """Univariate empirical cumulative distribution estimator."""

    def __init__(self, stat=..., complementary=...) -> None:
        """Initialize the class with its parameters.

        Parameters
        ----------
        stat : {"proportion", "count"}
            Distribution statistic to compute.
        complementary : bool
            If True, use the complementary CDF (1 - CDF)

        """
        ...

    def __call__(self, x1, x2=..., weights=...):  # -> tuple[Any, Any]:
        """Return proportion or count of observations below each sorted datapoint."""
        ...