"""Dictionary learning."""

import itertools
import sys
import time
import warnings
from math import ceil
from numbers import Integral, Real

import numpy as np
from joblib import effective_n_jobs
from scipy import linalg

from ..base import (
    BaseEstimator,
    ClassNamePrefixFeaturesOutMixin,
    TransformerMixin,
    _fit_context,
)
from ..linear_model import Lars, Lasso, LassoLars, orthogonal_mp_gram
from ..utils import check_array, check_random_state, gen_batches, gen_even_slices
from ..utils._param_validation import Hidden, Interval, StrOptions, validate_params
from ..utils.extmath import randomized_svd, row_norms, svd_flip
from ..utils.parallel import Parallel, delayed
from ..utils.validation import check_is_fitted


def _check_positive_coding(method, positive):
    if positive and method in ["omp", "lars"]:
        raise ValueError(
            "Positive constraint not supported for '{}' coding method.".format(method)
        )


def _sparse_encode_precomputed(
    X,
    dictionary,
    *,
    gram=None,
    cov=None,
    algorithm="lasso_lars",
    regularization=None,
    copy_cov=True,
    init=None,
    max_iter=1000,
    verbose=0,
    positive=False,
):
    """Generic sparse coding with precomputed Gram and/or covariance matrices.

    Each row of the result is the solution to a Lasso problem.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Data matrix.

    dictionary : ndarray of shape (n_components, n_features)
        The dictionary matrix against which to solve the sparse coding of
        the data. Some of the algorithms assume normalized rows.

    gram : ndarray of shape (n_components, n_components), default=None
        Precomputed Gram matrix, `dictionary * dictionary'`
        gram can be `None` if method is 'threshold'.

    cov : ndarray of shape (n_components, n_samples), default=None
        Precomputed covariance, `dictionary * X'`.

    algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, \
            default='lasso_lars'
        The algorithm used:

        * `'lars'`: uses the least angle regression method
          (`linear_model.lars_path`);
        * `'lasso_lars'`: uses Lars to compute the Lasso solution;
        * `'lasso_cd'`: uses the coordinate descent method to compute the
          Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if
          the estimated components are sparse;
        * `'omp'`: uses orthogonal matching pursuit to estimate the sparse
          solution;
        * `'threshold'`: squashes to zero all coefficients less than
          regularization from the projection `dictionary * data'`.

    regularization : int or float, default=None
        The regularization parameter. It corresponds to alpha when
        algorithm is `'lasso_lars'`, `'lasso_cd'` or `'threshold'`.
        Otherwise it corresponds to `n_nonzero_coefs`.

    init : ndarray of shape (n_samples, n_components), default=None
        Initialization value of the sparse code. Only used if
        `algorithm='lasso_cd'`.

    max_iter : int, default=1000
        Maximum number of iterations to perform if `algorithm='lasso_cd'` or
        `'lasso_lars'`.

    copy_cov : bool, default=True
        Whether to copy the precomputed covariance matrix; if `False`, it may
        be overwritten.

    verbose : int, default=0
        Controls the verbosity; the higher, the more messages.

    positive: bool, default=False
        Whether to enforce a positivity constraint on the sparse code.

        .. versionadded:: 0.20

    Returns
    -------
    code : ndarray of shape (n_samples, n_components)
        The sparse codes.
    """
    n_samples, n_features = X.shape
    n_components = dictionary.shape[0]

    if algorithm == "lasso_lars":
        alpha = float(regularization) / n_features  # account for scaling
        try:
            err_mgt = np.seterr(all="ignore")

            # Not passing in verbose=max(0, verbose-1) because Lars.fit already
            # corrects the verbosity level.
            lasso_lars = LassoLars(
                alpha=alpha,
                fit_intercept=False,
                verbose=verbose,
                precompute=gram,
                fit_path=False,
                positive=positive,
                max_iter=max_iter,
            )
            lasso_lars.fit(dictionary.T, X.T, Xy=cov)
            new_code = lasso_lars.coef_
        finally:
            np.seterr(**err_mgt)

    elif algorithm == "lasso_cd":
        alpha = float(regularization) / n_features  # account for scaling

        clf = Lasso(
            alpha=alpha,
            fit_intercept=False,
            precompute=gram,
            max_iter=max_iter,
            warm_start=True,
            positive=positive,
        )

        if init is not None:
            # In some workflows using coordinate descent algorithms:
            # - users might provide NumPy arrays with read-only buffers
            # - `joblib` might memmap arrays making their buffer read-only
            if not init.flags["WRITEABLE"]:
                init = np.array(init)
            clf.coef_ = init

        clf.fit(dictionary.T, X.T, check_input=False)
        new_code = clf.coef_

    elif algorithm == "lars":
        try:
            err_mgt = np.seterr(all="ignore")

            # Not passing in verbose=max(0, verbose-1) because Lars.fit already
            # corrects the verbosity level.
            lars = Lars(
                fit_intercept=False,
                verbose=verbose,
                precompute=gram,
                n_nonzero_coefs=int(regularization),
                fit_path=False,
            )
            lars.fit(dictionary.T, X.T, Xy=cov)
            new_code = lars.coef_
        finally:
            np.seterr(**err_mgt)

    elif algorithm == "threshold":
        new_code = (np.sign(cov) * np.maximum(np.abs(cov) - regularization, 0)).T
        if positive:
            np.clip(new_code, 0, None, out=new_code)

    elif algorithm == "omp":
        new_code = orthogonal_mp_gram(
            Gram=gram,
            Xy=cov,
            n_nonzero_coefs=int(regularization),
            tol=None,
            norms_squared=row_norms(X, squared=True),
            copy_Xy=copy_cov,
        ).T

    return new_code.reshape(n_samples, n_components)


@validate_params(
    {
        "X": ["array-like"],
        "dictionary": ["array-like"],
        "gram": ["array-like", None],
        "cov": ["array-like", None],
        "algorithm": [
            StrOptions({"lasso_lars", "lasso_cd", "lars", "omp", "threshold"})
        ],
        "n_nonzero_coefs": [Interval(Integral, 1, None, closed="left"), None],
        "alpha": [Interval(Real, 0, None, closed="left"), None],
        "copy_cov": ["boolean"],
        "init": ["array-like", None],
        "max_iter": [Interval(Integral, 0, None, closed="left")],
        "n_jobs": [Integral, None],
        "check_input": ["boolean"],
        "verbose": ["verbose"],
        "positive": ["boolean"],
    },
    prefer_skip_nested_validation=True,
)
def sparse_encode(
    X,
    dictionary,
    *,
    gram=None,
    cov=None,
    algorithm="lasso_lars",
    n_nonzero_coefs=None,
    alpha=None,
    copy_cov=True,
    init=None,
    max_iter=1000,
    n_jobs=None,
    check_input=True,
    verbose=0,
    positive=False,
):
    """Sparse coding.

    Each row of the result is the solution to a sparse coding problem.
    The goal is to find a sparse array `code` such that::

        X ~= code * dictionary

    Read more in the :ref:`User Guide <SparseCoder>`.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data matrix.

    dictionary : array-like of shape (n_components, n_features)
        The dictionary matrix against which to solve the sparse coding of
        the data. Some of the algorithms assume normalized rows for meaningful
        output.

    gram : array-like of shape (n_components, n_components), default=None
        Precomputed Gram matrix, `dictionary * dictionary'`.

    cov : array-like of shape (n_components, n_samples), default=None
        Precomputed covariance, `dictionary * X'`.

    algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, \
            default='lasso_lars'
        The algorithm used:

        * `'lars'`: uses the least angle regression method
          (`linear_model.lars_path`);
        * `'lasso_lars'`: uses Lars to compute the Lasso solution;
        * `'lasso_cd'`: uses the coordinate descent method to compute the
          Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if
          the estimated components are sparse;
        * `'omp'`: uses orthogonal matching pursuit to estimate the sparse
          solution;
        * `'threshold'`: squashes to zero all coefficients less than
          regularization from the projection `dictionary * data'`.

    n_nonzero_coefs : int, default=None
        Number of nonzero coefficients to target in each column of the
        solution. This is only used by `algorithm='lars'` and `algorithm='omp'`
        and is overridden by `alpha` in the `omp` case. If `None`, then
        `n_nonzero_coefs=int(n_features / 10)`.

    alpha : float, default=None
        If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the
        penalty applied to the L1 norm.
        If `algorithm='threshold'`, `alpha` is the absolute value of the
        threshold below which coefficients will be squashed to zero.
        If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of
        the reconstruction error targeted. In this case, it overrides
        `n_nonzero_coefs`.
        If `None`, default to 1.

    copy_cov : bool, default=True
        Whether to copy the precomputed covariance matrix; if `False`, it may
        be overwritten.

    init : ndarray of shape (n_samples, n_components), default=None
        Initialization value of the sparse codes. Only used if
        `algorithm='lasso_cd'`.

    max_iter : int, default=1000
        Maximum number of iterations to perform if `algorithm='lasso_cd'` or
        `'lasso_lars'`.

    n_jobs : int, default=None
        Number of parallel jobs to run.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    check_input : bool, default=True
        If `False`, the input arrays X and dictionary will not be checked.

    verbose : int, default=0
        Controls the verbosity; the higher, the more messages.

    positive : bool, default=False
        Whether to enforce positivity when finding the encoding.

        .. versionadded:: 0.20

    Returns
    -------
    code : ndarray of shape (n_samples, n_components)
        The sparse codes.
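
    Examples
    --------
    A small usage sketch, reusing the data of the :class:`SparseCoder`
    doctest elsewhere in this module (so the expected output is the same):

    >>> import numpy as np
    >>> from sklearn.decomposition import sparse_encode
    >>> X = np.array([[-1, -1, -1], [0, 0, 3]])
    >>> dictionary = np.array(
    ...     [[0, 1, 0],
    ...      [-1, -1, 2],
    ...      [1, 1, 1],
    ...      [0, 1, 1],
    ...      [0, 2, 1]],
    ...     dtype=np.float64)
    >>> sparse_encode(X, dictionary, alpha=1e-10)
    array([[ 0.,  0., -1.,  0.,  0.],
           [ 0.,  1.,  1.,  0.,  0.]])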

    See Also
    --------
    sklearn.linear_model.lars_path : Compute Least Angle Regression or Lasso
        path using LARS algorithm.
    sklearn.linear_model.orthogonal_mp : Solves Orthogonal Matching Pursuit problems.
    sklearn.linear_model.Lasso : Train Linear Model with L1 prior as regularizer.
    SparseCoder : Find a sparse representation of data from a fixed precomputed
        dictionary.
    """
    if check_input:
        if algorithm == "lasso_cd":
            dictionary = check_array(
                dictionary, order="C", dtype=[np.float64, np.float32]
            )
            X = check_array(X, order="C", dtype=[np.float64, np.float32])
        else:
            dictionary = check_array(dictionary)
            X = check_array(X)

    if dictionary.shape[1] != X.shape[1]:
        raise ValueError(
            "Dictionary and X have different numbers of features:"
            "dictionary.shape: {} X.shape{}".format(dictionary.shape, X.shape)
        )

    _check_positive_coding(algorithm, positive)

    return _sparse_encode(
        X,
        dictionary,
        gram=gram,
        cov=cov,
        algorithm=algorithm,
        n_nonzero_coefs=n_nonzero_coefs,
        alpha=alpha,
        copy_cov=copy_cov,
        init=init,
        max_iter=max_iter,
        n_jobs=n_jobs,
        verbose=verbose,
        positive=positive,
    )


def _sparse_encode(
    X,
    dictionary,
    *,
    gram=None,
    cov=None,
    algorithm="lasso_lars",
    n_nonzero_coefs=None,
    alpha=None,
    copy_cov=True,
    init=None,
    max_iter=1000,
    n_jobs=None,
    verbose=0,
    positive=False,
):
    """Sparse coding without input/parameter validation."""
    n_samples, n_features = X.shape
    n_components = dictionary.shape[0]

    if algorithm in ("lars", "omp"):
        regularization = n_nonzero_coefs
        if regularization is None:
            regularization = min(max(n_features / 10, 1), n_components)
    else:
        regularization = alpha
        if regularization is None:
            regularization = 1.0

    if gram is None and algorithm != "threshold":
        gram = np.dot(dictionary, dictionary.T)

    if cov is None and algorithm != "lasso_cd":
        copy_cov = False
        cov = np.dot(dictionary, X.T)

    if effective_n_jobs(n_jobs) == 1 or algorithm == "threshold":
        code = _sparse_encode_precomputed(
            X,
            dictionary,
            gram=gram,
            cov=cov,
            algorithm=algorithm,
            regularization=regularization,
            copy_cov=copy_cov,
            init=init,
            max_iter=max_iter,
            verbose=verbose,
            positive=positive,
        )
        return code

    # Split the problem in even slices of samples for parallel coding.
    code = np.empty((n_samples, n_components))
    slices = list(gen_even_slices(n_samples, effective_n_jobs(n_jobs)))

    code_views = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(_sparse_encode_precomputed)(
            X[this_slice],
            dictionary,
            gram=gram,
            cov=cov[:, this_slice] if cov is not None else None,
            algorithm=algorithm,
            regularization=regularization,
            copy_cov=copy_cov,
            init=init[this_slice] if init is not None else None,
            max_iter=max_iter,
            verbose=verbose,
            positive=positive,
        )
        for this_slice in slices
    )
    for this_slice, this_view in zip(slices, code_views):
        code[this_slice] = this_view
    return code


def _update_dict(
    dictionary,
    Y,
    code,
    A=None,
    B=None,
    verbose=False,
    random_state=None,
    positive=False,
):
    """Update the dense dictionary factor in place.

    Parameters
    ----------
    dictionary : ndarray of shape (n_components, n_features)
        Value of the dictionary at the previous iteration.

    Y : ndarray of shape (n_samples, n_features)
        Data matrix.

    code : ndarray of shape (n_samples, n_components)
        Sparse coding of the data against which to optimize the dictionary.

    A : ndarray of shape (n_components, n_components), default=None
        Together with `B`, sufficient stats of the online model to update the
        dictionary.

    B : ndarray of shape (n_features, n_components), default=None
        Together with `A`, sufficient stats of the online model to update the
        dictionary.

    verbose: bool, default=False
        Degree of output the procedure will print.

    random_state : int, RandomState instance or None, default=None
        Used for randomly initializing the dictionary. Pass an int for
        reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.

    positive : bool, default=False
        Whether to enforce positivity when finding the dictionary.

        .. versionadded:: 0.20
    """
    n_samples, n_components = code.shape
    random_state = check_random_state(random_state)

    if A is None:
        A = code.T @ code
    if B is None:
        B = Y.T @ code

    n_unused = 0

    for k in range(n_components):
        if A[k, k] > 1e-6:
            # 1e-6 is arbitrary but consistent with the spams implementation
            dictionary[k] += (B[:, k] - A[k] @ dictionary) / A[k, k]
        else:
            # kth atom is almost never used -> sample a new one from the data
            newd = Y[random_state.choice(n_samples)]

            # add small noise to avoid making the sparse coding ill conditioned
            noise_level = 0.01 * (newd.std() or 0.01)  # avoid 0 std
            noise = random_state.normal(0, noise_level, size=len(newd))

            dictionary[k] = newd + noise
            code[:, k] = 0
            n_unused += 1

        if positive:
            np.clip(dictionary[k], 0, None, out=dictionary[k])

        # Projection on the constraint set ||V_k|| <= 1
        dictionary[k] /= max(linalg.norm(dictionary[k]), 1)

    if verbose and n_unused > 0:
        print(f"{n_unused} unused atoms resampled.")


def _dict_learning(
    X,
    n_components,
    *,
    alpha,
    max_iter,
    tol,
    method,
    n_jobs,
    dict_init,
    code_init,
    callback,
    verbose,
    random_state,
    return_n_iter,
    positive_dict,
    positive_code,
    method_max_iter,
):
    """Main dictionary learning algorithm"""
    t0 = time.time()
    # Init the code and the dictionary with SVD of Y
    if code_init is not None and dict_init is not None:
        code = np.array(code_init, order="F")
        # Don't copy V, it will happen below
        dictionary = dict_init
    else:
        code, S, dictionary = linalg.svd(X, full_matrices=False)
        # flip the initial code's sign to enforce deterministic output
        code, dictionary = svd_flip(code, dictionary)
        dictionary = S[:, np.newaxis] * dictionary
    r = len(dictionary)
    if n_components <= r:  # True even if n_components=None
        code = code[:, :n_components]
        dictionary = dictionary[:n_components, :]
    else:
        code = np.c_[code, np.zeros((len(code), n_components - r))]
        dictionary = np.r_[
            dictionary, np.zeros((n_components - r, dictionary.shape[1]))
        ]

    # Fortran-order dict better suited for the sparse coding which is the
    # bottleneck of this algorithm.
    dictionary = np.asfortranarray(dictionary)

    errors = []
    current_cost = np.nan

    if verbose == 1:
        print("[dict_learning]", end=" ")

    # If max_iter is 0, number of iterations returned should be zero
    ii = -1

    for ii in range(max_iter):
        dt = time.time() - t0
        if verbose == 1:
            sys.stdout.write(".")
            sys.stdout.flush()
        elif verbose:
            print(
                "Iteration % 3i (elapsed time: % 3is, % 4.1fmn, current cost % 7.3f)"
                % (ii, dt, dt / 60, current_cost)
            )

        # Update code
        code = sparse_encode(
            X,
            dictionary,
            algorithm=method,
            alpha=alpha,
            init=code,
            n_jobs=n_jobs,
            positive=positive_code,
            max_iter=method_max_iter,
            verbose=verbose,
        )

        # Update dictionary in place
        _update_dict(
            dictionary,
            X,
            code,
            verbose=verbose,
            random_state=random_state,
            positive=positive_dict,
        )

        # Cost function
        current_cost = 0.5 * np.sum((X - code @ dictionary) ** 2) + alpha * np.sum(
            np.abs(code)
        )
        errors.append(current_cost)

        if ii > 0:
            dE = errors[-2] - errors[-1]
            if dE < tol * errors[-1]:
                if verbose == 1:
                    # A line return
                    print("")
                elif verbose:
                    print("--- Convergence reached after %d iterations" % ii)
                break
        if ii % 5 == 0 and callback is not None:
            callback(locals())

    if return_n_iter:
        return code, dictionary, errors, ii + 1
    else:
        return code, dictionary, errors


def _check_warn_deprecated(param, name, default, additional_message=None):
    if param != "deprecated":
        msg = (
            f"'{name}' is deprecated in version 1.1 and will be removed in "
            "version 1.4."
        )
        if additional_message:
            msg += f" {additional_message}"
        warnings.warn(msg, FutureWarning)
        return param
    else:
        return default


@validate_params(
    {
        "X": ["array-like"],
        "return_code": ["boolean"],
        "method": [StrOptions({"cd", "lars"})],
        "method_max_iter": [Interval(Integral, 0, None, closed="left")],
    },
    prefer_skip_nested_validation=False,
)
def dict_learning_online(
    X,
    n_components=2,
    *,
    alpha=1,
    n_iter="deprecated",
    max_iter=None,
    return_code=True,
    dict_init=None,
    callback=None,
    batch_size=256,
    verbose=False,
    shuffle=True,
    n_jobs=None,
    method="lars",
    iter_offset="deprecated",
    random_state=None,
    return_inner_stats="deprecated",
    inner_stats="deprecated",
    return_n_iter="deprecated",
    positive_dict=False,
    positive_code=False,
    method_max_iter=1000,
    tol=1e-3,
    max_no_improvement=10,
):
    """Solve a dictionary learning matrix factorization problem online.

    Finds the best dictionary and the corresponding sparse code for
    approximating the data matrix X by solving::

        (U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1
                     (U,V)
                     with || V_k ||_2 = 1 for all  0 <= k < n_components

    where V is the dictionary and U is the sparse code. ||.||_Fro stands for
    the Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm
    which is the sum of the absolute values of all the entries in the matrix.
    This is accomplished by repeatedly iterating over mini-batches by slicing
    the input data.

    Read more in the :ref:`User Guide <DictionaryLearning>`.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Data matrix.

    n_components : int or None, default=2
        Number of dictionary atoms to extract. If None, then ``n_components``
        is set to ``n_features``.

    alpha : float, default=1
        Sparsity controlling parameter.

    n_iter : int, default=100
        Number of mini-batch iterations to perform.

        .. deprecated:: 1.1
           `n_iter` is deprecated in 1.1 and will be removed in 1.4. Use
           `max_iter` instead.

    max_iter : int, default=None
        Maximum number of iterations over the complete dataset before
        stopping independently of any early stopping criterion heuristics.
        If ``max_iter`` is not None, ``n_iter`` is ignored.

        .. versionadded:: 1.1

    return_code : bool, default=True
        Whether to also return the code U or just the dictionary `V`.

    dict_init : ndarray of shape (n_components, n_features), default=None
        Initial values for the dictionary for warm restart scenarios.
        If `None`, the initial values for the dictionary are created
        with an SVD decomposition of the data via
        :func:`~sklearn.utils.extmath.randomized_svd`.

    callback : callable, default=None
        A callable that gets invoked at the end of each iteration.

    batch_size : int, default=256
        The number of samples to take in each batch.

        .. versionchanged:: 1.3
           The default value of `batch_size` changed from 3 to 256 in version 1.3.

    verbose : bool, default=False
        To control the verbosity of the procedure.

    shuffle : bool, default=True
        Whether to shuffle the data before splitting it in batches.

    n_jobs : int, default=None
        Number of parallel jobs to run.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    method : {'lars', 'cd'}, default='lars'
        * `'lars'`: uses the least angle regression method to solve the lasso
          problem (`linear_model.lars_path`);
        * `'cd'`: uses the coordinate descent method to compute the
          Lasso solution (`linear_model.Lasso`). Lars will be faster if
          the estimated components are sparse.

    iter_offset : int, default=0
        Number of previous iterations completed on the dictionary used for
        initialization.

        .. deprecated:: 1.1
           `iter_offset` serves internal purpose only and will be removed in 1.4.

    random_state : int, RandomState instance or None, default=None
        Used for initializing the dictionary when ``dict_init`` is not
        specified, randomly shuffling the data when ``shuffle`` is set to
        ``True``, and updating the dictionary. Pass an int for reproducible
        results across multiple function calls.
        See :term:`Glossary <random_state>`.

    return_inner_stats : bool, default=False
        Return the inner statistics A (dictionary covariance) and B
        (data approximation). Useful to restart the algorithm in an
        online setting. If `return_inner_stats` is `True`, `return_code` is
        ignored.

        .. deprecated:: 1.1
           `return_inner_stats` serves internal purpose only and will be removed in 1.4.

    inner_stats : tuple of (A, B) ndarrays, default=None
        Inner sufficient statistics that are kept by the algorithm.
        Passing them at initialization is useful in online settings, to
        avoid losing the history of the evolution.
        `A` `(n_components, n_components)` is the dictionary covariance matrix.
        `B` `(n_features, n_components)` is the data approximation matrix.

        .. deprecated:: 1.1
           `inner_stats` serves internal purpose only and will be removed in 1.4.

    return_n_iter : bool, default=False
        Whether or not to return the number of iterations.

        .. deprecated:: 1.1
           `return_n_iter` will be removed in 1.4 and n_iter will never be returned.

    positive_dict : bool, default=False
        Whether to enforce positivity when finding the dictionary.

        .. versionadded:: 0.20

    positive_code : bool, default=False
        Whether to enforce positivity when finding the code.

        .. versionadded:: 0.20

    method_max_iter : int, default=1000
        Maximum number of iterations to perform when solving the lasso problem.

        .. versionadded:: 0.22

    tol : float, default=1e-3
        Control early stopping based on the norm of the differences in the
        dictionary between 2 steps. Used only if `max_iter` is not None.

        To disable early stopping based on changes in the dictionary, set
        `tol` to 0.0.

        .. versionadded:: 1.1

    max_no_improvement : int, default=10
        Control early stopping based on the consecutive number of mini batches
        that does not yield an improvement on the smoothed cost function. Used only if
        `max_iter` is not None.

        To disable convergence detection based on cost function, set
        `max_no_improvement` to None.

        .. versionadded:: 1.1

    Returns
    -------
    code : ndarray of shape (n_samples, n_components)
        The sparse code (only returned if `return_code=True`).

    dictionary : ndarray of shape (n_components, n_features)
        The solutions to the dictionary learning problem.

    n_iter : int
        Number of iterations run. Returned only if `return_n_iter` is
        set to `True`.
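
    Examples
    --------
    A minimal usage sketch; the learned factors depend on the random state,
    so only the output shapes are shown:

    >>> import numpy as np
    >>> from sklearn.datasets import make_sparse_coded_signal
    >>> from sklearn.decomposition import dict_learning_online
    >>> X, _, _ = make_sparse_coded_signal(
    ...     n_samples=30, n_components=15, n_features=20, n_nonzero_coefs=10,
    ...     random_state=42)
    >>> code, dictionary = dict_learning_online(
    ...     X, n_components=15, alpha=0.1, max_iter=20, random_state=42)
    >>> code.shape, dictionary.shape
    ((30, 15), (15, 20))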

    See Also
    --------
    dict_learning : Solve a dictionary learning matrix factorization problem.
    DictionaryLearning : Find a dictionary that sparsely encodes data.
    MiniBatchDictionaryLearning : A faster, less accurate, version of the dictionary
        learning algorithm.
    SparsePCA : Sparse Principal Components Analysis.
    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.
    """
    # TODO(1.4): remove the whole old behavior
    deps = (return_n_iter, return_inner_stats, iter_offset, inner_stats)
    if max_iter is not None and not all(arg == "deprecated" for arg in deps):
        raise ValueError(
            "The following arguments are incompatible with 'max_iter': "
            "return_n_iter, return_inner_stats, iter_offset, inner_stats"
        )

    iter_offset = _check_warn_deprecated(iter_offset, "iter_offset", default=0)
    return_inner_stats = _check_warn_deprecated(
        return_inner_stats,
        "return_inner_stats",
        default=False,
        additional_message="From 1.4 inner_stats will never be returned.",
    )
    inner_stats = _check_warn_deprecated(inner_stats, "inner_stats", default=None)
    return_n_iter = _check_warn_deprecated(
        return_n_iter,
        "return_n_iter",
        default=False,
        additional_message=(
            "From 1.4 'n_iter' will never be returned. Refer to the 'n_iter_' and "
            "'n_steps_' attributes of the MiniBatchDictionaryLearning object instead."
        ),
    )

    if max_iter is not None:
        transform_algorithm = "lasso_" + method

        est = MiniBatchDictionaryLearning(
            n_components=n_components,
            alpha=alpha,
            max_iter=max_iter,
            n_jobs=n_jobs,
            fit_algorithm=method,
            batch_size=batch_size,
            shuffle=shuffle,
            dict_init=dict_init,
            random_state=random_state,
            transform_algorithm=transform_algorithm,
            transform_alpha=alpha,
            positive_code=positive_code,
            positive_dict=positive_dict,
            transform_max_iter=method_max_iter,
            verbose=verbose,
            callback=callback,
            tol=tol,
            max_no_improvement=max_no_improvement,
        ).fit(X)

        if not return_code:
            return est.components_
        else:
            code = est.transform(X)
            return code, est.components_

    # TODO(1.4): remove the whole old behavior below
    n_iter = _check_warn_deprecated(
        n_iter, "n_iter", default=100, additional_message="Use 'max_iter' instead."
    )

    if n_components is None:
        n_components = X.shape[1]

    if method not in ("lars", "cd"):
        raise ValueError("Coding method not supported as a fit algorithm.")

    _check_positive_coding(method, positive_code)

    method = "lasso_" + method

    t0 = time.time()
    n_samples, n_features = X.shape
    # Avoid integer division problems
    alpha = float(alpha)
    random_state = check_random_state(random_state)

    # Init V with SVD of X
    if dict_init is not None:
        dictionary = dict_init
    else:
        _, S, dictionary = randomized_svd(X, n_components, random_state=random_state)
        dictionary = S[:, np.newaxis] * dictionary
    r = len(dictionary)
    if n_components <= r:
        dictionary = dictionary[:n_components, :]
    else:
        dictionary = np.r_[
            dictionary,
            np.zeros((n_components - r, dictionary.shape[1]), dtype=dictionary.dtype),
        ]

    if verbose == 1:
        print("[dict_learning]", end=" ")

    if shuffle:
        X_train = X.copy()
        random_state.shuffle(X_train)
    else:
        X_train = X

    X_train = check_array(
        X_train, order="C", dtype=[np.float64, np.float32], copy=False
    )

    # Fortran-order dict better suited for the sparse coding which is the
    # bottleneck of this algorithm.
    dictionary = check_array(dictionary, order="F", dtype=X_train.dtype, copy=False)
    dictionary = np.require(dictionary, requirements="W")

    batches = gen_batches(n_samples, batch_size)
    batches = itertools.cycle(batches)

    # The covariance of the dictionary
    if inner_stats is None:
        A = np.zeros((n_components, n_components), dtype=X_train.dtype)
        # The data approximation
        B = np.zeros((n_features, n_components), dtype=X_train.dtype)
    else:
        A = inner_stats[0].copy()
        B = inner_stats[1].copy()

    # If n_iter is zero, we need to return zero.
    ii = iter_offset - 1

    for ii, batch in zip(range(iter_offset, iter_offset + n_iter), batches):
        this_X = X_train[batch]
        dt = time.time() - t0
        if verbose == 1:
            sys.stdout.write(".")
            sys.stdout.flush()
        elif verbose:
            if verbose > 10 or ii % ceil(100.0 / verbose) == 0:
                print(
                    "Iteration % 3i (elapsed time: % 3is, % 4.1fmn)" % (ii, dt, dt / 60)
                )

        this_code = sparse_encode(
            this_X,
            dictionary,
            algorithm=method,
            alpha=alpha,
            n_jobs=n_jobs,
            check_input=False,
            positive=positive_code,
            max_iter=method_max_iter,
            verbose=verbose,
        )

        # Update the auxiliary variables
        if ii < batch_size - 1:
            theta = float((ii + 1) * batch_size)
        else:
            theta = float(batch_size**2 + ii + 1 - batch_size)
        beta = (theta + 1 - batch_size) / (theta + 1)

        A *= beta
        A += np.dot(this_code.T, this_code)
        B *= beta
        B += np.dot(this_X.T, this_code)

        # Update dictionary in place
        _update_dict(
            dictionary,
            this_X,
            this_code,
            A,
            B,
            verbose=verbose,
            random_state=random_state,
            positive=positive_dict,
        )

        # Maybe we need a stopping criteria based on the amount of
        # modification in the dictionary
        if callback is not None:
            callback(locals())

    if return_inner_stats:
        if return_n_iter:
            return dictionary, (A, B), ii - iter_offset + 1
        else:
            return dictionary, (A, B)
    if return_code:
        if verbose > 1:
            print("Learning code...", end=" ")
        elif verbose == 1:
            print("|", end=" ")
        code = sparse_encode(
            X,
            dictionary,
            algorithm=method,
            alpha=alpha,
            n_jobs=n_jobs,
            check_input=False,
            positive=positive_code,
            max_iter=method_max_iter,
            verbose=verbose,
        )
        if verbose > 1:
            dt = time.time() - t0
            print("done (total time: % 3is, % 4.1fmn)" % (dt, dt / 60))
        if return_n_iter:
            return code, dictionary, ii - iter_offset + 1
        else:
            return code, dictionary

    if return_n_iter:
        return dictionary, ii - iter_offset + 1
    else:
        return dictionary


@validate_params(
    {
        "X": ["array-like"],
        "method": [StrOptions({"lars", "cd"})],
        "return_n_iter": ["boolean"],
        "method_max_iter": [Interval(Integral, 0, None, closed="left")],
    },
    prefer_skip_nested_validation=False,
)
def dict_learning(
    X,
    n_components,
    *,
    alpha,
    max_iter=100,
    tol=1e-8,
    method="lars",
    n_jobs=None,
    dict_init=None,
    code_init=None,
    callback=None,
    verbose=False,
    random_state=None,
    return_n_iter=False,
    positive_dict=False,
    positive_code=False,
    method_max_iter=1000,
):
    """Solve a dictionary learning matrix factorization problem.

    Finds the best dictionary and the corresponding sparse code for
    approximating the data matrix X by solving::

        (U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1
                     (U,V)
                    with || V_k ||_2 = 1 for all  0 <= k < n_components

    where V is the dictionary and U is the sparse code. ||.||_Fro stands for
    the Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm
    which is the sum of the absolute values of all the entries in the matrix.

    Read more in the :ref:`User Guide <DictionaryLearning>`.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data matrix.

    n_components : int
        Number of dictionary atoms to extract.

    alpha : int or float
        Sparsity controlling parameter.

    max_iter : int, default=100
        Maximum number of iterations to perform.

    tol : float, default=1e-8
        Tolerance for the stopping condition.

    method : {'lars', 'cd'}, default='lars'
        The method used:

        * `'lars'`: uses the least angle regression method to solve the lasso
           problem (`linear_model.lars_path`);
        * `'cd'`: uses the coordinate descent method to compute the
          Lasso solution (`linear_model.Lasso`). Lars will be faster if
          the estimated components are sparse.

    n_jobs : int, default=None
        Number of parallel jobs to run.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    dict_init : ndarray of shape (n_components, n_features), default=None
        Initial value for the dictionary for warm restart scenarios. Only used
        if `code_init` and `dict_init` are not None.

    code_init : ndarray of shape (n_samples, n_components), default=None
        Initial value for the sparse code for warm restart scenarios. Only used
        if `code_init` and `dict_init` are not None.

    callback : callable, default=None
        Callable that gets invoked every five iterations.

    verbose : bool, default=False
        To control the verbosity of the procedure.

    random_state : int, RandomState instance or None, default=None
        Used for randomly initializing the dictionary. Pass an int for
        reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.

    return_n_iter : bool, default=False
        Whether or not to return the number of iterations.

    positive_dict : bool, default=False
        Whether to enforce positivity when finding the dictionary.

        .. versionadded:: 0.20

    positive_code : bool, default=False
        Whether to enforce positivity when finding the code.

        .. versionadded:: 0.20

    method_max_iter : int, default=1000
        Maximum number of iterations to perform.

        .. versionadded:: 0.22

    Returns
    -------
    code : ndarray of shape (n_samples, n_components)
        The sparse code factor in the matrix factorization.

    dictionary : ndarray of shape (n_components, n_features)
        The dictionary factor in the matrix factorization.

    errors : array
        Vector of errors at each iteration.

    n_iter : int
        Number of iterations run. Returned only if `return_n_iter` is
        set to True.
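
    Examples
    --------
    A minimal usage sketch; only the output shapes are shown because the
    learned factors depend on the initialization:

    >>> import numpy as np
    >>> from sklearn.datasets import make_sparse_coded_signal
    >>> from sklearn.decomposition import dict_learning
    >>> X, _, _ = make_sparse_coded_signal(
    ...     n_samples=30, n_components=15, n_features=20, n_nonzero_coefs=10,
    ...     random_state=42)
    >>> code, dictionary, errors = dict_learning(
    ...     X, n_components=15, alpha=0.1, random_state=42)
    >>> code.shape, dictionary.shape
    ((30, 15), (15, 20))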

    See Also
    --------
    dict_learning_online : Solve a dictionary learning matrix factorization
        problem online.
    DictionaryLearning : Find a dictionary that sparsely encodes data.
    MiniBatchDictionaryLearning : A faster, less accurate version
        of the dictionary learning algorithm.
    SparsePCA : Sparse Principal Components Analysis.
    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.
    """
    estimator = DictionaryLearning(
        n_components=n_components,
        alpha=alpha,
        max_iter=max_iter,
        tol=tol,
        fit_algorithm=method,
        n_jobs=n_jobs,
        code_init=code_init,
        dict_init=dict_init,
        callback=callback,
        verbose=verbose,
        random_state=random_state,
        positive_code=positive_code,
        positive_dict=positive_dict,
        transform_max_iter=method_max_iter,
    )
    code = estimator.fit_transform(X)
    if return_n_iter:
        return (
            code,
            estimator.components_,
            estimator.error_,
            estimator.n_iter_,
        )
    return code, estimator.components_, estimator.error_


class _BaseSparseCoding(ClassNamePrefixFeaturesOutMixin, TransformerMixin):
    """Base class from SparseCoder and DictionaryLearning algorithms."""

    def __init__(
        self,
        transform_algorithm,
        transform_n_nonzero_coefs,
        transform_alpha,
        split_sign,
        n_jobs,
        positive_code,
        transform_max_iter,
    ):
        self.transform_algorithm = transform_algorithm
        self.transform_n_nonzero_coefs = transform_n_nonzero_coefs
        self.transform_alpha = transform_alpha
        self.transform_max_iter = transform_max_iter
        self.split_sign = split_sign
        self.n_jobs = n_jobs
        self.positive_code = positive_code

    def _transform(self, X, dictionary):
        """Private method allowing to accommodate both DictionaryLearning and
        SparseCoder."""
        X = self._validate_data(X, reset=False)

        if hasattr(self, "alpha") and self.transform_alpha is None:
            transform_alpha = self.alpha
        else:
            transform_alpha = self.transform_alpha

        code = sparse_encode(
            X,
            dictionary,
            algorithm=self.transform_algorithm,
            n_nonzero_coefs=self.transform_n_nonzero_coefs,
            alpha=transform_alpha,
            max_iter=self.transform_max_iter,
            n_jobs=self.n_jobs,
            positive=self.positive_code,
        )

        if self.split_sign:
            # feature vector is split into a positive and negative side
            n_samples, n_features = code.shape
            split_code = np.empty((n_samples, 2 * n_features))
            split_code[:, :n_features] = np.maximum(code, 0)
            split_code[:, n_features:] = -np.minimum(code, 0)
            code = split_code

        return code

    def transform(self, X):
        """Encode the data as a sparse combination of the dictionary atoms.

        Coding method is determined by the object parameter
        `transform_algorithm`.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Test data to be transformed, must have the same number of
            features as the data used to train the model.

        Returns
        -------
        X_new : ndarray of shape (n_samples, n_components)
            Transformed data.
        """
        check_is_fitted(self)
        return self._transform(X, self.components_)


class SparseCoder(_BaseSparseCoding, BaseEstimator):
    """Sparse coding.

    Each row of the result is the solution to a sparse coding problem.
    The goal is to find a sparse array `code` such that::

        X ~= code * dictionary

    Read more in the :ref:`User Guide <SparseCoder>`.

    Parameters
    ----------
    dictionary : ndarray of shape (n_components, n_features)
        The dictionary atoms used for sparse coding. Lines are assumed to be
        normalized to unit norm.

    transform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', \
            'threshold'}, default='omp'
        Algorithm used to transform the data:

        - `'lars'`: uses the least angle regression method
          (`linear_model.lars_path`);
        - `'lasso_lars'`: uses Lars to compute the Lasso solution;
        - `'lasso_cd'`: uses the coordinate descent method to compute the
          Lasso solution (linear_model.Lasso). `'lasso_lars'` will be faster if
          the estimated components are sparse;
        - `'omp'`: uses orthogonal matching pursuit to estimate the sparse
          solution;
        - `'threshold'`: squashes to zero all coefficients less than alpha from
          the projection ``dictionary * X'``.

    transform_n_nonzero_coefs : int, default=None
        Number of nonzero coefficients to target in each column of the
        solution. This is only used by `algorithm='lars'` and `algorithm='omp'`
        and is overridden by `alpha` in the `omp` case. If `None`, then
        `transform_n_nonzero_coefs=int(n_features / 10)`.

    transform_alpha : float, default=None
        If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the
        penalty applied to the L1 norm.
        If `algorithm='threshold'`, `alpha` is the absolute value of the
        threshold below which coefficients will be squashed to zero.
        If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of
        the reconstruction error targeted. In this case, it overrides
        `n_nonzero_coefs`.
        If `None`, default to 1.

    split_sign : bool, default=False
        Whether to split the sparse feature vector into the concatenation of
        its negative part and its positive part. This can improve the
        performance of downstream classifiers.

    n_jobs : int, default=None
        Number of parallel jobs to run.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    positive_code : bool, default=False
        Whether to enforce positivity when finding the code.

        .. versionadded:: 0.20

    transform_max_iter : int, default=1000
        Maximum number of iterations to perform if `algorithm='lasso_cd'` or
        `lasso_lars`.

        .. versionadded:: 0.22

    Attributes
    ----------
    n_components_ : int
        Number of atoms.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    DictionaryLearning : Find a dictionary that sparsely encodes data.
    MiniBatchDictionaryLearning : A faster, less accurate, version of the
        dictionary learning algorithm.
    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.
    SparsePCA : Sparse Principal Components Analysis.
    sparse_encode : Sparse coding where each row of the result is the solution
        to a sparse coding problem.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.decomposition import SparseCoder
    >>> X = np.array([[-1, -1, -1], [0, 0, 3]])
    >>> dictionary = np.array(
    ...     [[0, 1, 0],
    ...      [-1, -1, 2],
    ...      [1, 1, 1],
    ...      [0, 1, 1],
    ...      [0, 2, 1]],
    ...    dtype=np.float64
    ... )
    >>> coder = SparseCoder(
    ...     dictionary=dictionary, transform_algorithm='lasso_lars',
    ...     transform_alpha=1e-10,
    ... )
    >>> coder.transform(X)
    array([[ 0.,  0., -1.,  0.,  0.],
           [ 0.,  1.,  1.,  0.,  0.]])
    """

    _required_parameters = ["dictionary"]

    def __init__(
        self,
        dictionary,
        *,
        transform_algorithm="omp",
        transform_n_nonzero_coefs=None,
        transform_alpha=None,
        split_sign=False,
        n_jobs=None,
        positive_code=False,
        transform_max_iter=1000,
    ):
        super().__init__(
            transform_algorithm,
            transform_n_nonzero_coefs,
            transform_alpha,
            split_sign,
            n_jobs,
            positive_code,
            transform_max_iter,
        )
        self.dictionary = dictionary

    def fit(self, X, y=None):
        """Do nothing and return the estimator unchanged.

        This method is just there to implement the usual API and hence
        work in pipelines.

        Parameters
        ----------
        X : Ignored
            Not used, present for API consistency by convention.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        return self

    def transform(self, X, y=None):
        """Encode the data as a sparse combination of the dictionary atoms.

        Coding method is determined by the object parameter
        `transform_algorithm`.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        X_new : ndarray of shape (n_samples, n_components)
            Transformed data.
        """
        return super()._transform(X, self.dictionary)

    def _more_tags(self):
        return {
            "requires_fit": False,
            "preserves_dtype": [np.float64, np.float32],
        }

    @property
    def n_components_(self):
        """Number of atoms."""
        return self.dictionary.shape[0]

    @property
    def n_features_in_(self):
        """Number of features seen during `fit`."""
        return self.dictionary.shape[1]

    @property
    def _n_features_out(self):
        """Number of transformed output features."""
        return self.n_components_


class DictionaryLearning(_BaseSparseCoding, BaseEstimator):
    """Dictionary learning.
    Finds a dictionary (a set of atoms) that performs well at sparsely
    encoding the fitted data.

    Solves the optimization problem::

        (U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1
                    (U,V)
                    with || V_k ||_2 <= 1 for all  0 <= k < n_components

    ||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for
    the entry-wise matrix norm which is the sum of the absolute values
    of all the entries in the matrix.

    Read more in the :ref:`User Guide <DictionaryLearning>`.

    Parameters
    ----------
    n_components : int, default=None
        Number of dictionary elements to extract. If None, then ``n_components``
        is set to ``n_features``.

    alpha : float, default=1.0
        Sparsity controlling parameter.

    max_iter : int, default=1000
        Maximum number of iterations to perform.

    tol : float, default=1e-8
        Tolerance for numerical error.

    fit_algorithm : {'lars', 'cd'}, default='lars'
        * `'lars'`: uses the least angle regression method to solve the lasso
          problem (:func:`~sklearn.linear_model.lars_path`);
        * `'cd'`: uses the coordinate descent method to compute the
          Lasso solution (:class:`~sklearn.linear_model.Lasso`). Lars will be
          faster if the estimated components are sparse.

        .. versionadded:: 0.17
           *cd* coordinate descent method to improve speed.

    transform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', \
            'threshold'}, default='omp'
        Algorithm used to transform the data:

        - `'lars'`: uses the least angle regression method
          (:func:`~sklearn.linear_model.lars_path`);
        - `'lasso_lars'`: uses Lars to compute the Lasso solution.
        - `'lasso_cd'`: uses the coordinate descent method to compute the
          Lasso solution (:class:`~sklearn.linear_model.Lasso`). `'lasso_lars'`
          will be faster if the estimated components are sparse.
        - `'omp'`: uses orthogonal matching pursuit to estimate the sparse
          solution.
        - `'threshold'`: squashes to zero all coefficients less than alpha from
          the projection ``dictionary * X'``.

        .. versionadded:: 0.17
           *lasso_cd* coordinate descent method to improve speed.

    transform_n_nonzero_coefs : int, default=None
        Number of nonzero coefficients to target in each column of the
        solution. This is only used by `algorithm='lars'` and
        `algorithm='omp'`. If `None`, then
        `transform_n_nonzero_coefs=int(n_features / 10)`.

    transform_alpha : float, default=None
        If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the
        penalty applied to the L1 norm.
        If `algorithm='threshold'`, `alpha` is the absolute value of the
        threshold below which coefficients will be squashed to zero.
        If `None`, defaults to `alpha`.

        .. versionchanged:: 1.2
            When None, default value changed from 1.0 to `alpha`.

    n_jobs : int or None, default=None
        Number of parallel jobs to run.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    code_init : ndarray of shape (n_samples, n_components), default=None
        Initial value for the code, for warm restart. Only used if `code_init`
        and `dict_init` are not None.

    dict_init : ndarray of shape (n_components, n_features), default=None
        Initial values for the dictionary, for warm restart. Only used if
        `code_init` and `dict_init` are not None.

    callback : callable, default=None
        Callable that gets invoked every five iterations.

        .. versionadded:: 1.3

    verbose : bool, default=False
        To control the verbosity of the procedure.

    split_sign : bool, default=False
        Whether to split the sparse feature vector into the concatenation of
        its negative part and its positive part. This can improve the
        performance of downstream classifiers.

    random_state : int, RandomState instance or None, default=None
        Used for initializing the dictionary when ``dict_init`` is not
        specified, randomly shuffling the data when ``shuffle`` is set to
        ``True``, and updating the dictionary. Pass an int for reproducible
        results across multiple function calls.
        See :term:`Glossary <random_state>`.

    positive_code : bool, default=False
        Whether to enforce positivity when finding the code.

        .. versionadded:: 0.20

    positive_dict : bool, default=False
        Whether to enforce positivity when finding the dictionary.

        .. versionadded:: 0.20

    transform_max_iter : int, default=1000
        Maximum number of iterations to perform if `algorithm='lasso_cd'` or
        `'lasso_lars'`.

        .. versionadded:: 0.22

    Attributes
    ----------
    components_ : ndarray of shape (n_components, n_features)
        Dictionary atoms extracted from the data.

    error_ : array
        Vector of errors at each iteration.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    n_iter_ : int
        Number of iterations run.

    See Also
    --------
    MiniBatchDictionaryLearning: A faster, less accurate, version of the
        dictionary learning algorithm.
    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.
    SparseCoder : Find a sparse representation of data from a fixed,
        precomputed dictionary.
    SparsePCA : Sparse Principal Components Analysis.

    References
    ----------

    J. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009: Online dictionary learning
    for sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.datasets import make_sparse_coded_signal
    >>> from sklearn.decomposition import DictionaryLearning
    >>> X, dictionary, code = make_sparse_coded_signal(
    ...     n_samples=30, n_components=15, n_features=20, n_nonzero_coefs=10,
    ...     random_state=42,
    ... )
    >>> dict_learner = DictionaryLearning(
    ...     n_components=15, transform_algorithm='lasso_lars', transform_alpha=0.1,
    ...     random_state=42,
    ... )
    >>> X_transformed = dict_learner.fit(X).transform(X)

    We can check the level of sparsity of `X_transformed`:

    >>> np.mean(X_transformed == 0)
    0.52...

    We can compare the average squared euclidean norm of the reconstruction
    error of the sparse coded signal relative to the squared euclidean norm of
    the original signal:

    >>> X_hat = X_transformed @ dict_learner.components_
    >>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))
    0.05...
    """

    _parameter_constraints: dict = {
        "n_components": [Interval(Integral, 1, None, closed="left"), None],
        "alpha": [Interval(Real, 0, None, closed="left")],
        "max_iter": [Interval(Integral, 0, None, closed="left")],
        "tol": [Interval(Real, 0, None, closed="left")],
        "fit_algorithm": [StrOptions({"lars", "cd"})],
        "transform_algorithm": [
            StrOptions({"lasso_lars", "lasso_cd", "lars", "omp", "threshold"})
        ],
        "transform_n_nonzero_coefs": [
            Interval(Integral, 1, None, closed="left"),
            None,
        ],
        "transform_alpha": [Interval(Real, 0, None, closed="left"), None],
        "n_jobs": [Integral, None],
        "code_init": [np.ndarray, None],
        "dict_init": [np.ndarray, None],
        "callback": [callable, None],
        "verbose": ["verbose"],
        "split_sign": ["boolean"],
        "random_state": ["random_state"],
        "positive_code": ["boolean"],
        "positive_dict": ["boolean"],
        "transform_max_iter": [Interval(Integral, 0, None, closed="left")],
    }

    def __init__(
        self,
        n_components=None,
        *,
        alpha=1,
        max_iter=1000,
        tol=1e-8,
        fit_algorithm="lars",
        transform_algorithm="omp",
        transform_n_nonzero_coefs=None,
        transform_alpha=None,
        n_jobs=None,
        code_init=None,
        dict_init=None,
        callback=None,
        verbose=False,
        split_sign=False,
        random_state=None,
        positive_code=False,
        positive_dict=False,
        transform_max_iter=1000,
    ):
        super().__init__(
            transform_algorithm,
            transform_n_nonzero_coefs,
            transform_alpha,
            split_sign,
            n_jobs,
            positive_code,
            transform_max_iter,
        )
        self.n_components = n_components
        self.alpha = alpha
        self.max_iter = max_iter
        self.tol = tol
        self.fit_algorithm = fit_algorithm
        self.code_init = code_init
        self.dict_init = dict_init
        self.callback = callback
        self.verbose = verbose
        self.random_state = random_state
        self.positive_dict = positive_dict

    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        self.fit_transform(X)
        return self

    @_fit_context(prefer_skip_nested_validation=True)
    def fit_transform(self, X, y=None):
        """Fit the model from data in X and return the transformed data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        V : ndarray of shape (n_samples, n_components)
            Transformed data.
        """
        _check_positive_coding(method=self.fit_algorithm, positive=self.positive_code)

        method = "lasso_" + self.fit_algorithm

        random_state = check_random_state(self.random_state)
        X = self._validate_data(X)

        if self.n_components is None:
            n_components = X.shape[1]
        else:
            n_components = self.n_components

        V, U, E, self.n_iter_ = _dict_learning(
            X,
            n_components,
            alpha=self.alpha,
            tol=self.tol,
            max_iter=self.max_iter,
            method=method,
            method_max_iter=self.transform_max_iter,
            n_jobs=self.n_jobs,
            code_init=self.code_init,
            dict_init=self.dict_init,
            callback=self.callback,
            verbose=self.verbose,
            random_state=random_state,
            return_n_iter=True,
            positive_dict=self.positive_dict,
            positive_code=self.positive_code,
        )
        self.components_ = U
        self.error_ = E

        return V

    @property
    def _n_features_out(self):
        """Number of transformed output features."""
        return self.components_.shape[0]

    def _more_tags(self):
        return {
            "preserves_dtype": [np.float64, np.float32],
        }


class MiniBatchDictionaryLearning(_BaseSparseCoding, BaseEstimator):
    """Mini-batch dictionary learning.

    Finds a dictionary (a set of atoms) that performs well at sparsely
    encoding the fitted data.

    Solves the optimization problem::

       (U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1
                    (U,V)
                    with || V_k ||_2 <= 1 for all  0 <= k < n_components

    ||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for
    the entry-wise matrix norm which is the sum of the absolute values
    of all the entries in the matrix.

    Read more in the :ref:`User Guide <DictionaryLearning>`.

    Parameters
    ----------
    n_components : int, default=None
        Number of dictionary elements to extract.

    alpha : float, default=1
        Sparsity controlling parameter.

    n_iter : int, default=1000
        Total number of iterations over data batches to perform.

        .. deprecated:: 1.1
           ``n_iter`` is deprecated in 1.1 and will be removed in 1.4. Use
           ``max_iter`` instead.

    max_iter : int, default=None
        Maximum number of iterations over the complete dataset before
        stopping independently of any early stopping criterion heuristics.
        If ``max_iter`` is not None, ``n_iter`` is ignored.

        .. versionadded:: 1.1

    fit_algorithm : {'lars', 'cd'}, default='lars'
        The algorithm used:

        - `'lars'`: uses the least angle regression method to solve the lasso
          problem (`linear_model.lars_path`)
        - `'cd'`: uses the coordinate descent method to compute the
          Lasso solution (`linear_model.Lasso`). Lars will be faster if
          the estimated components are sparse.

    n_jobs : int, default=None
        Number of parallel jobs to run.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    batch_size : int, default=256
        Number of samples in each mini-batch.

        .. versionchanged:: 1.3
           The default value of `batch_size` changed from 3 to 256 in version 1.3.

    shuffle : bool, default=True
        Whether to shuffle the samples before forming batches.

    dict_init : ndarray of shape (n_components, n_features), default=None
        Initial value of the dictionary for warm restart scenarios.

    transform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', \
            'threshold'}, default='omp'
        Algorithm used to transform the data:

        - `'lars'`: uses the least angle regression method
          (`linear_model.lars_path`);
        - `'lasso_lars'`: uses Lars to compute the Lasso solution.
        - `'lasso_cd'`: uses the coordinate descent method to compute the
          Lasso solution (`linear_model.Lasso`). `'lasso_lars'` will be faster
          if the estimated components are sparse.
        - `'omp'`: uses orthogonal matching pursuit to estimate the sparse
          solution.
        - `'threshold'`: squashes to zero all coefficients less than alpha from
          the projection ``dictionary * X'``.

    transform_n_nonzero_coefs : int, default=None
        Number of nonzero coefficients to target in each column of the
        solution. This is only used by `algorithm='lars'` and
        `algorithm='omp'`. If `None`, then
        `transform_n_nonzero_coefs=int(n_features / 10)`.

    transform_alpha : float, default=None
        If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the
        penalty applied to the L1 norm.
        If `algorithm='threshold'`, `alpha` is the absolute value of the
        threshold below which coefficients will be squashed to zero.
        If `None`, defaults to `alpha`.

        .. versionchanged:: 1.2
            When None, default value changed from 1.0 to `alpha`.

    verbose : bool or int, default=False
        To control the verbosity of the procedure.

    split_sign : bool, default=False
        Whether to split the sparse feature vector into the concatenation of
        its negative part and its positive part. This can improve the
        performance of downstream classifiers.

    random_state : int, RandomState instance or None, default=None
        Used for initializing the dictionary when ``dict_init`` is not
        specified, randomly shuffling the data when ``shuffle`` is set to
        ``True``, and updating the dictionary. Pass an int for reproducible
        results across multiple function calls.
        See :term:`Glossary <random_state>`.

    positive_code : bool, default=False
        Whether to enforce positivity when finding the code.

        .. versionadded:: 0.20

    positive_dict : bool, default=False
        Whether to enforce positivity when finding the dictionary.

        .. versionadded:: 0.20

    transform_max_iter : int, default=1000
        Maximum number of iterations to perform if `algorithm='lasso_cd'` or
        `'lasso_lars'`.

        .. versionadded:: 0.22

    callback : callable, default=None
        A callable that gets invoked at the end of each iteration.

        .. versionadded:: 1.1

    tol : float, default=1e-3
        Control early stopping based on the norm of the differences in the
        dictionary between 2 steps. Used only if `max_iter` is not None.

        To disable early stopping based on changes in the dictionary, set
        `tol` to 0.0.

        .. versionadded:: 1.1

    max_no_improvement : int, default=10
        Control early stopping based on the consecutive number of mini batches
        that does not yield an improvement on the smoothed cost function. Used only if
        `max_iter` is not None.

        To disable convergence detection based on cost function, set
        `max_no_improvement` to None.

        .. versionadded:: 1.1

    Attributes
    ----------
    components_ : ndarray of shape (n_components, n_features)
        Components extracted from the data.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    n_iter_ : int
        Number of iterations over the full dataset.

    n_steps_ : int
        Number of mini-batches processed.

        .. versionadded:: 1.1

    See Also
    --------
    DictionaryLearning : Find a dictionary that sparsely encodes data.
    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.
    SparseCoder : Find a sparse representation of data from a fixed,
        precomputed dictionary.
    SparsePCA : Sparse Principal Components Analysis.

    References
    ----------

    J. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009: Online dictionary learning
    for sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.datasets import make_sparse_coded_signal
    >>> from sklearn.decomposition import MiniBatchDictionaryLearning
    >>> X, dictionary, code = make_sparse_coded_signal(
    ...     n_samples=30, n_components=15, n_features=20, n_nonzero_coefs=10,
    ...     random_state=42)
    >>> dict_learner = MiniBatchDictionaryLearning(
    ...     n_components=15, batch_size=3, transform_algorithm='lasso_lars',
    ...     transform_alpha=0.1, max_iter=20, random_state=42)
    >>> X_transformed = dict_learner.fit_transform(X)

    We can check the level of sparsity of `X_transformed`:

    >>> np.mean(X_transformed == 0) > 0.5
    True

    We can compare the average squared euclidean norm of the reconstruction
    error of the sparse coded signal relative to the squared euclidean norm of
    the original signal:

    >>> X_hat = X_transformed @ dict_learner.components_
    >>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))
    0.052...
    """

    _parameter_constraints: dict = {
        "n_components": [Interval(Integral, 1, None, closed="left"), None],
        "alpha": [Interval(Real, 0, None, closed="left")],
        "n_iter": [
            Interval(Integral, 0, None, closed="left"),
            Hidden(StrOptions({"deprecated"})),
        ],
        "max_iter": [Interval(Integral, 0, None, closed="left"), None],
        "fit_algorithm": [StrOptions({"cd", "lars"})],
        "n_jobs": [None, Integral],
        "batch_size": [Interval(Integral, 1, None, closed="left")],
        "shuffle": ["boolean"],
        "dict_init": [None, np.ndarray],
        "transform_algorithm": [
            StrOptions({"lasso_lars", "lasso_cd", "lars", "omp", "threshold"})
        ],
        "transform_n_nonzero_coefs": [
            Interval(Integral, 1, None, closed="left"),
            None,
        ],
        "transform_alpha": [Interval(Real, 0, None, closed="left"), None],
        "verbose": ["verbose"],
        "split_sign": ["boolean"],
        "random_state": ["random_state"],
        "positive_code": ["boolean"],
        "positive_dict": ["boolean"],
        "transform_max_iter": [Interval(Integral, 0, None, closed="left")],
        "callback": [None, callable],
        "tol": [Interval(Real, 0, None, closed="left")],
        "max_no_improvement": [Interval(Integral, 0, None, closed="left"), None],
    }

    def __init__(
        self,
        n_components=None,
        *,
        alpha=1,
        n_iter="deprecated",
        max_iter=None,
        fit_algorithm="lars",
        n_jobs=None,
        batch_size=256,
        shuffle=True,
        dict_init=None,
        transform_algorithm="omp",
        transform_n_nonzero_coefs=None,
        transform_alpha=None,
        verbose=False,
        split_sign=False,
        random_state=None,
        positive_code=False,
        positive_dict=False,
        transform_max_iter=1000,
        callback=None,
        tol=1e-3,
        max_no_improvement=10,
    ):
        super().__init__(
            transform_algorithm,
            transform_n_nonzero_coefs,
            transform_alpha,
            split_sign,
            n_jobs,
            positive_code,
            transform_max_iter,
        )
        self.n_components = n_components
        self.alpha = alpha
        self.n_iter = n_iter
        self.max_iter = max_iter
        self.fit_algorithm = fit_algorithm
        self.dict_init = dict_init
        self.verbose = verbose
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.split_sign = split_sign
        self.random_state = random_state
        self.positive_dict = positive_dict
        self.callback = callback
        self.tol = tol
        self.max_no_improvement = max_no_improvement

    def _check_params(self, X):
        # n_components
        self._n_components = self.n_components
        if self._n_components is None:
            self._n_components = X.shape[1]

        # fit_algorithm
        _check_positive_coding(self.fit_algorithm, self.positive_code)
        self._fit_algorithm = "lasso_" + self.fit_algorithm

        # batch_size
        self._batch_size = min(self.batch_size, X.shape[0])

    def _initialize_dict(self, X, random_state):
        """Initialization of the dictionary."""
        if self.dict_init is not None:
            dictionary = self.dict_init
        else:
            # Init V with SVD of X
            _, S, dictionary = randomized_svd(
                X, self._n_components, random_state=random_state
            )
            dictionary = S[:, np.newaxis] * dictionary

        if self._n_components <= len(dictionary):
            dictionary = dictionary[: self._n_components, :]
        else:
            dictionary = np.concatenate(
                (
                    dictionary,
                    np.zeros(
                        (self._n_components - len(dictionary), dictionary.shape[1]),
                        dtype=dictionary.dtype,
                    ),
                )
            )

        dictionary = check_array(dictionary, order="F", dtype=X.dtype, copy=False)
        dictionary = np.require(dictionary, requirements="W")

        return dictionary

    def _update_inner_stats(self, X, code, batch_size, step):
        """Update the inner stats inplace."""
        if step < batch_size - 1:
            theta = (step + 1) * batch_size
        else:
            theta = batch_size**2 + step + 1 - batch_size
        beta = (theta + 1 - batch_size) / (theta + 1)

        self._A *= beta
        self._A += code.T @ code / batch_size
        self._B *= beta
        self._B += X.T @ code / batch_size

    def _minibatch_step(self, X, dictionary, random_state, step):
        """Perform the update on the dictionary for one minibatch."""
        batch_size = X.shape[0]

        # Compute code for this batch
        code = _sparse_encode(
            X,
            dictionary,
            algorithm=self._fit_algorithm,
            alpha=self.alpha,
            n_jobs=self.n_jobs,
            positive=self.positive_code,
            max_iter=self.transform_max_iter,
            verbose=self.verbose,
        )

        batch_cost = (
            0.5 * ((X - code @ dictionary) ** 2).sum()
            + self.alpha * np.sum(np.abs(code))
        ) / batch_size

        # Update inner stats
        self._update_inner_stats(X, code, batch_size, step)

        # Update dictionary
        _update_dict(
            dictionary,
            X,
            code,
            self._A,
            self._B,
            verbose=self.verbose,
            random_state=random_state,
            positive=self.positive_dict,
        )

        return batch_cost

    def _check_convergence(
        self, X, batch_cost, new_dict, old_dict, n_samples, step, n_steps
    ):
        """Helper function to encapsulate the early stopping logic.

        Early stopping is based on two factors:
        - A small change of the dictionary between two minibatch updates. This is
          controlled by the tol parameter.
        - No more improvement on a smoothed estimate of the objective function for a
          certain number of consecutive minibatch updates. This is controlled by
          the max_no_improvement parameter.
        """
        batch_size = X.shape[0]

        # counts steps starting from 1 for user friendly verbose mode.
        step = step + 1

        # Ignore the first steps to avoid initializing the ewa_cost with a
        # too bad value
        if step <= min(100, n_samples / batch_size):
            if self.verbose:
                print(f"Minibatch step {step}/{n_steps}: mean batch cost: {batch_cost}")
            return False

        # Compute an Exponentially Weighted Average of the cost function to
        # monitor the convergence while filtering out noise due to the
        # stochastic nature of the algorithm.
        if self._ewa_cost is None:
            self._ewa_cost = batch_cost
        else:
            alpha = batch_size / (n_samples + 1)
            alpha = min(alpha, 1)
            self._ewa_cost = self._ewa_cost * (1 - alpha) + batch_cost * alpha

        if self.verbose:
            print(
                f"Minibatch step {step}/{n_steps}: mean batch cost: "
                f"{batch_cost}, ewa cost: {self._ewa_cost}"
            )

        # Early stopping based on change of dictionary
        dict_diff = linalg.norm(new_dict - old_dict) / self._n_components
        if self.tol > 0 and dict_diff <= self.tol:
            if self.verbose:
                print(f"Converged (small dictionary change) at step {step}/{n_steps}")
            return True

        # Early stopping heuristic due to lack of improvement on smoothed
        # cost function
        if self._ewa_cost_min is None or self._ewa_cost < self._ewa_cost_min:
            self._no_improvement = 0
            self._ewa_cost_min = self._ewa_cost
        else:
            self._no_improvement += 1

        if (
            self.max_no_improvement is not None
            and self._no_improvement >= self.max_no_improvement
        ):
            if self.verbose:
                print(
                    "Converged (lack of improvement in objective function) "
                    f"at step {step}/{n_steps}"
                )
            return True

        return False

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        X = self._validate_data(
            X, dtype=[np.float64, np.float32], order="C", copy=False
        )

        self._check_params(X)

        if self.n_iter != "deprecated":
            warnings.warn(
                "'n_iter' is deprecated in version 1.1 and will be removed in "
                "version 1.4. Use 'max_iter' and let 'n_iter' to its default "
                "value instead. 'n_iter' is also ignored if 'max_iter' is "
                "specified.",
                FutureWarning,
            )

        self._random_state = check_random_state(self.random_state)

        dictionary = self._initialize_dict(X, self._random_state)
        old_dict = dictionary.copy()

        if self.shuffle:
            X_train = X.copy()
            self._random_state.shuffle(X_train)
        else:
            X_train = X

        n_samples, n_features = X_train.shape

        if self.verbose:
            print("[dict_learning]")

        # Inner stats
        self._A = np.zeros(
            (self._n_components, self._n_components), dtype=X_train.dtype
        )
        self._B = np.zeros((n_features, self._n_components), dtype=X_train.dtype)

        if self.max_iter is not None:
            # Attributes to monitor the convergence
            self._ewa_cost = None
            self._ewa_cost_min = None
            self._no_improvement = 0

            batches = gen_batches(n_samples, self._batch_size)
            batches = itertools.cycle(batches)
            n_steps_per_iter = int(np.ceil(n_samples / self._batch_size))
            n_steps = self.max_iter * n_steps_per_iter

            i = -1  # to allow max_iter = 0

            for i, batch in zip(range(n_steps), batches):
                X_batch = X_train[batch]

                batch_cost = self._minibatch_step(
                    X_batch, dictionary, self._random_state, i
                )

                if self._check_convergence(
                    X_batch, batch_cost, dictionary, old_dict, n_samples, i, n_steps
                ):
                    break

                if self.callback is not None:
                    self.callback(locals())

                old_dict[:] = dictionary

            self.n_steps_ = i + 1
            self.n_iter_ = np.ceil(self.n_steps_ / n_steps_per_iter)
        else:
            # TODO(1.4): remove this branch
            n_iter = 1000 if self.n_iter == "deprecated" else self.n_iter

            batches = gen_batches(n_samples, self._batch_size)
            batches = itertools.cycle(batches)

            for i, batch in zip(range(n_iter), batches):
                self._minibatch_step(
                    X_train[batch], dictionary, self._random_state, i
                )

                trigger_verbose = (
                    self.verbose and i % ceil(100.0 / self.verbose) == 0
                )
                if self.verbose > 10 or trigger_verbose:
                    print(f"{i} batches processed.")

                if self.callback is not None:
                    self.callback(locals())

            self.n_steps_ = n_iter
            self.n_iter_ = np.ceil(
                n_iter / int(np.ceil(n_samples / self._batch_size))
            )

        self.components_ = dictionary

        return self

    @_fit_context(prefer_skip_nested_validation=True)
    def partial_fit(self, X, y=None):
        """Update the model using the data in X as a mini-batch.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Return the instance itself.
        """
        has_components = hasattr(self, "components_")

        X = self._validate_data(
            X, dtype=[np.float64, np.float32], order="C", reset=not has_components
        )

        if not has_components:
            # This instance has not been fitted yet (fit or partial_fit)
            self._check_params(X)
            self._random_state = check_random_state(self.random_state)

            dictionary = self._initialize_dict(X, self._random_state)

            self.n_steps_ = 0

            self._A = np.zeros(
                (self._n_components, self._n_components), dtype=X.dtype
            )
            self._B = np.zeros((X.shape[1], self._n_components), dtype=X.dtype)
        else:
            dictionary = self.components_

        self._minibatch_step(X, dictionary, self._random_state, self.n_steps_)

        self.components_ = dictionary
        self.n_steps_ += 1

        return self

    @property
    def _n_features_out(self):
        """Number of transformed output features."""
        return self.components_.shape[0]

    def _more_tags(self):
        return {
            "preserves_dtype": [np.float64, np.float32],
        }