U
    9%e{                     @   sf  U d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	Z	d dl	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z,m-Z-m.Z. ddl/m0Z0m1Z1 ddl2m3Z3 ddl4m5Z5 ddl6m7Z7 ddl8m9Z9 ddl:m;Z; ddl<m=Z= ddl>m?Z?m@ZA i ZBi ZCeeeef ef eDd< d d!gZEd"d  ZFeG d#d$ d$ZGd%d& ZHd'd( ZId)d* ZJd+d, ZKeee	jLd-d.d!ZMeFeed/d0 ZNeFeed1d2 ZOeFeed3d4 ZPeFeed5d6 ZQeFeed7d8 ZReFeed9d: ZSeFeed;d< ZTeFeed=d> ZUeFe e d?d@ ZVeFe$e$dAdB ZWeFe"e"dCdD ZXeFe&e&dEdF ZYeFe*e*dGdH ZZeFe.e.dIdJ Z[eFe1e.dKdL Z\eFe.e1dMdN Z]eFe1e1dOdP Z^eFe3e3dQdR Z_eFe5e5dSdT Z`eFe7e7dUdV ZaeFe9e9dWdX ZbeFe;e;dYdZ ZceFe=e=d[d\ ZdeFee9d]d^ ZeeFeed_d` ZfeFee7dadb ZgeFeedcdd ZheFee dedf ZieFee3dgdh ZjeFee=didj ZkeFee7dkdl ZleFeedmdn ZmeFee3dodp ZneFee=dqdr ZoeFeeeFeeeFee7eFee=dsdt ZpeFee dudv ZqeFee$dwdx ZreFee3dydz ZseFe eeFe eeFe e7eFe e=d{d| ZteFe ed}d~ ZueFe e$dd ZveFe e3dd ZweFe$eeFe$eeFe$eeFe$e eFe$e7eFe$e=dd ZxeFe$e3dd ZyeFe*eeFe*eeFe*eeFe*e eFe*e7eFe*e=dd ZzeFe*e3dd Z{eFe3eeFe3eeFe3eeFe3e eFe3e7eFe3e=dd Z|eFe3e$dd Z}eFe3e*dd Z~eFe7eeFe7eeFe7e=dd ZeFe7edd ZeFe7e dd ZeFe7e3dd ZeFe9eeFe9edd ZeFe=edd ZeFe=edd ZeFe=edd ZeFe=e dd ZeFe=e$dd ZeFe=e3dd ZeFe=e7dd ZeFe(e(dd ZeFeedd Zdd ZdS )    N)total_ordering)CallableDictTupleType)inf   )	Bernoulli)Beta)Binomial)Categorical)Cauchy)ContinuousBernoulli)	Dirichlet)Distribution)ExponentialFamily)Exponential)Gamma)	Geometric)Gumbel)
HalfNormal)Independent)Laplace)_batch_lowrank_logdet_batch_lowrank_mahalanobisLowRankMultivariateNormal)_batch_mahalanobisMultivariateNormal)Normal)OneHotCategorical)Pareto)Poisson)TransformedDistribution)Uniform)_sum_rightmosteuler_constant_KL_MEMOIZEregister_klkl_divergencec                    sV   t  ts"t tr"td  t tsDttrDtd  fdd}|S )a[  
    Decorator to register a pairwise function with :meth:`kl_divergence`.
    Usage::

        @register_kl(Normal, Normal)
        def kl_normal_normal(p, q):
            # insert implementation here

    Lookup returns the most specific (type,type) match ordered by subclass. If
    the match is ambiguous, a `RuntimeWarning` is raised. For example to
    resolve the ambiguous situation::

        @register_kl(BaseP, DerivedQ)
        def kl_version1(p, q): ...
        @register_kl(DerivedP, BaseQ)
        def kl_version2(p, q): ...

    you should register a third most-specific implementation, e.g.::

        register_kl(DerivedP, DerivedQ)(kl_version1)  # Break the tie.

    Args:
        type_p (type): A subclass of :class:`~torch.distributions.Distribution`.
        type_q (type): A subclass of :class:`~torch.distributions.Distribution`.
    z6Expected type_p to be a Distribution subclass but got z6Expected type_q to be a Distribution subclass but got c                    s   | t  f< t  | S N)_KL_REGISTRYr&   clear)funtype_ptype_q U/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/torch/distributions/kl.py	decoratorT   s    zregister_kl.<locals>.decorator)
isinstancetype
issubclassr   	TypeError)r.   r/   r2   r0   r-   r1   r'   1   s    c                   @   s*   e Zd ZdgZdd Zdd Zdd ZdS )	_Matchtypesc                 G   s
   || _ d S r)   r8   )selfr8   r0   r0   r1   __init__`   s    z_Match.__init__c                 C   s   | j |j kS r)   r9   )r:   otherr0   r0   r1   __eq__c   s    z_Match.__eq__c                 C   s8   t | j|jD ]$\}}t||s& dS ||k	r q4qdS )NFT)zipr8   r5   )r:   r<   xyr0   r0   r1   __le__f   s    
z_Match.__le__N)__name__
__module____qualname__	__slots__r;   r=   rA   r0   r0   r0   r1   r7   \   s   r7   c           	         s    fddt D }|stS tdd |D j\}}tdd |D j\}}t ||f }t ||f }||k	rtd jj|j|jt |S )zP
    Find the most specific approximate match, assuming single inheritance.
    c                    s,   g | ]$\}}t  |rt |r||fqS r0   )r5   ).0Zsuper_pZsuper_qr-   r0   r1   
<listcomp>s   s   
 
z _dispatch_kl.<locals>.<listcomp>c                 s   s   | ]}t | V  qd S r)   )r7   rF   mr0   r0   r1   	<genexpr>}   s     z_dispatch_kl.<locals>.<genexpr>c                 s   s   | ]}t t| V  qd S r)   )r7   reversedrH   r0   r0   r1   rJ   ~   s     z;Ambiguous kl_divergence({}, {}). Please register_kl({}, {}))	r*   NotImplementedminr8   warningswarnformatrB   RuntimeWarning)	r.   r/   matchesZleft_pZleft_qZright_qZright_pZleft_funZ	right_funr0   r-   r1   _dispatch_klo   s(       rS   c                 C   s   t | tS )zI
    Helper function for obtaining infinite KL Divergence throughout
    )torchZ	full_liker   Ztensorr0   r0   r1   _infinite_like   s    rV   c                 C   s   | |    S )z2
    Utility function for calculating x log x
    )logrU   r0   r0   r1   _x_log_x   s    rX   c                 C   sD   |  d}|  d}| d|| dd}|| jdd S )zp
    Utility function for calculating the trace of XX^{T} with X having arbitrary trailing batch dimensions
       N)sizeZreshapepowsumshape)ZbmatnrI   Z
flat_tracer0   r0   r1   _batch_trace_XXT   s    

ra   )pqreturnc                 C   s   zt t| t|f }W n: tk
rR   tt| t|}|t t| t|f< Y nX |tkrxtd| jj d|jj || |S )a"  
    Compute Kullback-Leibler divergence :math:`KL(p \| q)` between two distributions.

    .. math::

        KL(p \| q) = \int p(x) \log\frac {p(x)} {q(x)} \,dx

    Args:
        p (Distribution): A :class:`~torch.distributions.Distribution` object.
        q (Distribution): A :class:`~torch.distributions.Distribution` object.

    Returns:
        Tensor: A batch of KL divergences of shape `batch_shape`.

    Raises:
        NotImplementedError: If the distribution types have not been registered via
            :meth:`register_kl`.
    z(No KL(p || q) is implemented for p type z and q type )r&   r4   KeyErrorrS   rL   NotImplementedError	__class__rB   )rb   rc   r,   r0   r0   r1   r(      s    c                 C   s   | j tjj|j tjj| j   }t||j dk< d|| j dk< d| j  tjj|jtjj| j  }t||j dk< d|| j dk< || S Nr   r   )probsrT   nnZ
functionalZsoftpluslogitsr   rb   rc   t1t2r0   r0   r1   _kl_bernoulli_bernoulli   s    ro   c           	      C   s   | j | j }|j |j }|j  |j  |  }| j  | j  |  }| j |j  t| j  }| j|j t| j }|| t| }|| | | | S r)   )concentration1concentration0lgammarT   digamma)	rb   rc   Zsum_params_pZsum_params_qrm   rn   t3t4t5r0   r0   r1   _kl_beta_beta   s    rw   c                 C   sh   | j |j k  rtd| j | j| j|j  | j   |j    }| j |j k}t|| ||< |S )NzKKL between Binomials where q.total_count > p.total_count is not implemented)Ztotal_countanyrf   ri   rk   log1prV   )rb   rc   klZinf_idxsr0   r0   r1   _kl_binomial_binomial   s    (r{   c                 C   sD   | j | j|j  }t||j dk|< d|| j dk|< |dS )Nr   rY   )ri   rk   r   Z	expand_asr^   )rb   rc   tr0   r0   r1   _kl_categorical_categorical   s    r}   c                 C   sL   | j | j|j  }|  t| j  }|  t|j  }|| | S r)   )meanrk   _cont_bern_log_normrT   ry   ri   rb   rc   rm   rn   rt   r0   r0   r1   -_kl_continuous_bernoulli_continuous_bernoulli   s    r   c                 C   s|   | j d}|j d}| |  }| j  |j   d}| j |j  }| j  | d }|| || d S )NrY   )concentrationr^   rr   rs   	unsqueeze)rb   rc   Zsum_p_concentrationZsum_q_concentrationrm   rn   rt   ru   r0   r0   r1   _kl_dirichlet_dirichlet  s    r   c                 C   s"   |j | j  }|  }|| d S Nr   raterW   )rb   rc   Z
rate_ratiorm   r0   r0   r1   _kl_exponential_exponential  s    
r   c                 C   s   t | t |kstddd | jD }|j}| j| }tjj| |dd}|j| | }t|||D ]*\}}}	|| |	 }
|t	|
t
|j8 }qh|S )NzThe cross KL-divergence between different exponential families cannot                             be computed using Bregman divergencesc                 S   s   g | ]}|   qS r0   )detachZrequires_grad_)rF   npr0   r0   r1   rG     s     z+_kl_expfamily_expfamily.<locals>.<listcomp>T)Zcreate_graph)r4   rf   Z_natural_paramsZ_log_normalizerrT   ZautogradZgradr^   r>   r$   lenevent_shape)rb   rc   Z	p_nparamsZ	q_nparamsZ	lg_normalZ	gradientsresultZpnpZqnpgtermr0   r0   r1   _kl_expfamily_expfamily  s    
r   c                 C   sn   |j | j|j   }t|j t| j  }| j |j  t| j  }|j| j | j | j  }|| | | S r)   )r   r   rW   rT   rr   rs   rb   rc   rm   rn   rt   ru   r0   r0   r1   _kl_gamma_gamma*  s
    r   c                 C   sl   | j |j  }|j|j  }| j|j  }|  | | }|t }t|d|   | }|| | dt  S r   )scalelocrW   _euler_gammarT   exprr   )rb   rc   Zct1Zct2Zct3rm   rn   rt   r0   r0   r1   _kl_gumbel_gumbel3  s    r   c                 C   s$   |    t|j | j  |j S r)   )entropyrT   ry   ri   rk   rb   rc   r0   r0   r1   _kl_geometric_geometric>  s    r   c                 C   s   t | j|jS r)   )_kl_normal_normal	base_distr   r0   r0   r1   _kl_halfnormal_halfnormalC  s    r   c                 C   sV   | j |j  }| j|j  }|  }||j  }|t| | j   }|| | d S r   )r   r   absrW   rT   r   )rb   rc   scale_ratioZloc_abs_diffrm   rn   rt   r0   r0   r1   _kl_laplace_laplaceH  s    

r   c                 C   s   | j |j krtdt|j|j|jt| j| j| j }t|j|j|j| j |j}|jj|j	d }t
jj|j|dd}| j|j d}t| j|j 	d }t|| j 	d }t|| j}	|| | |	 }
d||
 | | j d   S )NzKL-divergence between two Low Rank Multivariate Normals with                          different event shapes cannot be computedrZ   FupperrY         ?r   )r   
ValueErrorr   _unbroadcasted_cov_factor_unbroadcasted_cov_diag_capacitance_trilr   r   mTr   rT   linalgsolve_triangularr^   ra   rsqrtsqrtmatmul)rb   rc   term1term3	qWt_qDinvAterm21term22Zterm23Zterm24term2r0   r0   r1   7_kl_lowrankmultivariatenormal_lowrankmultivariatenormalS  s>        
	r   c           	      C   s   | j |j krtdt|j|j|jd| jjddd 	d  }t
|j|j|j| j |j}|jj|jd }tjj|j|dd}t| j|j d }t|| j}|| }d|| | | j d	   S )
NKL-divergence between two (Low Rank) Multivariate Normals with                          different event shapes cannot be computedr[   rZ   rY   Zdim1Zdim2Fr   r   r   )r   r   r   r   r   r   _unbroadcasted_scale_trildiagonalrW   r^   r   r   r   r   rT   r   r   ra   r   r   )	rb   rc   r   r   r   r   r   r   r   r0   r0   r1   0_kl_multivariatenormal_lowrankmultivariatenormalu  s2      
	r   c                 C   s$  | j |j krtdd|jjddd d t| j| j| j	 }t
|j|j| j }tj|jjd d | jjd d }| j d }|j|||f }| j||| jdf }t| j |||f }ttjj||dd}	ttjj||dd}
|	|
 }d	|| | | j d   S )
Nr   r[   rZ   rY   r   r   Fr   r   )r   r   r   r   rW   r^   r   r   r   r   r   r   rT   _C_infer_sizer_   expandZ
cov_factorr\   Z
diag_embedr   ra   r   r   )rb   rc   r   r   combined_batch_shaper`   q_scale_trilZp_cov_factorZ
p_cov_diagr   r   r   r0   r0   r1   0_kl_lowrankmultivariatenormal_multivariatenormal  sD       

r   c           	      C   s   | j |j krtd|jjddd d| jjddd d }tj|jj	d d | jj	d d }| j d }|j
|||f }| j
|||f }ttjj||dd}t|j|j| j }|d|| |   S )	NzvKL-divergence between two Multivariate Normals with                          different event shapes cannot be computedrZ   rY   r   r   Fr   r   )r   r   r   r   rW   r^   rT   r   r   r_   r   ra   r   r   r   r   )	rb   rc   Z
half_term1r   r`   r   Zp_scale_trilr   r   r0   r0   r1   )_kl_multivariatenormal_multivariatenormal  s*     
r   c                 C   sB   | j |j  d}| j|j |j  d}d|| d |   S Nr[   r   r   r   r]   r   rW   )rb   rc   Z	var_ratiorm   r0   r0   r1   r     s    r   c                 C   s   t | j|jS r)   )r}   Z_categoricalr   r0   r0   r1   '_kl_onehotcategorical_onehotcategorical  s    r   c                 C   sX   | j |j  }|j| j }|j|  }|  }|| | d }t|| jj|jjk < |S r   )r   alpharW   r   supportlower_bound)rb   rc   r   Zalpha_ratiorm   rn   r   r0   r0   r1   _kl_pareto_pareto  s    
r   c                 C   s&   | j | j  |j    | j |j   S r)   r   r   r0   r0   r1   _kl_poisson_poisson  s    r   c                 C   s.   | j |j krt| j|jkr tt| j|jS r)   )Z
transformsrf   r   r(   r   r   r0   r0   r1   _kl_transformed_transformed  s
    r   c                 C   s<   |j |j | j | j   }t||j| jk|j | j k B < |S r)   )highlowrW   r   rb   rc   r   r0   r0   r1   _kl_uniform_uniform  s    r   c                 C   s    |    | j|j  |j  S r)   )r   ri   r   rW   r   r0   r0   r1   _kl_bernoulli_poisson  s    r   c                 C   s,   |    | j|j  t|j  |  S r)   )r   r~   rk   rT   ry   ri   r   r   r0   r0   r1   _kl_beta_continuous_bernoulli  s    
r   c                 C   s
   t | jS r)   )rV   rp   r   r0   r0   r1   _kl_beta_infinity  s    r   c                 C   s,   |    |j  |j| j| j| j    S r)   )r   r   rW   rp   rq   r   r0   r0   r1   _kl_beta_exponential  s    r   c                 C   sp   |    }|j |j|j   }|jd | j | j| j    }|j| j | j| j  }|| | | S r   )r   r   rr   r   rW   rp   rs   rq   r   r0   r0   r1   _kl_beta_gamma  s    
r   c           	      C   s   | j | j | j  }|jd}|   }d|d tj   }|d|  | j | j d  |d d }|j| }|jdd }|| || | |  S r   )	rp   rq   r   r]   r   mathpirW   r   )	rb   rc   ZE_beta
var_normalrm   rn   rt   ru   rv   r0   r0   r1   _kl_beta_normal*  s    

r   c                 C   s>   |    |j|j   }t||j| jjk|j| jjk B < |S r)   )r   r   r   rW   r   r   r   upper_boundr   r0   r0   r1   _kl_beta_uniform9  s     r   c                 C   s
   t | jS r)   )rV   ri   r   r0   r0   r1   !_kl_continuous_bernoulli_infinityC  s    r   c                 C   s"   |    t|j |j| j  S r)   )r   rT   rW   r   r~   r   r0   r0   r1   $_kl_continuous_bernoulli_exponentialH  s    r   c                 C   sz   |    }dtdtj t|j|j   t|j }| jt| j	 d|j | j	  dt|j  }|| | S )Nr   g       @)
r   r   rW   r   rT   Zsquarer   r   Zvariancer~   r   r0   r0   r1   _kl_continuous_bernoulli_normalQ  s    
( r   c              	   C   sV   |    |j|j   }ttt|j| jj	t
|j| jjt|t |S r)   )r   r   r   rW   rT   wheremaxger   r   ler   	ones_liker   r   r0   r0   r1    _kl_continuous_bernoulli_uniform]  s    r   c                 C   s
   t | jS r)   rV   r   r   r0   r0   r1   _kl_exponential_infinityj  s    r   c                 C   sB   |j | j  }|j t| }|| |j  |jt  dt  S r   )r   r   rT   rW   rr   r   )rb   rc   ratiorm   r0   r0   r1   _kl_exponential_gammar  s    r   c                 C   sR   | j |j }|j|j }| d }t|| |d  }| }|| | | S r   )r   r   r   rW   rT   r   
reciprocal)rb   rc   scale_rate_prodloc_scale_ratiorm   rn   rt   r0   r0   r1   _kl_exponential_gumbel  s    r   c                 C   sp   |j d}| jd}dt|| d tj  }| }|j| j }|jdd }|d || | |  S r   )	r   r]   r   rT   rW   r   r   r   r   )rb   rc   r   Zrate_sqrrm   rn   rt   ru   r0   r0   r1   _kl_exponential_normal  s    r   c                 C   s
   t | jS r)   )rV   r   r   r0   r0   r1   _kl_gamma_infinity  s    r   c                 C   s&   |    |j  |j| j | j  S r)   )r   r   rW   r   r   r0   r0   r1   _kl_gamma_exponential  s    r   c                 C   s~   | j |j }|j|j }| jd | j  | j  | j }| | j|  }t|d|	  
| j  | }|| | S r   )r   r   r   r   rs   rr   rW   rT   r   r   r]   )rb   rc   Zbeta_scale_prodr   rm   rn   rt   r0   r0   r1   _kl_gamma_gumbel  s     r   c                 C   s   |j d}| jd}dt|| d tj  | j | j  }d| jd| j  | }|j	| j | j }d|j	d }|| jd | j
   || | |  S r   )r   r]   r   rT   rW   r   r   r   rr   r   rs   )rb   rc   r   Zbeta_sqrrm   rn   rt   ru   r0   r0   r1   _kl_gamma_normal  s"    r   c                 C   s
   t | jS r)   rV   r   r   r0   r0   r1   _kl_gumbel_infinity  s    r   c                 C   sx   | j |j  }|tdtj   }tj| d dd }| j| j t  |j |j  dd }| | | td  S )Nr[   r      r   )r   r   r   r   rW   r]   r   r   )rb   rc   Zparam_ratiorm   rn   rt   r0   r0   r1   _kl_gumbel_normal  s
    &r   c                 C   s
   t | jS r)   r   r   r0   r0   r1   _kl_laplace_infinity  s    r   c                 C   s~   |j d}| j d| }dtd| tj  }d| jd }| j|j }d|jd }| | || | |  d S r   )r   r]   rT   rW   r   r   r   )rb   rc   r   Zscale_sqr_var_ratiorm   rn   rt   ru   r0   r0   r1   _kl_laplace_normal  s    r   c                 C   s
   t | jS r)   r   r   r0   r0   r1   _kl_normal_infinity  s    r   c                 C   s|   | j |j }| j|j d}|j |j }| d }|| }t| d|  | }| | | ddtdtj    S r   )r   r   r]   rW   rT   r   r   r   )rb   rc   Zmean_scale_ratioZvar_scale_sqr_ratior   rm   rn   rt   r0   r0   r1   _kl_normal_gumbel  s    r   c                 C   s   | j |j  }| j|j }|| j }t|}tdtj | j td|d  }|t	td|  }| || |j  ddtdtj    S )Nr[   g      r   r   )
r   r   rT   rW   r   r   r   r   r]   erf)rb   rc   Zloc_diffr   Zloc_diff_scale_ratiorm   rn   rt   r0   r0   r1   _kl_normal_laplace  s    

(r   c                 C   s
   t | jS r)   )rV   r   r   r0   r0   r1   _kl_pareto_infinity  s    r   c                 C   sZ   | j |j }| j|  }| j }| j| | jd  }|| | d }t|| jdk< |S r   )r   r   r   rW   r   r   )rb   rc   r   rm   rn   rt   r   r0   r0   r1   _kl_pareto_exponential   s    
r   c                 C   s   | j  | j  }| j | }|j |j|j   }d|j | }|j| j | j  | jd  }|| | | d }t|| jdk< |S r   )r   rW   r   r   r   rr   r   r   rb   rc   common_termrm   rn   rt   ru   r   r0   r0   r1   _kl_pareto_gamma+  s    r   c           	      C   s   d|j d }| j | jd  }tdtj |j  | j | j   }| j }| j|d | jd  }| j| |j d}|| || |  d }t	|| jdk< |S )Nr[   r   )
r   r]   r   r   r   r   rW   r   r   r   )	rb   rc   r   r   rm   rn   rt   ru   r   r0   r0   r1   _kl_pareto_normal:  s    &
r   c                 C   s
   t | jS r)   r   r   r0   r0   r1   _kl_poisson_infinityG  s    r   c                 C   s   | j | j }t|}|jd t| j t| j |  | }|jd td| j  td| j  |  | }|j |j  |j|j   }|| | | }t|| j |j	j
k| j|j	jk B < |S r   )r   r   rT   rW   rp   rX   rq   rr   r   r   r   r   r   r0   r0   r1   _kl_uniform_betaM  s.    
 r  c              	   C   sh   |    | j|j  t|j  |  }ttt	| j
|jjt| j|jjt|t |S r)   )r   r~   rk   rT   ry   ri   r   r   r   r   r   r   r   r   r   r   r   r   r   r0   r0   r1    _kl_uniform_continuous_bernoullie  s     
r  c                 C   sB   |j | j| j  d | j| j |j    }t|| j|jjk < |S )Nr[   )r   r   r   rW   r   r   r   r   r0   r0   r1   _kl_uniform_exponetialw  s    ,r  c                 C   s   | j | j }| }|j |j|j   }d|j t| j t| j |  | }|j| j | j  d }| | | | }t|| j|jj	k < |S )Nr   r[   )
r   r   rW   r   rr   r   rX   r   r   r   r   r0   r0   r1   _kl_uniform_gamma~  s    r  c                 C   sn   |j | j| j  }| j|j |j  }| j|j |j  }| d||   }|t| t|   }|| S )Nr   )r   r   r   r   rW   rT   r   )rb   rc   r   Zhigh_loc_diffZlow_loc_diffrm   rn   r0   r0   r1   _kl_uniform_gumbel  s    r  c                 C   st   | j | j }ttjd |j |  }|dd }| j | j d|j  d d}|d||  |jd  S )Nr[      r   )	r   r   r   r   r   r   rW   r]   r   )rb   rc   r   rm   rn   rt   r0   r0   r1   _kl_uniform_normal  s
     r  c                 C   sl   | j | j }|j|j|j |  }t| j t| j | | }||jd  | }t|| j|jj	k < |S r   )
r   r   r   r   r]   rW   rX   r   r   r   )rb   rc   Zsupport_uniformrm   rn   r   r0   r0   r1   _kl_uniform_pareto  s    r  c                 C   s*   | j |j krtt| j|j}t|| j S r)   )Zreinterpreted_batch_ndimsrf   r(   r   r$   r   r0   r0   r1   _kl_independent_independent  s    r	  c                 C   sD   | j |j  d| j|j d  }d| j  |j   }|| S )Nr[      r   rl   r0   r0   r1   _kl_cauchy_cauchy  s    (r  c                  C   s^   dg} t tdd dD ]$\}}| d|j d|j d qd| }tjrZt j|7  _d	S )
zHAppends a list of implemented KL functions to the doc for kl_divergence.zLKL divergence is currently implemented for the following distribution pairs:c                 S   s   | d j | d j fS rh   )rB   )Zp_qr0   r0   r1   <lambda>      z_add_kl_info.<locals>.<lambda>)keyz* :class:`~torch.distributions.z#` and :class:`~torch.distributions.`z
	N)sortedr*   appendrB   joinr(   __doc__)rowsrb   rc   Zkl_infor0   r0   r1   _add_kl_info  s     
r  )r   rN   	functoolsr   typingr   r   r   r   rT   r   Z	bernoullir	   betar
   Zbinomialr   Zcategoricalr   Zcauchyr   Zcontinuous_bernoullir   Z	dirichletr   distributionr   Z
exp_familyr   Zexponentialr   gammar   Z	geometricr   Zgumbelr   Zhalf_normalr   Zindependentr   Zlaplacer   Zlowrank_multivariate_normalr   r   r   Zmultivariate_normalr   r   normalr   Zone_hot_categoricalr   Zparetor    Zpoissonr!   Ztransformed_distributionr"   uniformr#   utilsr$   r%   r   r*   r&   __annotations____all__r'   r7   rS   rV   rX   ra   ZTensorr(   ro   rw   r{   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r	  r  r  r0   r0   r0   r1   <module>   s   +
&















!

$








	




	



























	

