o
    eR'                     @   sx  d dl Z d dlmZ d dlZddlmZmZmZ ddlmZ d)ddZ	d	d
 Z
e jjdddd Ze jjdddd Ze jjdddd Ze jjdddd Ze jjdddd Ze jjdddd Ze jjdddd Ze jjdddd Ze jjdddd Ze jjdddd  Ze jjddd!d" Zd#d$ Ze jjddd%d& Ze jjddd'd( ZdS )*    N)cuda   )
ASSERT_DRV	init_cudaload_module)kernel_string c                 C   s&   t | ddddddd|||fd d S Nr   r   r   cuLaunchKernel)kernelstreamargs	arg_typesr   r   SD:\Projects\ConvertPro\env\Lib\site-packages\cuda/benchmarks/test_launch_latency.pylaunch   s   r   c                 C   s"   t | ddddddd||d d S r	   r
   )r   r   paramsr   r   r   launch_packed   s   r   zlaunch-latency)groupc           	      C   sD   |\}}}|t |}t|d\}}t| | t|| t  d S )Ns   empty_kernel)r   r   cuModuleGetFunctionr   r   cuCtxSynchronize)		benchmarkr   r   devicectxr   moduleerrfuncr   r   r    test_launch_latency_empty_kernel   s   

r   c           
      C   s~   |\}}}|t |}t|d\}}t| tttj\}}	t| | t|||	fdd t	  t
|	\}t| d S )Ns   small_kernelNr   r   )r   r   r   r   
cuMemAllocctypessizeofc_floatr   r   	cuMemFree)
r   r   r   r   r   r   r   r   r   fr   r   r    test_launch_latency_small_kernel+   s   

r&   c                 C      |\}}}|t |}t|d\}}t| g }	d gd }
|
D ]}tttj\}}t| |	| qt	|	}	t	|
}
| t
|||	|
d t  |	D ]}t|\}t| qMd S )N   small_kernel_512_args   r   r   r   r   r   r    r!   r"   c_intappendtupler   r   r$   r   r   r   r   r   r   r   r   r   r   r   _pr   r   r   )test_launch_latency_small_kernel_512_args>   $   



r1   c                 C   p   |\}}}|t |}t|d\}}t| dgd }	tjgd }
t|	}	t|
}
| t|||	|
d t  d S )Ns   small_kernel_512_boolsTr)   r   )	r   r   r   r   r!   c_boolr-   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   *test_launch_latency_small_kernel_512_boolsX      


r6   c                 C   r3   )Ns   small_kernel_512_doublesgn?r)   r   )	r   r   r   r   r!   c_doubler-   r   r   r5   r   r   r   ,test_launch_latency_small_kernel_512_doublesj   r7   r9   c                 C   r3   )Ns   small_kernel_512_ints{   r)   r   )	r   r   r   r   r!   r+   r-   r   r   r5   r   r   r   )test_launch_latency_small_kernel_512_ints|   r7   r;   c                 C   r3   )Ns   small_kernel_512_chars   r)   r   )	r   r   r   r   r!   c_byter-   r   r   r5   r   r   r   *test_launch_latency_small_kernel_512_bytes   r7   r>   c                 C   r3   )Ns   small_kernel_512_longlongsl    r)   r   )	r   r   r   r   r!   
c_longlongr-   r   r   r5   r   r   r   .test_launch_latency_small_kernel_512_longlongs   r7   r@   c                 C   r'   )Ns   small_kernel_256_args   r   r*   r.   r   r   r   )test_launch_latency_small_kernel_256_args   r2   rB   c                 C   r'   )Ns   small_kernel_16_args   r   r*   r.   r   r   r   (test_launch_latency_small_kernel_16_args   r2   rD   c                 C   s   |\}}}|t |}t|d\}}t| g }	g }
tdD ]"}tttj\}}t| |		| |
	t
t|	|  qtj
d  }tdD ]}t|
| ||< qK| t||| t  |	D ]}t|\}t| qdd S Nr(   r)   )r   r   r   r   ranger    r!   r"   r+   r,   c_void_pint	addressofr   r   r$   )r   r   r   r   r   r   r   r   r   valsZval_psir0   ZpackagedParamsr   r   r   0test_launch_latency_small_kernel_512_args_ctypes   s(   



rL   c                 C   sz   t jt|  }dgt| }tt|D ]}t t|| ||< t || ||< qt| ddddddd||d d S )Nr   r   )r!   rG   lenrF   rH   rI   r   r   )r   r   r   Zpacked_paramsZptrsrK   r   r   r   pack_and_launch  s   rN   c                 C   s   |\}}}|t |}t|d\}}t| g }	tdD ]}
tttj\}}t| |		| q| t
|||	 t  |	D ]}t|\}t| q@d S rE   )r   r   r   r   rF   r    r!   r"   r+   r,   rN   r   r$   )r   r   r   r   r   r   r   r   r   rJ   rK   r0   r   r   r   =test_launch_latency_small_kernel_512_args_ctypes_with_packing  s   


rO   c           
      C   s`   |\}}}|t |}t|d\}}t| G dd dtj}	| t|||	 fdd t  d S )Ns   small_kernel_2048Bc                   @   s   e Zd Zdejd fgZdS )z<test_launch_latency_small_kernel_2048B.<locals>.struct_2048Bvaluesi   N)__name__
__module____qualname__r!   c_uint8_fields_r   r   r   r   struct_2048B2  s    rV   r   r   )r   r   r   r   r!   	Structurer   r   )
r   r   r   r   r   r   r   r   r   rV   r   r   r   &test_launch_latency_small_kernel_2048B*  s   

rX   )r   r   )Zpytestr   r!   Zperf_test_utilsr   r   r   Zkernelsr   r   r   markr   r   r&   r1   r6   r9   r;   r>   r@   rB   rD   rL   rN   rO   rX   r   r   r   r   <module>   sD   












