################################################################################
#
# MIT License
#
# Copyright (c) 2017 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
################################################################################

# CMP0057 NEW enables the IN_LIST operator inside if() conditions.
cmake_policy(SET CMP0057 NEW)

include(ExportHeader)
if(MIOPEN_ENABLE_SQLITE)
    add_subdirectory(sqlite)
endif()

# MIOPEN_USE_RNE_BFLOAT16 selects the bfloat16 rounding scheme: rounding to
# nearest even (RNE, the default) when ON, truncation when OFF.
# This switch controls two related but different aspects of MIOpen behavior:
# 1.  How host code performs conversions of float to bfloat16, important only
#     for testing.
# 2.  How BF16 kernels (which are kind of mixed-precision now and expected to
#     remain so in the future) perform the final conversion (and rounding) of
#     FP32 to BF16 results. This affects the main functionality of the library.
option( MIOPEN_USE_RNE_BFLOAT16 "Sets rounding scheme for bfloat16 type" ON )
option( MIOPEN_FP8_IEEE_EXPONENT_BIAS "Sets the FP8 exponent bias to IEEE" OFF)
# Declared exactly once: a second identical option() call is a no-op and only
# invites the two copies drifting apart.
option( MIOPEN_FP8_CLIPPING "Sets the FP8 clipping" ON)

# Generate config.h in the build tree from its template.
configure_file("${PROJECT_SOURCE_DIR}/include/miopen/config.h.in" "${PROJECT_BINARY_DIR}/include/miopen/config.h")

# Configure a header file to pass the CMake version settings to the source, and package the header files in the output archive
configure_file( "${PROJECT_SOURCE_DIR}/include/miopen/version.h.in" "${PROJECT_BINARY_DIR}/include/miopen/version.h" )

message( STATUS "MIOpen_VERSION= ${MIOpen_VERSION}" )
# CMAKE_BUILD_TYPE is only reported when MIOPEN_GENERATOR_IS_MULTI_CONFIG is
# false (presumably set by the top-level project for multi-config generators,
# where the build type is chosen at build time -- TODO confirm).
if(NOT MIOPEN_GENERATOR_IS_MULTI_CONFIG)
    message( STATUS "CMAKE_BUILD_TYPE= ${CMAKE_BUILD_TYPE}" )
endif()

# This is incremented when the ABI to the library changes
set( MIOpen_SOVERSION 1.0 )

# Generates ${PROJECT_BINARY_DIR}/<FILE_NAME> from the template
# kernels/<FILE_NAME>.in.
#
# Arguments:
#   FILE_NAME    - name of the generated file; the template is looked up as
#                  kernels/${FILE_NAME}.in
#   VAR_PREFIX   - text placed in front of every generated C symbol name
#   VAR_SUFFIX   - text placed after every generated C symbol name
#   KERNEL_FILES - list of kernel file paths, passed as ONE (quoted) argument
#
# Variables prepared for the template:
#   KERNELS_DECLS - one "extern const size_t <sym>_SIZE;" and one
#                   "extern const char <sym>[];" line per kernel file
#   INIT_KERNELS  - one '{ "<file name>", { <sym>, <sym>_SIZE } }' entry per
#                   kernel file, joined with ",\n"
# where <sym> is VAR_PREFIX + the upper-cased, C-identifier-sanitized file name
# without directory and extension + VAR_SUFFIX.
function(add_kernels FILE_NAME VAR_PREFIX VAR_SUFFIX KERNEL_FILES)
    set(KERNELS_DECLS)
    set(TABLE_ENTRIES)
    foreach(KERNEL_PATH ${KERNEL_FILES})
        # Register the kernel file as a configure-time dependency so that
        # touching it re-runs CMake.
        set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${KERNEL_PATH})

        # Derive the C symbol from the file name (no directory, no extension).
        get_filename_component(STEM ${KERNEL_PATH} NAME_WE)
        get_filename_component(KERNEL_FILENAME ${KERNEL_PATH} NAME)
        string(TOUPPER "${STEM}" STEM_UPPER)
        string(MAKE_C_IDENTIFIER "${STEM_UPPER}" IDENT)
        set(SYMBOL "${VAR_PREFIX}${IDENT}${VAR_SUFFIX}")

        string(APPEND KERNELS_DECLS
            "extern const size_t ${SYMBOL}_SIZE;\n"
            "extern const char ${SYMBOL}[];\n")
        list(APPEND TABLE_ENTRIES "    { \"${KERNEL_FILENAME}\", { ${SYMBOL}, ${SYMBOL}_SIZE } }")
    endforeach()

    # Turn the CMake list into a comma-separated C initializer body.
    string(REPLACE ";" ",\n" INIT_KERNELS "${TABLE_ENTRIES}")
    configure_file(kernels/${FILE_NAME}.in ${PROJECT_BINARY_DIR}/${FILE_NAME})
endfunction()

# Base list of library source files that is populated unconditionally.
# Paths are relative (presumably to this directory -- TODO confirm against the
# command that consumes the list, which is outside this part of the file).
# Option- and backend-specific sources are appended to this list further below.
set( MIOpen_Source
    activ/problem_description.cpp
    activ_api.cpp
    adam/problem_description.cpp
    adam_api.cpp
    addlayernorm_api.cpp
    api/find2_0_commons.cpp
    base64.cpp
    batch_norm.cpp
    batch_norm_api.cpp
    batchnorm/problem_description.cpp
    buffer_info.cpp
    cat_api.cpp
    cat/problem_description.cpp
    check_numerics.cpp
    conv/invokers/gcn_asm_1x1u.cpp
    conv/invokers/gcn_asm_1x1u_ss.cpp
    conv/invokers/gcn_asm_1x1u_us.cpp
    conv/invokers/gcn_asm_wino.cpp
    conv/invokers/gen_x_w_y_pad.cpp
    conv/invokers/impl_gemm.cpp
    conv/invokers/impl_gemm_dynamic.cpp
    conv/invokers/ocl_wrw_rdc.cpp
    conv/kernel_interface/winograd_kernel_interface.cpp
    conv/problem_description.cpp
    conv/solver_finders.cpp
    conv_algo_name.cpp
    convolution.cpp
    convolution_api.cpp
    ctc.cpp
    ctc_api.cpp
    db.cpp
    db_record.cpp
    driver_arguments.cpp
    dropout.cpp
    dropout_api.cpp
    env.cpp
    env_debug.cpp
    execution_context.cpp
    expanduser.cpp
    fin/fin_interface.cpp
    find_controls.cpp
    find_db.cpp
    fused_api.cpp
    fusion.cpp
    fusion/problem_description.cpp
    generic_search.cpp
    getitem_api.cpp
    glu/problem_description.cpp
    glu_api.cpp
    graphapi/convolution.cpp
    graphapi/conv_bias_res_add_activ_forward_executor.cpp
    graphapi/engine.cpp
    graphapi/enginecfg.cpp
    graphapi/engineheur.cpp
    graphapi/execution_plan.cpp
    graphapi/find_engine.cpp
    graphapi/graphapi.cpp
    graphapi/matmul.cpp
    graphapi/opgraph.cpp
    graphapi/pointwise.cpp
    graphapi/reduction.cpp
    graphapi/reshape.cpp
    graphapi/rng.cpp
    graphapi/tensor.cpp
    graphapi/variant_pack.cpp
    groupnorm_api.cpp
    groupnorm/problem_description.cpp
    handle_api.cpp
    invoker_cache.cpp
    getitem/problem_description.cpp
    kernel_build_params.cpp
    kernel_warnings.cpp
    kthvalue/problem_description.cpp
    kthvalue_api.cpp
    layernorm_api.cpp
    layernorm/problem_description.cpp
    load_file.cpp
    lock_file.cpp
    logger.cpp
    lrn_api.cpp
    mha/mha_descriptor.cpp
    mha/problem_description.cpp
    multimarginloss/problem_description.cpp
    multimarginloss_api.cpp
    op_args.cpp
    operator.cpp
    performance_config.cpp
    pooling/problem_description.cpp
    pooling_api.cpp
    prelu/problem_description.cpp
    prelu_api.cpp
    problem.cpp
    process.cpp
    ramdb.cpp
    readonlyramdb.cpp
    reducecalculation_api.cpp
    reduceextreme_api.cpp
    reducetensor.cpp
    reducetensor_api.cpp
    reduce/problem_description.cpp
    rnn.cpp
    rnn_api.cpp
    rnn/rnn_util.cpp
    rnn/selector.cpp
    rnn/Solutions/rnn_transformer.cpp
    rnn/Solutions/Base/fw_data_modular.cpp
    rnn/Solutions/Base/bw_weights_modular.cpp
    rnn/Solutions/Base/bw_data_modular.cpp
    rnn/Solutions/fwd_s_stream.cpp
    rnn/Solutions/bwd_s_stream.cpp
    rnn/Solutions/bwd_multi_stream.cpp
    rnn/Solutions/bww_s_steam.cpp
    rnn/Solutions/bww_multi_stream.cpp
    rope_api.cpp
    rope/problem_description.cpp
    scalar.cpp
    softmarginloss/problem_description.cpp
    softmarginloss_api.cpp
    softmax.cpp
    softmax_api.cpp
    softmax/problem_description.cpp
    solution.cpp
    solver.cpp
    solver/activ/bwd_0.cpp
    solver/activ/bwd_1.cpp
    solver/activ/fwd_0.cpp
    solver/activ/fwd_1.cpp
    solver/adam/adam.cpp
    solver/adam/transformers_adam_w.cpp
    solver/batchnorm/backward_per_activation.cpp
    solver/batchnorm/backward_per_activation_fused.cpp
    solver/batchnorm/backward_spatial.cpp
    solver/batchnorm/forward_inference.cpp
    solver/batchnorm/forward_inference_fused.cpp
    solver/batchnorm/forward_per_activation.cpp
    solver/batchnorm/forward_per_activation_fused.cpp
    solver/batchnorm/forward_spatial.cpp
    solver/cat/forward_cat.cpp
    solver/conv/conv_asm_1x1u.cpp
    solver/conv/conv_asm_1x1u_stride2.cpp
    solver/conv/conv_asm_3x3u.cpp
    solver/conv/conv_asm_5x10u2v2b1.cpp
    solver/conv/conv_asm_5x10u2v2f1.cpp
    solver/conv/conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp
    solver/conv/conv_asm_dir_BwdWrW1x1.cpp
    solver/conv/conv_asm_dir_BwdWrW3x3.cpp
    solver/conv/conv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp
    solver/conv/conv_asm_implicit_gemm_gtc_bwd.cpp
    solver/conv/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp
    solver/conv/conv_asm_implicit_gemm_gtc_fwd.cpp
    solver/conv/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp
    solver/conv/conv_asm_implicit_gemm_gtc_fwd_nchwc.cpp
    solver/conv/conv_asm_implicit_gemm_gtc_perf_config.cpp
    solver/conv/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp
    solver/conv/conv_asm_implicit_gemm_v4r1_dynamic.cpp
    solver/conv/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp
    solver/conv/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp
    solver/conv/conv_bin_wino3x3U.cpp
    solver/conv/conv_bin_winoRxS.cpp
    solver/conv/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp
    solver/conv/conv_direct_naive_conv.cpp
    solver/conv/conv_direct_naive_conv_bwd.cpp
    solver/conv/conv_direct_naive_conv_fwd.cpp
    solver/conv/conv_direct_naive_conv_wrw.cpp
    solver/conv/conv_hip_implicit_gemm_bwd_data_xdlops.cpp
    solver/conv/conv_hip_implicit_gemm_bwd_v1r1.cpp
    solver/conv/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp
    solver/conv/conv_hip_implicit_gemm_bwd_v4r1.cpp
    solver/conv/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp
    solver/conv/conv_hip_implicit_gemm_fwd_v4r1.cpp
    solver/conv/conv_hip_implicit_gemm_fwd_v4r4.cpp
    solver/conv/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp
    solver/conv/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp
    solver/conv/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp
    solver/conv/conv_hip_implicit_gemm_fwd_xdlops.cpp
    solver/conv/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp
    solver/conv/conv_hip_implicit_gemm_grouped_bwd_xdlops.cpp
    solver/conv/conv_hip_implicit_gemm_grouped_wrw_xdlops.cpp
    solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp
    solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp
    solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp
    solver/conv/conv_hip_implicit_gemm_nonxdlops_common.cpp
    solver/conv/conv_hip_implicit_gemm_wrw_v4r4.cpp
    solver/conv/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp
    solver/conv/conv_hip_implicit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp
    solver/conv/conv_MP_bidirectional_winograd.cpp
    solver/conv/conv_mlir_igemm_bwd.cpp
    solver/conv/conv_mlir_igemm_bwd_xdlops.cpp
    solver/conv/conv_mlir_igemm_fwd.cpp
    solver/conv/conv_mlir_igemm_fwd_xdlops.cpp
    solver/conv/conv_mlir_igemm_wrw.cpp
    solver/conv/conv_mlir_igemm_wrw_xdlops.cpp
    solver/conv/conv_multipass_wino3x3WrW.cpp
    solver/conv/conv_ocl_dir2D_bwdWrW_1x1.cpp
    solver/conv/conv_ocl_dir2D_bwdWrW_2.cpp
    solver/conv/conv_ocl_dir2D_bwdWrW_53.cpp
    solver/conv/conv_ocl_dir2D11x11.cpp
    solver/conv/conv_ocl_dir2Dfwd.cpp
    solver/conv/conv_ocl_dir2Dfwd_exhaustive_search.cpp
    solver/conv/conv_ocl_dir2Dfwd1x1.cpp
    solver/conv/conv_ocl_dir2Dfwdgen.cpp
    solver/conv/conv_wino_fury_RxS.cpp
    solver/conv/conv_wino_rage_RxS.cpp
    solver/conv/conv_winoRxS.cpp
    solver/conv/fft.cpp
    solver/conv/gemm.cpp
    solver/conv/gemm_bwd.cpp
    solver/conv/gemm_common.cpp
    solver/conv/gemm_wrw.cpp
    solver/conv_asm_1x1u_bias_activ_fused.cpp
    solver/conv_bin_winoRxS_fused.cpp
    solver/conv_ck_igemm_grp_fwd_activ_fused.cpp
    solver/conv_ck_igemm_fwd_bias_activ_fused.cpp
    solver/conv_ck_igemm_grp_fwd_bias_activ_fused.cpp
    solver/conv_ck_igemm_fwd_bias_res_add_activ_fused.cpp
    solver/conv_ocl_dir2Dfwd_fused.cpp
    solver/conv_winoRxS_fused.cpp
    solver/glu/backward_glu.cpp
    solver/glu/forward_glu.cpp
    solver/groupnorm/forward_groupnorm.cpp
    solver/getitem/backward_getitem.cpp
    solver/kthvalue/forward_kthvalue.cpp
    solver/layernorm/backward_layernorm.cpp
    solver/layernorm/backward_t5layernorm.cpp
    solver/layernorm/forward_addlayernorm.cpp
    solver/layernorm/forward_layernorm.cpp
    solver/layernorm/forward_layernorm2d_ck.cpp
    solver/layernorm/forward_layernorm4d_ck.cpp
    solver/layernorm/forward_t5layernorm.cpp
    solver/mha/mha_solver_backward.cpp
    solver/mha/mha_solver_forward.cpp
    solver/multimarginloss/forward_multimarginloss.cpp
    solver/pooling/forward2d.cpp
    solver/pooling/forwardNaive.cpp
    solver/pooling/forwardNd.cpp
    solver/pooling/backward2d.cpp
    solver/pooling/backwardNd.cpp
    solver/prelu/backward_prelu_multi_weights.cpp
    solver/prelu/backward_prelu_single_weight.cpp
    solver/prelu/utils.cpp
    solver/reduce/forward_argmax.cpp
    solver/reduce/forward_argmin.cpp
    solver/reduce/forward_max.cpp
    solver/reduce/forward_min.cpp
    solver/reduce/forward_prod.cpp
    solver/reduce/forward_sum.cpp
    solver/rope/backward_rope.cpp
    solver/rope/forward_rope.cpp
    solver/softmarginloss/backward_softmarginloss.cpp
    solver/softmarginloss/forward_softmarginloss.cpp
    solver/softmax/attn_softmax.cpp
    solver/softmax/softmax.cpp
    solver/tensorOp/Op1dTensorGeneric.cpp
    solver/tensorOp/Op2dTensorGeneric.cpp
    solver/tensorOp/Op2dTensorLite.cpp
    solver/tensorOp/Op2dTensorSquash.cpp
    solver/tensorOp/Op3dTensorGeneric.cpp
    solver/tensorOp/Op4dTensorGeneric.cpp
    solver/tensorOp/Op4dTensorLite.cpp
    solver/tensorOp/Op5dTensorGeneric.cpp
    solver/tensorOp/OpTensorFwdBias.cpp
    solver/tensorOp/OpTensorLeadingOnes.cpp
    subbuffers.cpp
    t5layernorm_api.cpp
    target_properties.cpp
    temp_file.cpp
    tensor.cpp
    tensorOp/problem_description.cpp
    tensor_api.cpp
    transformers_adam_w_api.cpp
    seq_tensor.cpp
    api/tuning.cpp
)

# Sources needed by either of the AI-based features.
if(MIOPEN_ENABLE_AI_KERNEL_TUNING OR MIOPEN_ENABLE_AI_IMMED_MODE_FALLBACK)
    list(APPEND MIOpen_Source
        conv/heuristics/ai_heuristics.cpp
        anyramdb.cpp
    )
endif()

# Always built.
list(APPEND MIOpen_Source
    tmp_dir.cpp
    binary_cache.cpp
    md5.cpp
)

# SQLite-backed database; the kernel-cache sources additionally require
# MIOPEN_ENABLE_SQLITE_KERN_CACHE.
if(MIOPEN_ENABLE_SQLITE)
    list(APPEND MIOpen_Source sqlite_db.cpp)
    if(MIOPEN_ENABLE_SQLITE_KERN_CACHE)
        list(APPEND MIOpen_Source kern_db.cpp bz2.cpp)
    endif()
endif()

# Layernorm instance sources added only when MIOPEN_USE_COMPOSABLEKERNEL is set.
if(MIOPEN_USE_COMPOSABLEKERNEL)
    list(APPEND MIOpen_Source
        kernels/ck_header_only/layernorm/fwd/device_layernorm2d_fwd_f16_instance.cpp
        kernels/ck_header_only/layernorm/fwd/device_layernorm2d_fwd_f32_instance.cpp
        kernels/ck_header_only/layernorm/fwd/device_layernorm4d_fwd_f16_instance.cpp
        kernels/ck_header_only/layernorm/fwd/device_layernorm4d_fwd_f32_instance.cpp
    )
endif()

if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN_BACKEND STREQUAL "HIP" OR MIOPEN_BACKEND STREQUAL "HIPNOGPU")
    # Kernel sources and headers that are discovered by globbing rather than
    # being listed explicitly. The globs are evaluated at configure time only,
    # so files added later are picked up on the next CMake run.

    # Static composable kernel.
    file(GLOB_RECURSE STATIC_COMPOSABLE_KERNEL_INCLUDE
        "kernels/static_composable_kernel/include/*/*.hpp")
    file(GLOB_RECURSE STATIC_COMPOSABLE_KERNEL_SOURCE
        "kernels/static_composable_kernel/src/*/*.cpp")

    # Legacy composable kernel.
    file(GLOB_RECURSE COMPOSABLE_KERNEL_INCLUDE
        "legacy_composable_kernel/composable_kernel/include/*.hpp")
    file(GLOB_RECURSE COMPOSABLE_KERNEL_SOURCE
        "legacy_composable_kernel/composable_kernel/src/*.cpp")

    # Dynamic implicit GEMM: assembly sources, assembly includes, C++ sources.
    file(GLOB_RECURSE COMPOSABLE_KERNEL_DYNAMIC_ASM_SOURCE
        "kernels/dynamic_igemm/*.s")
    file(GLOB_RECURSE COMPOSABLE_KERNEL_DYNAMIC_ASM_INCLUDE
        "kernels/dynamic_igemm/*.inc")
    file(GLOB_RECURSE COMPOSABLE_KERNEL_DYNAMIC_CPP_SOURCE
        "kernels/dynamic_igemm/*.cpp")

    # GPU reference kernels (HIP and assembly).
    file(GLOB_RECURSE GPU_REFERENCE_KERNEL_HIP
        "kernels/gpu_reference_kernel/*.cpp")
    file(GLOB_RECURSE GPU_REFERENCE_KERNEL_ASM
        "kernels/gpu_reference_kernel/*.s")

    # Batched transpose and general tensor reorder kernels.
    file(GLOB_RECURSE GPU_BATCHED_TRANSPOSE_KERNEL_HIP
        "kernels/gpu_batched_transpose_kernel/*.cpp")
    file(GLOB_RECURSE GPU_GENERAL_TENSOR_REORDER_KERNEL_HIP_INCLUDE
        "kernels/gpu_general_tensor_reorder_kernel/*.hpp")
    file(GLOB_RECURSE GPU_GENERAL_TENSOR_REORDER_KERNEL_HIP_SOURCE
        "kernels/gpu_general_tensor_reorder_kernel/*.cpp")


    # Header/include files for the kernels: the globbed include lists followed
    # by explicitly listed files. This list is passed to
    # add_kernels("kernel_includes.cpp" ...) below and is used as a dependency
    # of the kernel inlining step.
    set(MIOPEN_KERNEL_INCLUDES
        ${STATIC_COMPOSABLE_KERNEL_INCLUDE}
        ${COMPOSABLE_KERNEL_INCLUDE}
        ${COMPOSABLE_KERNEL_DYNAMIC_ASM_INCLUDE}
        ${GPU_GENERAL_TENSOR_REORDER_KERNEL_HIP_INCLUDE}
        kernels/activation_functions.hpp
        kernels/gpu_reference_kernel/fp8_kern_types.h
        kernels/Conv_Winograd_v13_3_12_fp16dot_stride1.inc
        kernels/Conv_Winograd_v13_3_12_fp16dot_stride2_dec.inc
        kernels/Conv_Winograd_v13_3_12_fp16dot_stride2_dil.inc
        kernels/Conv_Winograd_v14_3_3_fp16dot_stride1.inc
        kernels/Conv_Winograd_v14_3_3_fp16dot_stride2_dec.inc
        kernels/Conv_Winograd_v14_3_3_fp16dot_stride2_dil.inc
        kernels/Conv_Winograd_v13_3_12_epilogue.inc
        kernels/Conv_Winograd_v13_3_12_prologue.inc
        kernels/Conv_Winograd_v16_5_0_epilogue.inc
        kernels/Conv_Winograd_v16_5_0_prologue.inc
        kernels/Conv_Winograd_v16_5_0_stride1.inc
        kernels/conv_3x3_wheel_alpha_v9_2_7_epilogue.inc
        kernels/conv_3x3_wheel_alpha_v9_2_7_prologue.inc
        kernels/conv_3x3_wheel_alpha_v9_2_7_gfx8_stride_2_dec.inc
        kernels/conv_3x3_wheel_alpha_v9_2_7_gfx8.inc
        kernels/conv_3x3_wheel_alpha_v3_0b_epilogue.inc
        kernels/conv_3x3_wheel_alpha_v3_0b_prologue.inc
        kernels/conv_3x3_wheel_alpha_v3_0b.inc
        kernels/conv_3x3_wheel_alpha_v7_0_3b_epilogue.inc
        kernels/conv_3x3_wheel_alpha_v7_0_3b_prologue.inc
        kernels/conv_3x3_wheel_alpha_v7_0_3b.inc
        kernels/conv_3x3_wheel_alpha_v9_0_15_epilogue.inc
        kernels/conv_3x3_wheel_alpha_v9_0_15_prologue.inc
        kernels/conv_3x3_wheel_alpha_v9_0_15_gfx8_stride_2_dil.inc
        kernels/conv_3x3_wheel_alpha_v9_0_15_gfx8_stride_2_dec.inc
        kernels/conv_3x3_wheel_alpha_v9_0_15_gfx8.inc
        kernels/conv_3x3_wheel_alpha_v9_0_15_gfx9_stride_2_dil.inc
        kernels/conv_3x3_wheel_alpha_v9_0_15_gfx9_stride_2_dec.inc
        kernels/conv_3x3_wheel_alpha_v9_0_15_gfx9.inc
        kernels/Conv_Winograd_v21_1_3_gfx9_fp16_dot2_edc_f2x3_dilation2.inc
        kernels/Conv_Winograd_v21_1_3_gfx9_fp16_dot2_edc_f2x3_stride1.inc
        kernels/Conv_Winograd_v21_1_3_gfx9_fp16_dot2_edc_f2x3_stride2.inc
        kernels/Conv_Winograd_v21_1_3_gfx9_fp16_dot2_edc_f3x2_stride1.inc
        kernels/Conv_Winograd_v21_1_3_gfx9_fp32_f2x3_dilation2.inc
        kernels/Conv_Winograd_v21_1_3_gfx9_fp32_f2x3_stride1.inc
        kernels/Conv_Winograd_v21_1_3_gfx9_fp32_f2x3_stride2.inc
        kernels/Conv_Winograd_v21_1_3_gfx9_fp32_f3x2_stride1.inc
        kernels/Conv_Winograd_v21_1_3_metadata.inc
        kernels/Conv_Winograd_v30_3_1_gfx94x_fp16_dot2_edc_f2x3_dilation2.inc
        kernels/Conv_Winograd_v30_3_1_gfx94x_fp16_dot2_edc_f2x3_stride1.inc
        kernels/Conv_Winograd_v30_3_1_gfx94x_fp16_dot2_edc_f2x3_stride2.inc
        kernels/Conv_Winograd_v30_3_1_gfx94x_fp32_f2x3_dilation2.inc
        kernels/Conv_Winograd_v30_3_1_gfx94x_fp32_f2x3_stride1.inc
        kernels/Conv_Winograd_v30_3_1_gfx94x_fp32_f2x3_stride2.inc
        kernels/Conv_Winograd_v30_3_1_gfx9_fp16_dot2_edc_f2x3_dilation2.inc
        kernels/Conv_Winograd_v30_3_1_gfx9_fp16_dot2_edc_f2x3_stride1.inc
        kernels/Conv_Winograd_v30_3_1_gfx9_fp16_dot2_edc_f2x3_stride2.inc
        kernels/Conv_Winograd_v30_3_1_gfx9_fp32_f2x3_dilation2.inc
        kernels/Conv_Winograd_v30_3_1_gfx9_fp32_f2x3_stride1.inc
        kernels/Conv_Winograd_v30_3_1_gfx9_fp32_f2x3_stride2.inc
        kernels/Conv_Winograd_v30_3_1_gfx10_fp16_dot2_f2x3_dilation2.inc
        kernels/Conv_Winograd_v30_3_1_gfx10_fp16_dot2_f2x3_stride1.inc
        kernels/Conv_Winograd_v30_3_1_gfx10_fp16_dot2_f2x3_stride2.inc
        kernels/Conv_Winograd_v30_3_1_gfx10_fp32_f2x3_dilation2.inc
        kernels/Conv_Winograd_v30_3_1_gfx10_fp32_f2x3_stride1.inc
        kernels/Conv_Winograd_v30_3_1_gfx10_fp32_f2x3_stride2.inc
        kernels/Conv_Winograd_v30_3_1_gfx11_fp16_dot2_f2x3_dilation2.inc
        kernels/Conv_Winograd_v30_3_1_gfx11_fp16_dot2_f2x3_stride1.inc
        kernels/Conv_Winograd_v30_3_1_gfx11_fp16_dot2_f2x3_stride2.inc
        kernels/Conv_Winograd_v30_3_1_gfx11_fp32_f2x3_dilation2.inc
        kernels/Conv_Winograd_v30_3_1_gfx11_fp32_f2x3_stride1.inc
        kernels/Conv_Winograd_v30_3_1_gfx11_fp32_f2x3_stride2.inc
        kernels/Conv_Winograd_v30_3_1_gfx94x_fp16_dot2_edc_f3x2_dilation2.inc
        kernels/Conv_Winograd_v30_3_1_gfx94x_fp16_dot2_edc_f3x2_stride1.inc
        kernels/Conv_Winograd_v30_3_1_gfx94x_fp16_dot2_edc_f3x2_stride2.inc
        kernels/Conv_Winograd_v30_3_1_gfx94x_fp32_f3x2_dilation2.inc
        kernels/Conv_Winograd_v30_3_1_gfx94x_fp32_f3x2_stride1.inc
        kernels/Conv_Winograd_v30_3_1_gfx94x_fp32_f3x2_stride2.inc
        kernels/Conv_Winograd_v30_3_1_gfx9_fp16_dot2_edc_f3x2_dilation2.inc
        kernels/Conv_Winograd_v30_3_1_gfx9_fp16_dot2_edc_f3x2_stride1.inc
        kernels/Conv_Winograd_v30_3_1_gfx9_fp16_dot2_edc_f3x2_stride2.inc
        kernels/Conv_Winograd_v30_3_1_gfx9_fp32_f3x2_dilation2.inc
        kernels/Conv_Winograd_v30_3_1_gfx9_fp32_f3x2_stride1.inc
        kernels/Conv_Winograd_v30_3_1_gfx9_fp32_f3x2_stride2.inc
        kernels/Conv_Winograd_v30_3_1_gfx10_fp16_dot2_f3x2_dilation2.inc
        kernels/Conv_Winograd_v30_3_1_gfx10_fp16_dot2_f3x2_stride1.inc
        kernels/Conv_Winograd_v30_3_1_gfx10_fp16_dot2_f3x2_stride2.inc
        kernels/Conv_Winograd_v30_3_1_gfx10_fp32_f3x2_dilation2.inc
        kernels/Conv_Winograd_v30_3_1_gfx10_fp32_f3x2_stride1.inc
        kernels/Conv_Winograd_v30_3_1_gfx10_fp32_f3x2_stride2.inc
        kernels/Conv_Winograd_v30_3_1_gfx11_fp16_dot2_f3x2_dilation2.inc
        kernels/Conv_Winograd_v30_3_1_gfx11_fp16_dot2_f3x2_stride1.inc
        kernels/Conv_Winograd_v30_3_1_gfx11_fp16_dot2_f3x2_stride2.inc
        kernels/Conv_Winograd_v30_3_1_gfx11_fp32_f3x2_dilation2.inc
        kernels/Conv_Winograd_v30_3_1_gfx11_fp32_f3x2_stride1.inc
        kernels/Conv_Winograd_v30_3_1_gfx11_fp32_f3x2_stride2.inc
        kernels/Conv_Winograd_v30_3_1_metadata.inc
        kernels/MIOpenReduceCalculation.hpp
        kernels/MIOpenReduceExtreme.hpp
        kernels/bfloat16_dev.hpp
        kernels/block_reduce.hpp
        kernels/conv_common.inc
        kernels/conv_sizes.inc
        kernels/float_types.h
        kernels/gpr_alloc.inc
        kernels/hip_atomic.hpp
        kernels/hip_f8_impl.hpp
        kernels/hip_float8.hpp
        kernels/inst_wrappers.inc
        kernels/miopen_cstdint.hpp
        kernels/miopen_limits.hpp
        kernels/miopen_rocrand.hpp
        kernels/miopen_type_traits.hpp
        kernels/miopen_utility.hpp
        kernels/miopen_warp_size.hpp
        kernels/neuron.inc
        kernels/radix.hpp
        kernels/rocm_version.inc
        kernels/stride_array.hpp
        kernels/tensor_view.hpp
        kernels/utilities.inc
        kernels/warp_reduce.hpp
        kernels/winograd/Conv_Winograd_Fury_v2_4_1_gfx11_1536vgprs_fp16_fp16acc_f2x3_c16_stride1.inc
        kernels/winograd/Conv_Winograd_Fury_v2_4_1_gfx11_1536vgprs_fp16_fp16acc_f2x3_c32_stride1.inc
        kernels/winograd/Conv_Winograd_Fury_v2_4_1_gfx11_1024vgprs_fp16_fp16acc_f2x3_c16_stride1.inc
        kernels/winograd/Conv_Winograd_Fury_v2_4_1_metadata.inc
        kernels/winograd/Conv_Winograd_Rage_v4_6_0_gfx94x_fp16_fp32acc_f2x3_stride1.inc
        kernels/winograd/Conv_Winograd_Rage_v4_6_0_metadata.inc
        kernels/winograd/Conv_Winograd_Rage_v4_7_0_gfx94x_fp16_fp32acc_f2x3_stride1.inc
        kernels/winograd/Conv_Winograd_Rage_v4_7_0_metadata.inc
        kernels/xform_bidirect_winograd_code.inc
        kernels/xform_data_filter.inc
        kernels/xform_kd_cov2.inc
        kernels/xform_metadata.inc
        )

    # Kernel source files (OpenCL .cl, HIP .cpp and assembly .s): the globbed
    # source lists followed by explicitly listed files. This list is passed to
    # add_kernels("kernel.cpp" ...) below.
    set(MIOPEN_KERNELS
        ${STATIC_COMPOSABLE_KERNEL_SOURCE}
        ${COMPOSABLE_KERNEL_SOURCE}
        ${COMPOSABLE_KERNEL_DYNAMIC_ASM_SOURCE}
        ${COMPOSABLE_KERNEL_DYNAMIC_CPP_SOURCE}
        ${GPU_REFERENCE_KERNEL_HIP}
        ${GPU_REFERENCE_KERNEL_ASM}
        ${GPU_BATCHED_TRANSPOSE_KERNEL_HIP}
        ${GPU_GENERAL_TENSOR_REORDER_KERNEL_HIP_SOURCE}
        kernels/MIOpenAdam.cpp
        kernels/MIOpenCat.cpp
        kernels/MIOpenCheckNumerics.cpp
        kernels/MIOpenBatchNormActivBwdPerAct.cl
        kernels/MIOpenBatchNormActivBwdSpatial.cl
        kernels/MIOpenBatchNormActivFwdTrainPerAct.cl
        kernels/MIOpenBatchNormActivFwdTrainSpatial.cl
        kernels/MIOpenBatchNormFwdTrainSpatial.cl
        kernels/MIOpenBatchNormFwdTrainPerAct.cl
        kernels/MIOpenBatchNormFwdInferSpatial.cl
        kernels/MIOpenBatchNormFwdInferPerAct.cl
        kernels/MIOpenBatchNormBwdSpatial.cl
        kernels/MIOpenBatchNormBwdPerAct.cl
        kernels/MIOpenConvDirUni.cl
        kernels/MIOpenConvDirBatchNormActiv.cl
        kernels/MIOpenConvDirGenFwd.cl
        kernels/MIOpenGLU.cpp
        kernels/MIOpenGroupNorm.cpp
        kernels/MIOpenGetitem.cpp
        kernels/MIOpenKthvalue.cpp
        kernels/MIOpenLayerNorm.cpp
        kernels/MIOpenLRNBwd.cl
        kernels/MIOpenLRNFwd.cl
        kernels/MIOpenMultiMarginLoss.cpp
        kernels/MIOpenNeuron.cl
        kernels/MIOpenPReLU.cpp
        kernels/MIOpenPooling.cl
        kernels/MIOpenPoolingBwd.cl
        kernels/MIOpenPoolingBwdND.cl
        kernels/MIOpenPoolingForwardNaive.cl
        kernels/MIOpenPoolingND.cl
        kernels/MIOpenConv1x1S.cl
        kernels/MIOpenConv1x1J1.cl
        kernels/MIOpenConv1x1J1_stride.cl
        kernels/MIOpenReduceCalculation.cpp
        kernels/MIOpenReduceExtreme.cpp
        kernels/MIOpenReduceSum.cpp
        kernels/MIOpenRoPE.cpp
        kernels/MIOpenSoftMarginLoss.cpp
        kernels/MIOpenSoftmax.cl
        kernels/MIOpenSoftmaxAttn.cpp
        kernels/MIOpenUtilKernels3.cl
        kernels/MIOpenUtilKernels4.cl
        kernels/MIOpenUtilKernels5.cl
        kernels/MIOpenVecAdd.cpp
        kernels/MIOpenVecAddOCL.cl
        kernels/MIOpenIm2d2Col.cl
        kernels/MIOpenIm3d2Col.cl
        kernels/MIOpenCol2Im2d.cl
        kernels/MIOpenCol2Im3d.cl
        kernels/MIOpenConvBwdWrWS2.cl
        kernels/MIOpenGroupConvBwdWrWS2.cl
        kernels/MIOpenConvBwdWrW_LxG_P53.cl
        kernels/MIOpenGroupConvBwdWrW_LxG_P53.cl
        kernels/MIOpenConvBwdWrW_LxG_5x5.cl
        kernels/MIOpenConvBwdWrW1x1_PAD_read4.cl
        kernels/MIOpenConvFwd_LxL_11.cl
        kernels/MIOpenConvFFT.cl
        kernels/MIOpenRNNHiddenStateUpdate.cl
        kernels/bugzilla_34765_detect.s
        kernels/dummy_kernel.s
        kernels/conv3x3.s
        kernels/conv1x1u.s
        kernels/conv1x1u_stride2.s
        kernels/conv1x1u_bias_activ.s
        kernels/conv3x3wrw.s
        kernels/conv1x1wrw.s
        kernels/conv5x10u2v2f1.s
        kernels/conv5x10u2v2b1.s
        kernels/conv7x7c3h224w224k64u2v2p3q3f1.s
        kernels/xform_out.s
        kernels/gcnAsmBNBwdTrainSpatial.s
        kernels/MIOpenTensorKernels.cl
        kernels/MIOpenTensorKernelsHip.cpp
        kernels/MIOpenSubTensorOpWithScalarKernel.cl
        kernels/MIOpenSubTensorOpWithSubTensorKernel.cl
        kernels/MIOpenSubTensorOpWithCastTensorKernel.cl
        kernels/MIOpenSubTensorOpWithTransformKernel.cl
        kernels/Conv_Winograd_v13_3_12_fp16dot_stride1.s
        kernels/Conv_Winograd_v13_3_12_fp16dot_stride2_dec.s
        kernels/Conv_Winograd_v13_3_12_fp16dot_stride2_dil.s
        kernels/Conv_Winograd_v14_3_3_fp16dot_stride1.s
        kernels/Conv_Winograd_v14_3_3_fp16dot_stride2_dec.s
        kernels/Conv_Winograd_v14_3_3_fp16dot_stride2_dil.s
        kernels/Conv_Winograd_v16_5_0_stride1.s
        kernels/conv_3x3_wheel_alpha_v9_0_15_stride_2_dil.s
        kernels/conv_3x3_wheel_alpha_v9_0_15_stride_2_dec.s
        kernels/conv_3x3_wheel_alpha_v9_0_15.s
        kernels/conv_3x3_wheel_alpha_v7_0_3b.s
        kernels/conv_3x3_wheel_alpha_v3_0b.s
        kernels/conv_3x3_wheel_alpha_v9_2_7.s
        kernels/conv_3x3_wheel_alpha_v9_2_7_stride_2_dec.s
        kernels/Conv_Winograd_v21_1_3_fp16_dot2_f2x3_dilation2.s
        kernels/Conv_Winograd_v21_1_3_fp16_dot2_f2x3_stride1.s
        kernels/Conv_Winograd_v21_1_3_fp16_dot2_f2x3_stride2.s
        kernels/Conv_Winograd_v21_1_3_fp16_dot2_f3x2_stride1.s
        kernels/Conv_Winograd_v21_1_3_fp32_f2x3_dilation2.s
        kernels/Conv_Winograd_v21_1_3_fp32_f2x3_stride1.s
        kernels/Conv_Winograd_v21_1_3_fp32_f2x3_stride2.s
        kernels/Conv_Winograd_v21_1_3_fp32_f3x2_stride1.s
        kernels/Conv_Winograd_v30_3_1_fp16_dot2_f2x3_dilation2.s
        kernels/Conv_Winograd_v30_3_1_fp16_dot2_f2x3_stride1.s
        kernels/Conv_Winograd_v30_3_1_fp16_dot2_f2x3_stride2.s
        kernels/Conv_Winograd_v30_3_1_fp32_f2x3_dilation2.s
        kernels/Conv_Winograd_v30_3_1_fp32_f2x3_stride1.s
        kernels/Conv_Winograd_v30_3_1_fp32_f2x3_stride2.s
        kernels/Conv_Winograd_v30_3_1_fp16_dot2_f3x2_dilation2.s
        kernels/Conv_Winograd_v30_3_1_fp16_dot2_f3x2_stride1.s
        kernels/Conv_Winograd_v30_3_1_fp16_dot2_f3x2_stride2.s
        kernels/Conv_Winograd_v30_3_1_fp32_f3x2_dilation2.s
        kernels/Conv_Winograd_v30_3_1_fp32_f3x2_stride1.s
        kernels/Conv_Winograd_v30_3_1_fp32_f3x2_stride2.s
        kernels/MIOpenConvBwdBias.cl
        kernels/MIOpenBatchNormActivInfer.cl
        kernels/MIOpenBatchNormActivInferHIP.cpp
        kernels/MIOpenCTCLoss.cl
        kernels/MIOpenDropoutHIP.cpp
        kernels/winograd/Conv_Winograd_Fury_v2_4_1_fp16_fp16acc_f2x3_c16_stride1.s
        kernels/winograd/Conv_Winograd_Fury_v2_4_1_fp16_fp16acc_f2x3_c32_stride1.s
        kernels/winograd/Conv_Winograd_Rage_v4_6_0_fp16_fp32acc_f2x3_stride1.s
        kernels/winograd/Conv_Winograd_Rage_v4_7_0_fp16_fp32acc_f2x3_stride1.s
        kernels/xform_data.s
        kernels/xform_filter.s
        kernels/xform_bidirect_winograd_data.s
        kernels/xform_bidirect_winograd_filter.s
        kernels/xform_bidirect_winograd_out.s
        kernels/UniversalTranspose.cl)

    # Lists of kernels that are currently under development.
    # They should ALWAYS be empty in the develop branch (at the time of PR merge).
    # Intention: to speed up rebuilds during kernel development.
    set(MIOPEN_DEVELOPMENT_KERNELS)

    # Includes that are only referenced by MIOPEN_DEVELOPMENT_KERNELS
    set(MIOPEN_DEVELOPMENT_KERNEL_INCLUDES)

    # The element counts are kept available for any later code that needs to
    # know whether the development lists are in use.
    list(LENGTH MIOPEN_DEVELOPMENT_KERNELS MIOPEN_DEVELOPMENT_KERNELS_COUNT)
    list(LENGTH MIOPEN_DEVELOPMENT_KERNEL_INCLUDES MIOPEN_DEVELOPMENT_KERNEL_INCLUDES_COUNT)

    # add_kernels() regenerates its output file from scratch on every call, so
    # the regular and the development lists must be passed together in a single
    # call per output file. Calling it a second time with only the development
    # list would overwrite the file and drop every regular kernel from the
    # generated table. Empty list elements (when a development list is empty)
    # are skipped by the foreach() inside add_kernels().
    add_kernels("kernel.cpp" "MIOPEN_KERNEL_" "" "${MIOPEN_KERNELS};${MIOPEN_DEVELOPMENT_KERNELS}")
    add_kernels("kernel_includes.cpp" "MIOPEN_KERNEL_" "_INCLUDE" "${MIOPEN_KERNEL_INCLUDES};${MIOPEN_DEVELOPMENT_KERNEL_INCLUDES}")

    # Generate db_path.cpp in the build tree from its template.
    configure_file(db_path.cpp.in ${PROJECT_BINARY_DIR}/db_path.cpp)
    # Sources added for every backend accepted by the enclosing condition,
    # including the db_path.cpp generated just above.
    list(APPEND MIOpen_Source
        activ.cpp
        adam.cpp
        addlayernorm.cpp
        cat.cpp
        exec_utils.cpp
        groupnorm.cpp
        getitem.cpp
        glu.cpp
        kernel_cache.cpp
        kthvalue.cpp
        layernorm.cpp
        lrn.cpp
        mlo_dir_conv.cpp
        multimarginloss.cpp
        ocl/activ_ocl.cpp
        ocl/batchnormocl.cpp
        ocl/convolutionocl.cpp
        ocl/lrn_ocl.cpp
        ocl/mloNorm.cpp
        ocl/pooling_ocl.cpp
        ocl/rnnocl.cpp
        ocl/utilocl.cpp
        ocl/ctcocl.cpp
        ocl/dropoutocl.cpp
        ocl/gcn_asm_utils.cpp
        ocl/rnn_util_ocl.cpp
        hip/hip_build_utils.cpp
        hip/batched_transpose_sol.cpp
        hip/general_tensor_reorder_sol.cpp
        pooling.cpp
        t5layernorm.cpp
        ocl/fusionopconvocl.cpp
        ocl/fusionopbiasbnactivocl.cpp
        prelu.cpp
        reducecalculation.cpp
        reduceextreme.cpp
        rope.cpp
        softmarginloss.cpp
        transformers_adam_w.cpp
        ${PROJECT_BINARY_DIR}/db_path.cpp
        )

    # Place the kernel tables generated by add_kernels() at the very front of
    # the source list.
    list(INSERT MIOpen_Source 0
        ${PROJECT_BINARY_DIR}/kernel.cpp
        ${PROJECT_BINARY_DIR}/kernel_includes.cpp
        )
endif()

# gemm_v2.cpp is built only when MIOPEN_USE_ROCBLAS or MIOPEN_USE_HIPBLASLT is set.
if(MIOPEN_USE_ROCBLAS OR MIOPEN_USE_HIPBLASLT)
    list(APPEND MIOpen_Source gemm_v2.cpp)
endif()

# Backend-specific handle, kernel and error-reporting sources.
# MIOPEN_BACKEND is compared by exact string, so at most one of the following
# branches contributes sources.
if(MIOPEN_BACKEND STREQUAL "OpenCL")
    list(APPEND MIOpen_Source
        ocl/handleocl.cpp
        ocl_kernel.cpp
        ocl/oclerrors.cpp
        ocl/clhelper.cpp)
endif()

if(MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN_BACKEND STREQUAL "HIP")
    list(APPEND MIOpen_Source
        hip/hiperrors.cpp
        hip/handlehip.cpp
        hipoc/hipoc_kernel.cpp
        hipoc/hipoc_program.cpp)
endif()

# HIPNOGPU uses nogpu/handle.cpp in place of hip/handlehip.cpp.
if(MIOPEN_BACKEND STREQUAL "HIPNOGPU")
    list(APPEND MIOpen_Source
        hip/hiperrors.cpp
        nogpu/handle.cpp
        hipoc/hipoc_kernel.cpp
        hipoc/hipoc_program.cpp)
endif()

# Inline kernel sources into generated translation units for every backend
# that is listed in the condition below.
if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN_BACKEND STREQUAL "HIP" OR MIOPEN_BACKEND STREQUAL "HIPNOGPU")
    # Maximum number of kernel source files packed into one generated batch.
    set(KERNELS_SRC_BATCH_FACTOR 50 CACHE STRING "Amount of kernel source files to inline to a single object file.")
    # Running batch index; every inline_kernels_src() call continues from it.
    set(KERNELS_BATCH_ID 0)

    # inline_kernels_src(BATCH_FACTOR KERNELS KERNEL_INCLUDES EXTRA_OPTIONS MESSAGE_SUFFIX)
    #
    # Splits KERNELS into batches of at most BATCH_FACTOR files. For each batch
    # it adds a build rule that runs the addkernels tool to produce
    # inlined_kernels/batch_<id>.cpp.hpp, configures
    # inlined_kernels/batch_<id>.cpp from kernels/kernels_batch.cpp.in, and
    # appends both files to MIOpen_Source.
    #
    #   BATCH_FACTOR    - maximum number of files per batch.
    #   KERNELS         - list of files to inline (pass as one quoted list).
    #   KERNEL_INCLUDES - additional files the generated header depends on;
    #                     used for DEPENDS only, not passed to addkernels.
    #   EXTRA_OPTIONS   - list of extra addkernels options placed before -source.
    #   MESSAGE_SUFFIX  - text appended to the build-time progress comment.
    #
    # Reads KERNELS_BATCH_ID and MIOpen_Source from the calling scope and
    # writes the updated values back with PARENT_SCOPE.
    function(inline_kernels_src BATCH_FACTOR KERNELS KERNEL_INCLUDES EXTRA_OPTIONS MESSAGE_SUFFIX)
        set(KERNELS_BATCH)
        set(KERNELS_BATCH_SIZE 0)
        set(PROCESSED 0)
        list(LENGTH KERNELS KERNELS_NUMBER)

        foreach(KERNEL ${KERNELS})
            list(APPEND KERNELS_BATCH ${KERNEL})
            list(LENGTH KERNELS_BATCH KERNELS_BATCH_SIZE)
            math(EXPR PROCESSED "1+${PROCESSED}")
            # Flush when the batch is full or when the last file was added.
            if((KERNELS_BATCH_SIZE EQUAL ${BATCH_FACTOR}) OR (PROCESSED EQUAL KERNELS_NUMBER))
                # NOTE(review): KERNEL_SRC_HPP_FILENAME is not used below, so it
                # is presumably consumed by kernels_batch.cpp.in -- keep the name.
                set(KERNEL_SRC_HPP_FILENAME batch_${KERNELS_BATCH_ID}.cpp.hpp)
                set(KERNEL_SRC_HPP_PATH ${PROJECT_BINARY_DIR}/inlined_kernels/${KERNEL_SRC_HPP_FILENAME})
                set(KERNEL_SRC_CPP_PATH ${PROJECT_BINARY_DIR}/inlined_kernels/batch_${KERNELS_BATCH_ID}.cpp)

                # VERBATIM keeps argument escaping identical on every platform
                # and generator (e.g. for paths containing spaces).
                add_custom_command(
                    OUTPUT ${KERNEL_SRC_HPP_PATH}
                    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                    DEPENDS addkernels ${KERNELS_BATCH} ${KERNEL_INCLUDES}
                    COMMAND $<TARGET_FILE:addkernels> -target ${KERNEL_SRC_HPP_PATH} -extern ${EXTRA_OPTIONS} -source ${KERNELS_BATCH}
                    COMMENT "Inlining kernels batch #${KERNELS_BATCH_ID}${MESSAGE_SUFFIX}"
                    VERBATIM
                    )
                configure_file(kernels/kernels_batch.cpp.in ${KERNEL_SRC_CPP_PATH})
                list(APPEND MIOpen_Source ${KERNEL_SRC_CPP_PATH} ${KERNEL_SRC_HPP_PATH})

                # Start a fresh batch with the next index.
                set(KERNELS_BATCH)
                math(EXPR KERNELS_BATCH_ID "1+${KERNELS_BATCH_ID}")
            endif()
        endforeach()

        # Propagate the advanced batch index and the extended source list.
        set(KERNELS_BATCH_ID ${KERNELS_BATCH_ID} PARENT_SCOPE)
        set(MIOpen_Source ${MIOpen_Source} PARENT_SCOPE)
    endfunction()

    # Regular kernels depend on the kernel includes; the includes themselves
    # are inlined separately with the extra addkernels options.
    inline_kernels_src(${KERNELS_SRC_BATCH_FACTOR} "${MIOPEN_KERNELS}" "${MIOPEN_KERNEL_INCLUDES}" "" "")
    inline_kernels_src(${KERNELS_SRC_BATCH_FACTOR} "${MIOPEN_KERNEL_INCLUDES}" "" "-no-recurse;-mark-includes" " (includes)")

    # Development kernels depend on both the regular and the development includes.
    set(MIOPEN_DEVELOPMENT_KERNELS_DEPS ${MIOPEN_KERNEL_INCLUDES})
    list(APPEND MIOPEN_DEVELOPMENT_KERNELS_DEPS ${MIOPEN_DEVELOPMENT_KERNEL_INCLUDES})

    if(${MIOPEN_DEVELOPMENT_KERNELS_COUNT})
        inline_kernels_src(${KERNELS_SRC_BATCH_FACTOR} "${MIOPEN_DEVELOPMENT_KERNELS}" "${MIOPEN_DEVELOPMENT_KERNELS_DEPS}" "" " (dev kernels)")
    endif()

    if(${MIOPEN_DEVELOPMENT_KERNEL_INCLUDES_COUNT})
        inline_kernels_src(${KERNELS_SRC_BATCH_FACTOR} "${MIOPEN_DEVELOPMENT_KERNEL_INCLUDES}" "" "-no-recurse;-mark-includes" " (dev includes)")
    endif()

endif()

# comgr.cpp is built only when MIOPEN_USE_COMGR is enabled.
if(MIOPEN_USE_COMGR)
    list(APPEND MIOpen_Source
        comgr.cpp
    )
endif()

# Additional sources that are built only when MIOPEN_USE_MLIR is enabled.
if(MIOPEN_USE_MLIR)
    list(APPEND MIOpen_Source
        conv/invokers/mlir_impl_gemm.cpp
        mlir_build.cpp
        solver/conv/mlir_common.cpp
    )
endif()

# build library
# The objects of the sqlite_memvfs target are added only when SQLite support
# is enabled; otherwise the list stays empty and expands to nothing.
set(miopen_sqlite_objects)
if(MIOPEN_ENABLE_SQLITE)
    list(APPEND miopen_sqlite_objects $<TARGET_OBJECTS:sqlite_memvfs>)
endif()

add_library(MIOpen
    ${MIOpen_Source}
    ${miopen_sqlite_objects}
)

# Shared-object version and static analysis for the library target.
rocm_set_soversion(MIOpen ${MIOpen_SOVERSION})
clang_tidy_check(MIOpen)

# Link stdc++fs when HAS_LIB_STD_FILESYSTEM is set (detected outside this
# section).
if(HAS_LIB_STD_FILESYSTEM)
    target_link_libraries(MIOpen PRIVATE stdc++fs)
endif()

# zstd is optional. When found, use the shared imported target if it exists
# and the static one otherwise.
find_package(zstd)
if(zstd_FOUND)
    target_link_libraries(MIOpen PRIVATE
        $<IF:$<TARGET_EXISTS:zstd::libzstd_shared>,zstd::libzstd_shared,zstd::libzstd_static>
    )
endif()

# target_internal_library(TARGET lib...)
#
# Links the given libraries into TARGET privately and additionally exposes
# them to consumers in the build tree only (via $<BUILD_INTERFACE:...>), so
# they do not become part of the installed/exported interface.
#
#   TARGET - target to link the libraries into.
#   ARGN   - one or more libraries or imported targets.
function(target_internal_library TARGET)
    target_link_libraries(${TARGET} PRIVATE ${ARGN})
    # The generator expression is quoted so that a multi-element ARGN stays a
    # single argument instead of being split on ';' into broken fragments.
    target_link_libraries(${TARGET} INTERFACE "$<BUILD_INTERFACE:${ARGN}>")
endfunction()

# src/include is visible to targets that use MIOpen from the build tree.
target_include_directories(MIOpen PUBLIC $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src/include>)

if(MIOPEN_USE_COMPOSABLEKERNEL AND MIOPEN_BUILD_CK)
    # The aliased target doesn't exist when we build CK inline as part of MIOpen. The aliased target is not
    # global when you build it, so we don't have access to it.
    set(MIOPEN_CK_LINK_FLAGS device_conv_operations hip::host)

    # The include directories also don't propagate correctly when using the non-aliased targets.
    # Manually including them fixes this problem for inline builds.
    target_include_directories(MIOpen SYSTEM PRIVATE
        ${MIOPEN_CK_INCLUDE_DIR}
        ${MIOPEN_CK_BUILD_INCLUDE_DIR}
        ${MIOPEN_CK_LIBRARY_INCLUDE_DIR}
    )
elseif(MIOPEN_USE_COMPOSABLEKERNEL)
    # Use the aliased targets when we pull CK from /opt/rocm or another place on disk.
    set(MIOPEN_CK_LINK_FLAGS composable_kernel::device_conv_operations hip::host)
endif()

if(WIN32)
    # Refer to https://en.cppreference.com/w/cpp/language/types for details.
    target_compile_options(MIOpen PRIVATE
        $<BUILD_INTERFACE:$<$<CXX_COMPILER_ID:Clang>:-U__LP64__>>
    )
endif()

target_include_directories(MIOpen SYSTEM PUBLIC
    $<BUILD_INTERFACE:${HALF_INCLUDE_DIR}>
)

# Workaround: a change in rocm-cmake was causing a linking error, so
#             ${CMAKE_DL_LIBS} had to be added. It can be removed once the
#             root cause is identified.
target_link_libraries(MIOpen PRIVATE
    ${CMAKE_DL_LIBS}
    Threads::Threads
    BZip2::BZip2
    ${MIOPEN_CK_LINK_FLAGS}
)
miopen_generate_export_header(MIOpen)

if(WIN32)
    # Temporary workaround for rocMLIR not correctly exporting the libraries it depends on.
    target_link_libraries(MIOpen PRIVATE ntdll)
endif()

if(BUILD_TESTING)
    # On Windows, export selected internal symbols only when tests are built. The officially released
    # binaries must not have internals exposed because doing so violates the threat model requirements.
    # We cannot use the CMake property CMAKE_EXPORT_ALL_SYMBOLS here because the number of automatically
    # exported symbols exceeds the maximum allowed number in a DLL library (64K).
    # See details here: https://learn.microsoft.com/en-us/cpp/build/exporting-from-a-dll?view=msvc-170
    generate_export_header(MIOpen
        BASE_NAME MIOPEN_INTERNALS
        EXPORT_FILE_NAME ${CMAKE_BINARY_DIR}/include/miopen/export_internals.h
    )
    # MIOPEN_BUILD_TESTING is defined for MIOpen itself and for build-tree consumers.
    target_compile_definitions(MIOpen PUBLIC
        $<BUILD_INTERFACE:MIOPEN_BUILD_TESTING>
    )
endif()

if(MIOPEN_ENABLE_AI_KERNEL_TUNING OR MIOPEN_ENABLE_AI_IMMED_MODE_FALLBACK)
    target_link_libraries(MIOpen PRIVATE
        frugally-deep::fdeep
        Eigen3::Eigen
    )

    # frugally-deep has broken linking to nlohmann_json: provide a plain
    # nlohmann_json target that forwards to the namespaced one if none exists.
    if(NOT TARGET nlohmann_json)
        add_library(nlohmann_json INTERFACE IMPORTED GLOBAL)
        target_link_libraries(nlohmann_json INTERFACE nlohmann_json::nlohmann_json)
    endif()

    file(GLOB MODEL_FILES CONFIGURE_DEPENDS kernels/*.model)

    # Copy every model file into the database directory of the build tree.
    foreach(MODEL_FILE IN LISTS MODEL_FILES)
        get_filename_component(MODEL_FILE_FILENAME "${MODEL_FILE}" NAME)
        configure_file("${MODEL_FILE}" "${PROJECT_BINARY_DIR}/${DATABASE_INSTALL_DIR}/${MODEL_FILE_FILENAME}" COPYONLY)
    endforeach()

    # The model files are installed unless ENABLE_ASAN_PACKAGING is set.
    if(NOT ENABLE_ASAN_PACKAGING)
        install(FILES ${MODEL_FILES} DESTINATION ${DATABASE_INSTALL_DIR})
    endif()
endif()

############################################################
# Backend-specific usage requirements
if(MIOPEN_BACKEND STREQUAL "OpenCL")
    # MIOpen depends on OpenCL
    message(STATUS "MIOpen linking OpenCL: ${OPENCL_INCLUDE_DIRS}")
    target_include_directories(MIOpen SYSTEM PUBLIC ${OPENCL_INCLUDE_DIRS})
    target_link_libraries(MIOpen PUBLIC ${OPENCL_LIBRARIES})
    list(APPEND PACKAGE_DEPENDS PACKAGE OpenCL)
elseif(MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN_BACKEND STREQUAL "HIP")
    target_link_libraries(MIOpen
        PRIVATE hip::device
        INTERFACE hip::host
    )

    # hiprtc is linked through its imported target on Windows and by plain
    # library name elsewhere.
    if(MIOPEN_USE_HIPRTC AND WIN32)
        target_link_libraries(MIOpen PRIVATE hiprtc::hiprtc)
    elseif(MIOPEN_USE_HIPRTC)
        target_link_libraries(MIOpen PRIVATE hiprtc)
    endif()

    if(ENABLE_HIP_WORKAROUNDS)
        # Workaround hip not setting its usage requirements correctly
        target_compile_definitions(MIOpen PRIVATE __HIP_PLATFORM_AMD__=1)
    endif()

    # This is helpful for the tests
    target_link_libraries(MIOpen INTERFACE $<BUILD_INTERFACE:hip::device>)
    list(APPEND PACKAGE_DEPENDS PACKAGE hip)
endif()

# amd_comgr: linked as an internal library and recorded as a package dependency.
if(MIOPEN_USE_COMGR)
    target_internal_library(MIOpen amd_comgr)
    list(APPEND PACKAGE_DEPENDS PACKAGE amd_comgr)
endif()

if(MIOPEN_OFFLINE_COMPILER_PATHS_V2)
    # Adding rocm-core library dependency for API getROCmInstallPath()
    target_link_libraries(MIOpen PRIVATE rocm-core)
endif()

# rocBLAS: linked privately, exposed to build-tree consumers, and recorded as
# a dependency for static builds.
if(rocblas_FOUND)
    target_link_libraries(MIOpen
        INTERFACE $<BUILD_INTERFACE:roc::rocblas>
        PRIVATE roc::rocblas
    )
    list(APPEND PACKAGE_STATIC_DEPENDS PACKAGE rocblas)
endif()

# hipBLASLt: linked privately; the hipblas-common include directory is exposed
# to build-tree consumers only.
if(hipblaslt_FOUND)
    target_link_libraries(MIOpen PRIVATE roc::hipblaslt)
    target_include_directories(MIOpen INTERFACE
        $<BUILD_INTERFACE:${HIPBLAS-COMMON_INCLUDE_DIR}>
    )
endif()

target_include_directories(MIOpen PRIVATE ${ROCRAND_INCLUDE_DIR})

# For backward compatibility with ROCm 5.3
# Build with library libMLIRMIOpen
if(LIBMLIRMIOPEN)
    target_link_libraries(MIOpen PRIVATE ${LIBMLIRMIOPEN})
endif()

# Build with package rocMLIR
if(rocMLIR_FOUND)
    target_link_libraries(MIOpen PRIVATE rocMLIR::rockCompiler)
endif()

# nlohmann_json and Boost.Filesystem are linked privately and exposed to
# build-tree consumers only.
target_internal_library(MIOpen nlohmann_json::nlohmann_json)
target_internal_library(MIOpen Boost::filesystem)
list(APPEND PACKAGE_STATIC_DEPENDS PACKAGE Boost COMPONENTS filesystem)

if(NOT WIN32 AND NOT APPLE)
    # Write a linker version script: miopen* symbols are global, while
    # *boost* and std:: symbols are local.
    set(miopen_version_script "${CMAKE_CURRENT_BINARY_DIR}/lib.def")
    file(WRITE "${miopen_version_script}" "
MIOPEN_${MIOPEN_BACKEND}_1
{
global:
    miopen*;
    extern \"C++\" {
        miopen::*;
    };
local:
    *boost*;
    extern \"C++\" {
        std::*;
    };
};
")
    target_link_libraries(MIOpen PRIVATE
        "-Wl,--version-script=${miopen_version_script}"
        "-Wl,--exclude-libs,ALL"
    )
    # set_target_properties(MIOpen PROPERTIES CXX_VISIBILITY_PRESET hidden)
    set_property(TARGET MIOpen PROPERTY VISIBILITY_INLINES_HIDDEN 1)
endif()
#######################################
# MIOpen depends on SQLite
if(MIOPEN_ENABLE_SQLITE)
    target_internal_library(MIOpen SQLite::SQLite3)
endif()

############################################################
# MIOpen depends on librt for Boost.Interprocess
if(NOT WIN32 AND NOT APPLE)
    find_library(LIBRT rt)
    if(LIBRT)
        message(STATUS "Librt: ${LIBRT}")
        target_internal_library(MIOpen ${LIBRT})
    endif()
endif()

# roctracer: its headers are used privately and by build-tree consumers; the
# library itself is linked privately.
if(MIOPEN_USE_ROCTRACER)
    target_include_directories(MIOpen
        PRIVATE ${ROCTRACER_INCLUDE_DIR}
        INTERFACE $<BUILD_INTERFACE:${ROCTRACER_INCLUDE_DIR}>
    )
    target_link_libraries(MIOpen PRIVATE ${rocTracer})
endif()

############################################################
# Installation
# src/include is added to the installed include directories only when
# MIOPEN_INSTALL_CXX_HEADERS is set; otherwise the variable is unset and
# expands to nothing below.
if(MIOPEN_INSTALL_CXX_HEADERS)
    set(MIOPEN_CXX_HEADER_PATH ${PROJECT_SOURCE_DIR}/src/include)
else()
    set(MIOPEN_CXX_HEADER_PATH)
endif()

rocm_install_targets(
    TARGETS MIOpen
    INCLUDE
        ${PROJECT_SOURCE_DIR}/include
        ${PROJECT_BINARY_DIR}/include
        ${MIOPEN_CXX_HEADER_PATH}
)

# Export the target together with the package dependencies collected above.
rocm_export_targets(
    TARGETS MIOpen
    DEPENDS ${PACKAGE_DEPENDS}
    STATIC_DEPENDS ${PACKAGE_STATIC_DEPENDS}
)

# Install db files
# When MIOPEN_EMBED_DB lists one or more architectures, the find db, perf db
# and (optionally) binary cache files for each of them are collected into
# CODE_OBJECTS and embedded through the miopen_data library.
if(NOT MIOPEN_EMBED_DB STREQUAL "")
    include(embed)
    if(MIOPEN_EMBED_BINCACHE AND MIOPEN_BINCACHE_PATH STREQUAL "")
        # No explicit path given: fall back to the default binary cache
        # location, unless the files were never pulled from git-lfs.
        if(MIOPEN_NO_LFS_PULLED)
            message(WARNING "Binary cache files have not been pulled down from git-lfs, will not embed.")
        else()
            set(MIOPEN_BINCACHE_PATH ${KERNELS_BINARY_DIR})
            message("MIOPEN_BINCACHE_PATH: ${MIOPEN_BINCACHE_PATH}")
        endif()
    elseif(MIOPEN_EMBED_BINCACHE)
        # Only reached when MIOPEN_EMBED_BINCACHE is set together with a
        # user-provided MIOPEN_BINCACHE_PATH, which is exactly what the message
        # states. Previously this also fired when MIOPEN_EMBED_BINCACHE was off.
        message(WARNING "MIOPEN_EMBED_BINCACHE is set and MIOPEN_BINCACHE_PATH was used to override default binary cache files. Proceed at your own risk!")
    endif()

    # Embed find db and perf db
    foreach(EMBED_ARCH ${MIOPEN_EMBED_DB})
        message(STATUS "Adding find db for arch: ${EMBED_ARCH}")
        list(APPEND CODE_OBJECTS "${PROJECT_BINARY_DIR}/${DATABASE_INSTALL_DIR}/${EMBED_ARCH}.${MIOPEN_BACKEND}.fdb.txt")
        message(STATUS "Adding perf db for arch: ${EMBED_ARCH}")
        list(APPEND CODE_OBJECTS "${PROJECT_BINARY_DIR}/${DATABASE_INSTALL_DIR}/${EMBED_ARCH}.db${PERFDB_SUFFIX}")
    endforeach()

    # Embed Bin Cache
    # Runs whenever a binary cache path is available, whether it was given by
    # the user or set to the default above.
    if(NOT MIOPEN_BINCACHE_PATH STREQUAL "")
        foreach(EMBED_ARCH ${MIOPEN_EMBED_DB})
            message(STATUS "Adding binary cache for arch: ${EMBED_ARCH}")
            # download_binary() is defined outside this section (presumably by
            # the embed module); its result is read from OUTPUT_PATH.
            download_binary(OUTPUT_PATH "${MIOPEN_BINCACHE_PATH}" "${EMBED_ARCH}")
            list(APPEND CODE_OBJECTS "${OUTPUT_PATH}")
        endforeach()
    endif()

    add_embed_library(miopen_data ${CODE_OBJECTS})
    target_link_libraries(MIOpen PRIVATE $<BUILD_INTERFACE:miopen_data> )
endif()
