lundi 30 mars 2020

CMake + CUDA “invalid device function” even with correct SM version

I have been trying to write a very simple cuda library. I am encountering the error "Invalid Device Function". I have verified that the correct architecture is set as the build flag. And, that the cuda install is good by running deviceQuery and running various other samples.

I cannot share the full source. But I can try to provide a minimal example.

I have a file in which I have the kernel

__global__
void foo( float *ptr_a, uint8_t *ptr_b )
{
 //do stuff
}

And a wrapper function that

void IteratorTestCuda::fooWrapper( uint8_t &aptr )
{
   gpuErrorCheck( cudaMemcpy( ptr_device, ptr_host, sizeof( uint8_t ), cudaMemcpyHostToDevice  ) );

   gpuErrorCheck( cudaGetLastError() );

   //b is a member
   foo << < 1, 1 >> > ( a, b );

   gpuErrorCheck( cudaGetLastError() );

   //do other stuff
}

I am erroring out at the cudaGetLastError call and the error is Invalid Device Function. I have been debugging this for a while and it is because the the requested device function does not exist or is not compiled for the proper device architecture. However, as stated earlier I am providing the correct build flag.

In my cmakelists, I have:

cmake_minimum_required(VERSION 3.5)
project(foo CXX C CUDA )

find_package(CUDA 10.0 REQUIRED)

include_directories(
 include/${PROJECT_NAME}
 ${catkin_INCLUDE_DIRS}
 ${CUDA_INCLUDE_DIRS}
)


if (CUDA_FOUND)
  #Get CUDA compute capability

    set(OUTPUTFILE ${CMAKE_CURRENT_SOURCE_DIR}/src/cuda_info)
    execute_process(COMMAND bash -c <SomeFilePath> OUTPUT_VARIABLE FILEHINT)
    set(CUDAFILE "${FILEHINT}/src/computeCapability.cu"  )
    execute_process(COMMAND bash -c "nvcc -lcuda ${CUDAFILE} -o ${OUTPUTFILE}")
    execute_process(COMMAND bash -c "${OUTPUTFILE}" OUTPUT_VARIABLE ARCH)

    SET(CUDA_NVCC_FLAGS "${ARCH} --relocatable-device-code=true -ldl -lrt -lcufft -lcurand" CACHE STRING "nvcc flags" FORCE)
    SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-std=c++11 -O3")
    SET(CUDA_VERBOSE_BUILD ON CACHE BOOL "nvcc verbose" FORCE)
    SET(LIB_TYPE STATIC)

else()
    message(FATAL_ERROR "CUDA NOT FOUND!")
endif()

SET(LIB_TYPE_TEST SHARED)

CUDA_ADD_LIBRARY( fooCuda ${LIB_TYPE_TEST} srcFoo.cu headerFoo.cuh )

add_dependencies( fooCuda $project_depends} )

CUDA_ADD_LIBRARY( barCuda ${LIB_TYPE_TEST} srcBar.cu headerBar.cuh )

add_dependencies( barCuda $project_depends} )

#src.cpp is another class which calls that function
add_executable( fooCpp src.cpp srcFoo.cu srcBar.cu )

add_dependencies( fooCpp fooCuda barCuda )

target_link_libraries(fooCpp
                   fooCuda
                   barCuda
)

The other alternative would be that it isnt defined under global namespace. But, it is.

I have spent a few hours on this and am about to give up. Any ideas?

-Arunabh

Aucun commentaire:

Enregistrer un commentaire