diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt new file mode 100644 index 0000000..665884a --- /dev/null +++ b/cpp/CMakeLists.txt @@ -0,0 +1,80 @@ +cmake_minimum_required(VERSION 3.11 FATAL_ERROR) + +include(GNUInstallDirs) + +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +SET(PROJECT_NAME pynvjpeg) +PROJECT(${PROJECT_NAME} VERSION 0.1 LANGUAGES CUDA CXX) + +if(NOT DEFINED CMAKE_CUDA_STANDARD) + set(CMAKE_CUDA_STANDARD 11) + set(CMAKE_CUDA_STANDARD_REQUIRED ON) +endif() + +include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) +link_directories(${CMAKE_CUDA_INCLUDE_DIRS}) + +find_library(CUDART_LIBRARY cudart ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +find_library(NVJPEG_LIBRARY nvjpeg ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) + +include_directories( + ${PROJECT_SOURCE_DIR} + ) + +include(FetchContent) +FetchContent_Declare( + pybind11_sources + GIT_REPOSITORY https://github.com/pybind/pybind11.git + GIT_TAG v2.9 + ) +FetchContent_GetProperties(pybind11_sources) +if(NOT pybind11_sources_POPULATED) + FetchContent_Populate(pybind11_sources) + add_subdirectory( + ${pybind11_sources_SOURCE_DIR} + ${pybind11_sources_BINARY_DIR} + ) +endif() + +if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR} CACHE PATH "" FORCE) +endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + +add_library(${PROJECT_NAME} + SHARED + nvjpeg_decoder.cpp + jpeg_image.cpp + cuda_util.cpp + python_nvjpeg_decoder.cpp + ) + +target_link_libraries(${PROJECT_NAME} + PUBLIC + ${CUDART_LIBRARY} + ${NVJPEG_LIBRARY} + pybind11::module) + +set_target_properties(${PROJECT_NAME} + PROPERTIES + PREFIX "${PYTHON_MODULE_PREFIX}" + SUFFIX "${PYTHON_MODULE_EXTENSION}" + ) + +add_executable(test_decode test.cpp) +target_link_libraries(test_decode ${PROJECT_NAME} pybind11::embed) + +include(CTest) +enable_testing() + +add_test( + NAME test_decode + COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/cat.jpg + ) + +add_test( + NAME python_test_short + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test.py $ ${CMAKE_CURRENT_SOURCE_DIR}/cat.jpg + ) diff --git a/cpp/cat.jpg b/cpp/cat.jpg new file mode 100644 index 0000000..2e24329 Binary files /dev/null and b/cpp/cat.jpg differ diff --git a/cpp/cuda_util.cpp b/cpp/cuda_util.cpp new file mode 100644 index 0000000..ad69a75 --- /dev/null +++ b/cpp/cuda_util.cpp @@ -0,0 +1,12 @@ + +#include "cuda_util.h" + + +namespace NVJpegDecoder { + +int DevMalloc(void **p, size_t s) { return (int)cudaMalloc(p, s); } +int DevFree(void *p) { return (int)cudaFree(p); } +int HostMalloc(void** p, size_t s, unsigned int f) { return (int)cudaHostAlloc(p, s, f); } +int HostFree(void* p) { return (int)cudaFreeHost(p); } + +} // namespace NVJpegDecoder diff --git a/cpp/cuda_util.h b/cpp/cuda_util.h new file mode 100644 index 0000000..3b946ac --- /dev/null +++ b/cpp/cuda_util.h @@ -0,0 +1,98 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace NVJpegDecoder { + + +#define CHECK_CUDA(call) \ + do { \ + CudaStatus s(call); \ + if (!s.IsOk()) { \ + std::cout << "CUDA Runtime failure: '#" << s.Msg() << "' at " << __FILE__ << ":" << __LINE__ << std::endl; \ + return false; \ + } \ + } while (false) \ + +#define CHECK_NVJPEG(call) \ + do { \ + NvJpegStatus s(call); \ + if (!s.IsOk()) { \ + std::cout << "NVJPEG failure: '#" << s.Msg() << "' at " << __FILE__ << ":" << __LINE__ << std::endl; \ + return false; \ + } \ + } while (false) + + +int DevMalloc(void **p, size_t s); +int DevFree(void *p); +int HostMalloc(void** p, size_t s, unsigned int f); +int HostFree(void* p); + + +class CudaStatus { +public: + explicit CudaStatus(cudaError_t error) : mCode(error) {} + + CudaStatus(CudaStatus&) = default; + CudaStatus& operator=(CudaStatus&) = default; + + bool IsOk() { + return mCode == cudaSuccess; + } + + std::string Msg() { + return cudaGetErrorString(mCode); + } + +private: + cudaError_t mCode; +}; + + +class NvJpegStatus { +public: + explicit NvJpegStatus(nvjpegStatus_t error) : mCode(error) {} + + bool IsOk() { + return mCode == NVJPEG_STATUS_SUCCESS; + } + + inline nvjpegStatus_t Code() {return mCode;} + + std::string Msg() { + switch (mCode) { + case NVJPEG_STATUS_NOT_INITIALIZED: + return "NVJPEG_STATUS_NOT_INITIALIZED"; + case NVJPEG_STATUS_INVALID_PARAMETER: + return "NVJPEG_STATUS_INVALID_PARAMETER"; + case NVJPEG_STATUS_BAD_JPEG: + return "NVJPEG_STATUS_BAD_JPEG"; + case NVJPEG_STATUS_JPEG_NOT_SUPPORTED: + return "NVJPEG_STATUS_JPEG_NOT_SUPPORTED"; + case NVJPEG_STATUS_ALLOCATOR_FAILURE: + return "NVJPEG_STATUS_ALLOCATOR_FAILURE"; + case NVJPEG_STATUS_EXECUTION_FAILED: + return "NVJPEG_STATUS_EXECUTION_FAILED"; + case NVJPEG_STATUS_ARCH_MISMATCH: + return "NVJPEG_STATUS_ARCH_MISMATCH"; + case NVJPEG_STATUS_INTERNAL_ERROR: + return "NVJPEG_STATUS_INTERNAL_ERROR"; + case NVJPEG_STATUS_IMPLEMENTATION_NOT_SUPPORTED: + return "NVJPEG_STATUS_IMPLEMENTATION_NOT_SUPPORTED"; + case NVJPEG_STATUS_INCOMPLETE_BITSTREAM: + return "NVJPEG_STATUS_INCOMPLETE_BITSTREAM"; + default: + return "UNKNOWN NVJPEG ERROR"; + } + } + +private: + nvjpegStatus_t mCode; +}; + +} // namespace NVJpegDecoder diff --git a/cpp/jpeg_image.cpp b/cpp/jpeg_image.cpp new file mode 100644 index 0000000..14d2117 --- /dev/null +++ b/cpp/jpeg_image.cpp @@ -0,0 +1,59 @@ +#include "jpeg_image.h" +#include "cuda_util.h" + + +namespace NVJpegDecoder { + +bool JpegImage::Init(int width, int height, int channels) { + mNvImage = std::make_unique(); + unsigned char * pBuffer = nullptr; + CHECK_CUDA(cudaMalloc((void **)&pBuffer, height * width * channels)); + for(int i = 0; i < channels; i++) { + mNvImage->channel[i] = pBuffer + (height *width * i); + mNvImage->pitch[i] = (unsigned int)width; + } + + mNvImage->pitch[0] = (unsigned int)width * channels; + mWidth = width; + mHeight = height; + mChannels = channels; + return true; +} + +JpegImage::JpegImage(JpegImage&& rhs) { + mWidth = rhs.mWidth; + mHeight = rhs.mHeight; + mChannels = rhs.mChannels; + mSubsampling = rhs.mSubsampling; + mNvImage = std::move(rhs.mNvImage); +} + +JpegImage& JpegImage::operator=(JpegImage&& rhs) { + mWidth = rhs.mWidth; + mHeight = rhs.mHeight; + mChannels = rhs.mChannels; + mSubsampling = rhs.mSubsampling; + mNvImage = std::move(rhs.mNvImage); + return *this; +} + +unsigned char* JpegImage::Cpu() { + size_t size = mHeight * mWidth * mChannels; + auto buffer = std::make_unique(size) ; + CudaStatus s(cudaMemcpy(buffer.get(), mNvImage->channel[0], size, cudaMemcpyDeviceToHost)); + + if (!s.IsOk()) { + std::cout << "Copy image from GPU to CPU failed: " << s.Msg() << std::endl; + return nullptr; + } + return buffer.release(); +} + +JpegImage::~JpegImage(){ + if (mNvImage != nullptr) { + cudaFree(mNvImage->channel[0]); + } +} + + +} // namespace NVJpegDecoder diff --git a/cpp/jpeg_image.h b/cpp/jpeg_image.h new file mode 100644 index 0000000..1dc9a3a --- /dev/null +++ b/cpp/jpeg_image.h @@ -0,0 +1,39 @@ +#pragma once +#include +#include +#include + +namespace NVJpegDecoder { + +class JpegImage { + +public: + JpegImage() noexcept : mNvImage(nullptr) {} + virtual ~JpegImage(); + + JpegImage(const JpegImage&) = delete; + JpegImage& operator=(const JpegImage&) = delete; + + JpegImage(JpegImage&& rhs); + JpegImage& operator=(JpegImage&& rhs); + + bool Init(int width, int height, int channels); + + nvjpegImage_t* GetImagePoint() { + return mNvImage.get(); + } + + const std::vector Dims() { + return std::vector{mHeight, mWidth, mChannels}; + } + + unsigned char* Cpu(); + +private: + int mWidth = 0; + int mHeight = 0; + int mChannels = 0; + nvjpegChromaSubsampling_t mSubsampling; + std::unique_ptr mNvImage; +}; +} // namespace NVJpegDecoder diff --git a/cpp/my_test.cpp b/cpp/my_test.cpp new file mode 100644 index 0000000..596b0c6 --- /dev/null +++ b/cpp/my_test.cpp @@ -0,0 +1,23 @@ +#include +#include +#include + +namespace py = pybind11; + + +void print(py::list l) { + for (auto item: l) { + std::cout << item.attr("__str__")().cast() << std::endl; + } +} + +int main() { + py::scoped_interpreter guard{}; + py::print("Hello, World!"); + py::list data; + for (int i = 0; i < 10; i++) { + data.append(i); + } + print(data); + return 0; +} diff --git a/cpp/nvjpeg_decoder.cpp b/cpp/nvjpeg_decoder.cpp new file mode 100644 index 0000000..b922928 --- /dev/null +++ b/cpp/nvjpeg_decoder.cpp @@ -0,0 +1,93 @@ +#include +#include +#include +#include +#include "cuda_util.h" +#include "nvjpeg_decoder.h" + + +namespace NVJpegDecoder { + +bool Decoder::BindDevice(int device_id) { + if (device_id == mDeviceId) { + return true; + } + + if (device_id < 0) { + std::cout<< "Device id must >= 0, the input is " + << device_id << std::endl; + return false; + } + + Destroy(); + mDeviceId = device_id; + CHECK_CUDA(cudaSetDevice(mDeviceId)); + CHECK_NVJPEG(nvjpegCreateSimple(&mHandle)); + CHECK_NVJPEG(nvjpegJpegStateCreate(mHandle, &mState)); + CHECK_CUDA(cudaStreamCreateWithFlags(&mStream, cudaStreamNonBlocking)); + return true; +} + +bool Decoder::Read(const char* filename, JpegImage& image) { + std::ifstream input(filename); + if (!(input.is_open())) { + std::cout << "Open file " << filename << " failed" << std::endl; + return false; + } + std::string imagedata((std::istreambuf_iterator(input)), std::istreambuf_iterator()); + if (!Decode(imagedata, image)) { + return false; + } + return true; +} + +bool Decoder::Decode(std::string& imagedata , JpegImage& image) { + if (!PrepareJpegImage(imagedata, image)) { + return false; + } + CHECK_NVJPEG(nvjpegDecode( + mHandle, + mState, + (const unsigned char *)imagedata.data(), + imagedata.size(), + NVJPEG_OUTPUT_RGBI, + image.GetImagePoint(), + mStream)); + return true; +} + +bool Decoder::PrepareJpegImage(const std::string& image, JpegImage& output) { + int widths[NVJPEG_MAX_COMPONENT]; + int heights[NVJPEG_MAX_COMPONENT]; + int channels; + nvjpegChromaSubsampling_t subsampling; + + CHECK_NVJPEG(nvjpegGetImageInfo( + mHandle, (unsigned char *)image.data(), image.size(), + &channels, &subsampling, widths, heights)); + + if (NVJPEG_CSS_UNKNOWN == subsampling) { + std::cout << "Unknown chroma subsampling" << std::endl; + return false; + } + + if (!output.Init(widths[0], heights[0], channels)) { + return false; + } + return true; +} + +void Decoder::Destroy() { + if (mDeviceId >= 0) { + cudaStreamDestroy(mStream); + nvjpegJpegStateDestroy(mState); + nvjpegDestroy(mHandle); + mDeviceId = -1; + } +} + +Decoder::~Decoder() { + Destroy(); +} + +} // namespace NVJpegDecoder diff --git a/cpp/nvjpeg_decoder.h b/cpp/nvjpeg_decoder.h new file mode 100644 index 0000000..5c0c013 --- /dev/null +++ b/cpp/nvjpeg_decoder.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include +#include +#include + +#include "jpeg_image.h" + +namespace NVJpegDecoder { + +typedef std::vector OriginJpegImages; +typedef std::vector JpegImages; + +class Decoder { +public: + Decoder():mDeviceId(-1) {} + virtual ~Decoder(); + + Decoder(Decoder&) = delete; + Decoder& operator=(Decoder&) = delete; + + bool BindDevice(int device_id=0); + bool Read(const char* filename, JpegImage& image); + bool Decode(std::string& imagedata , JpegImage& image); + +private: + bool PrepareJpegImage(const std::string& image, JpegImage& output); + void Destroy(); + +private: + nvjpegHandle_t mHandle; + nvjpegJpegState_t mState; + + int mDeviceId; + cudaStream_t mStream; +}; + +} // namespace NVJpegDecoder diff --git a/cpp/python_nvjpeg_decoder.cpp b/cpp/python_nvjpeg_decoder.cpp new file mode 100644 index 0000000..b837ff8 --- /dev/null +++ b/cpp/python_nvjpeg_decoder.cpp @@ -0,0 +1,52 @@ +#include +#include +#include +#include +#include "python_nvjpeg_decoder.h" + + +namespace NVJpegDecoder { + +bool PythonDecoder::BindDevice(int device_id) { + py::gil_scoped_release release; + return mDecoder.BindDevice(device_id); +} + +py::object PythonDecoder::Read(std::string& filename) { + unsigned char* data = nullptr; + JpegImage image; + { + py::gil_scoped_release release; + if (mDecoder.Read(filename.c_str(), image)) { + data = image.Cpu(); + } + } // gets gil + + std::unique_ptr ret(data); + if (ret != nullptr) { + return py::array(py::dtype(py::format_descriptor::format()), image.Dims(), (void*)ret.get()); + } + return py::none(); +} + +py::object PythonDecoder::Decode(std::string& image_bytes) { + JpegImage image; + if (!mDecoder.Decode(image_bytes, image)) { + return py::none(); + } + std::unique_ptr data(image.Cpu()); + if (nullptr == data) { + return py::none(); + } + return py::array(py::dtype(py::format_descriptor::format()), image.Dims(), (void*)data.get()); +} + +PYBIND11_MODULE(pynvjpeg, m) { + py::class_>(m, "Decoder") + .def(py::init()) + .def("bind_device", &PythonDecoder::BindDevice) + .def("imread", &PythonDecoder::Read) + .def("imdecode", &PythonDecoder::Decode); +} + +} // namespace NVJpegDecoder diff --git a/cpp/python_nvjpeg_decoder.h b/cpp/python_nvjpeg_decoder.h new file mode 100644 index 0000000..19bf1ca --- /dev/null +++ b/cpp/python_nvjpeg_decoder.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include +#include "nvjpeg_decoder.h" + +namespace py = pybind11; + +namespace NVJpegDecoder { + +class PythonDecoder { +public: + PythonDecoder() = default; + + PythonDecoder(PythonDecoder&) = delete; + PythonDecoder& operator=(PythonDecoder&) = delete; + + bool BindDevice(int device_id=0); + py::object Read(std::string&); + py::object Decode(std::string&); + +private: + Decoder mDecoder; +}; + +} // namespace NVJpegDecoder diff --git a/cpp/test.cpp b/cpp/test.cpp new file mode 100644 index 0000000..1ad4cf7 --- /dev/null +++ b/cpp/test.cpp @@ -0,0 +1,90 @@ +#include +#include +#include +#include +#include +#include "nvjpeg_decoder.h" + + +bool ReadImage(const char* filename, std::string& imagedata) { + std::ifstream input(filename); + if (!(input.is_open())) { + std::cout << "Open file " << filename << " failed" << std::endl; + return false; + } + + imagedata = std::string((std::istreambuf_iterator(input)), std::istreambuf_iterator()); + return true; +} + +int main(int argc, char *argv[]) { + const char* image_file = argv[1]; + NVJpegDecoder::Decoder decoder; + if (!decoder.BindDevice(0)) { + std::cout << "Init Failed" << std::endl; + return -1; + } + + // test deocde from bytes + { + std::string image_btyes; + if (!ReadImage(image_file, image_btyes)) { + return -1; + } + int count = 0; + timeb t1, t2; + ftime(&t1); + + while (count < 1000) { + NVJpegDecoder::JpegImage image; + if (!decoder.Decode(image_btyes, image)) { + return -1; + } + count++; + } + ftime(&t2); + std::cout << "--------- " << t2.time * 1000 + t2.millitm - t1.time * 1000 - t1.millitm << std::endl; + } + + // test decode from file + { + timeb t1, t2; + ftime(&t1); + { + int count = 0; + while (count < 1000) { + NVJpegDecoder::JpegImage image; + if (!decoder.Read(image_file, image)) { + return -1; + } + unsigned char* d = image.Cpu(); + delete[] d; + count++; + } + } + ftime(&t2); + std::cout << "--------- " << t2.time * 1000 + t2.millitm - t1.time * 1000 - t1.millitm << std::endl; + } + + decoder.BindDevice(1); + { + std::string image_btyes; + if (!ReadImage(image_file, image_btyes)) { + return -1; + } + int count = 0; + timeb t1, t2; + ftime(&t1); + + while (count < 1000) { + NVJpegDecoder::JpegImage image; + if (!decoder.Decode(image_btyes, image)) { + return -1; + } + count++; + } + ftime(&t2); + std::cout << "--------- " << t2.time * 1000 + t2.millitm - t1.time * 1000 - t1.millitm << std::endl; + } + return 0; +} diff --git a/cpp/test.py b/cpp/test.py new file mode 100644 index 0000000..9e6ddf1 --- /dev/null +++ b/cpp/test.py @@ -0,0 +1,26 @@ +import sys +import numpy as np +import cv2 +import time +sys.path.insert(0, '/'.join(sys.argv[1].split('/')[:-1])) +import pynvjpeg + + +if __name__ == '__main__': + decoder = pynvjpeg.Decoder() + assert(decoder.bind_device(0) is True) + filename = sys.argv[2] + image0 = decoder.imread(filename) + image0 = image0.astype(np.int32) + h, w, c = image0.shape + with open(filename, 'rb') as f: + data = f.read() + + + image1 = cv2.imread(filename) + image1 = cv2.cvtColor(image1, cv2.COLOR_BGR2RGB) + image1 = image1.astype(np.int32) + if np.sum((image1 - image0)) > h * w * c: + exit(-1) + exit(0) + diff --git a/cpp/test_pydecoder.cpp b/cpp/test_pydecoder.cpp new file mode 100644 index 0000000..c1d1b15 --- /dev/null +++ b/cpp/test_pydecoder.cpp @@ -0,0 +1,22 @@ +#include +#include +#include "python_nvjpeg_decoder.h" + + +int main() { + py::scoped_interpreter guard{}; + NVJpegDecoder::PythonDecoder decoder; + if (!decoder.Init()) { + std::cout << "Init Failed" << std::endl; + return -1; + } + std::cout << 1 << std::endl;; + py::list data; + std::cout << 2 << std::endl;; + for (int i = 0; i < 10; i++) { + std::cout << i << ' '; + data.append(i); + } + decoder.BatchDecode(data); + return 0; +} diff --git a/image_decoder.py b/image_decoder.py new file mode 100644 index 0000000..57c14fa --- /dev/null +++ b/image_decoder.py @@ -0,0 +1,96 @@ +# encoding=utf-8 +import os +import sys +import logging +import platform + +import cv2 +import requests +import numpy as np +from towhee.types import Image +from towhee.operator import PyOperator, SharedType + +logger = logging.getLogger() + + +def create_pynvjpeg(): + if platform.system().lower() != 'linux': + logger.warning('The GPU deocder only support linux') + return None + + py_version = platform.python_version() + version = ''.join(py_version.split('.')[0:2]) + cur_dir = os.path.dirname(os.path.abspath(__file__)) + lib_path = cur_dir + '/' + 'py' + version + 'nvjpeg' + sys.path.insert(0, lib_path) + try: + import pynvjpeg + decoder = pynvjpeg.Decoder() + if not decoder.bind_device(0): + logger.info('Init GPU deocder failed, use CPU decoder') + return None + else: + return decoder + except Exception as e: + logger.error('Create nvjpeg failed, use opencv decoder, errors: ', str(e)) + return None + + +class ImageDecoder(PyOperator): + def __init__(self): + self._nvjpeg = create_pynvjpeg() + + def to_device(self, device): + if self._nvjpeg is not None: + if not self._nvjpeg.bind_device(device): + self._nvjpeg = None + + def _download_image(self, image_url): + image_type = image_url.split('?')[0].split('.')[-1].lower() + is_jpeg = True if image_type == 'jpeg' or image_type == 'jpg' else False + r = requests.get(image_url, timeout=(20, 20)) + if r.status_code // 100 != 2: + logging.error('Download image from %s failed, error msg: %s, request code: %s ' % (image_url, + r.text, + r.status_code)) + return None + return is_jpeg, r.content + + def _read_image(self, image_path): + is_jpeg = image_path.lower().endswith('jpg') or image_path.lower().endswith('jpeg') + with open(image_path, 'rb') as f: + return is_jpeg, f.read() + + def load_image(self, image_path): + if image_path.startswith('http'): + return self._download_image(image_path) + else: + return self._read_image(image_path) + + def __call__(self, image_path: str): + is_jpeg, image_content = self.load_image(image_path) + if is_jpeg and self._nvjpeg is not None: + image = self._nvjpeg.imdecode(image_content) + else: + arr = np.asarray(bytearray(image_content), dtype=np.uint8) + image = cv2.imdecode(arr, -1) + if image is not None: + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + if image is None: + raise RuntimeError('Decode image %s failed' % image_path) + return Image(image, 'RGB') + + def input_schema(self): + return [(str, (1,))] + + def output_schema(self): + return [(Image, (-1, -1, 3))] + + @property + def shared_type(self): + return SharedType.NotShareable + +if __name__ == '__main__': + d = ImageDecoder() + print(d('/home/junjie.jiangjjj/images/1.png')) + print(d('/home/junjie.jiangjjj/images/towhee.jpeg')) diff --git a/py38nvjpeg/__init__.py b/py38nvjpeg/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/py38nvjpeg/pynvjpeg.cpython-38-x86_64-linux-gnu.so b/py38nvjpeg/pynvjpeg.cpython-38-x86_64-linux-gnu.so new file mode 100755 index 0000000..af632c0 Binary files /dev/null and b/py38nvjpeg/pynvjpeg.cpython-38-x86_64-linux-gnu.so differ