如何在 OpenCL 中实现多遍(multi-pass)处理?

计算科学 开放式
2021-12-26 03:48:41

我是 OpenCL 的新手,但有一些使用 HLSL 的经验。在 HLSL 中,如果必须先完成一步计算才能进入下一步,就会使用多遍(multi-pass)处理。

我想知道这种事情是如何在opencl中完成的。

我正在编写如下图像过滤器

// Applies a (2*maskSize+1) x (2*maskSize+1) weight mask to the neighbourhood of
// `point` in `srcImg` and returns the weighted sum of the texel values.
//
//   srcImg - image read with read_imageui (unsigned-integer channels).
//   point  - texel coordinate of the neighbourhood centre.
//   kern   - row-major mask weights; kern[row * width + column], where
//            row/column 0 correspond to the offset -maskSize.
//
// Reads outside the image are clamped to the nearest edge texel by the sampler.
float4 Convolution(__read_only image2d_t srcImg, int2 point, float * kern)
{
    // read_imageui with integer coordinates is only defined for
    // CLK_FILTER_NEAREST; with CLK_FILTER_LINEAR the returned values are undefined.
    const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

    const int maskSize = 1;
    // Row stride of the mask, derived from maskSize instead of a hard-coded 3.
    const int maskWidth = 2 * maskSize + 1;

    float4 sum = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
    for (int i = -maskSize; i <= maskSize; i++)
    {
        for (int j = -maskSize; j <= maskSize; j++)
        {
            // delta indexes the mask (0-based); pos is the texel offset from the centre.
            int2 delta = (int2)(i + maskSize, j + maskSize);
            int2 pos = (int2)(i, j);
            float weight = kern[(delta.y * maskWidth) + delta.x];
            sum += weight * convert_float4(read_imageui(srcImg, smp, point + pos));
        }
    }
    return sum;
}


// Performs one explicit step of 8-neighbour anisotropic (Perona-Malik style)
// diffusion on the first channel of srcImg and writes the result, replicated
// into the first three channels with the fourth set to 255, to dstImg.
//
// One work-item processes one texel; the global work size is expected to match
// the image dimensions. The kernel reads only srcImg and writes only dstImg, so
// further passes can be run by enqueueing it again with the previous output
// bound as the new input.
__kernel void imagingTest(__read_only  image2d_t srcImg, __write_only image2d_t dstImg)
{
    // Plain float literals: the "L" (long double) suffix and unsuffixed double
    // literals are not portable in OpenCL C.
    const float k = 30.0f;              // edge-stopping threshold
    const float delta_t = 1.0f / 7.0f;  // integration step
    const float diagWeight = 0.5f;      // 1 / (sqrt(2))^2, weight of diagonal neighbours

    // 3x3 row-major masks; each one yields (neighbour - centre) for one direction.
    float hN[9]  = { 0.0f, 1.0f, 0.0f,   0.0f, -1.0f, 0.0f,   0.0f, 0.0f, 0.0f };
    float hS[9]  = { 0.0f, 0.0f, 0.0f,   0.0f, -1.0f, 0.0f,   0.0f, 1.0f, 0.0f };
    float hE[9]  = { 0.0f, 0.0f, 0.0f,   0.0f, -1.0f, 1.0f,   0.0f, 0.0f, 0.0f };
    float hW[9]  = { 0.0f, 0.0f, 0.0f,   1.0f, -1.0f, 0.0f,   0.0f, 0.0f, 0.0f };
    float hNE[9] = { 0.0f, 0.0f, 1.0f,   0.0f, -1.0f, 0.0f,   0.0f, 0.0f, 0.0f };
    float hSE[9] = { 0.0f, 0.0f, 0.0f,   0.0f, -1.0f, 0.0f,   0.0f, 0.0f, 1.0f };
    float hSW[9] = { 0.0f, 0.0f, 0.0f,   0.0f, -1.0f, 0.0f,   1.0f, 0.0f, 0.0f };
    float hNW[9] = { 1.0f, 0.0f, 0.0f,   0.0f, -1.0f, 0.0f,   0.0f, 0.0f, 0.0f };

    // read_imageui with integer coordinates requires CLK_FILTER_NEAREST;
    // CLK_FILTER_LINEAR gives undefined results.
    const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

    int2 coord = (int2)(get_global_id(0), get_global_id(1));

    uint4 bgra = read_imageui(srcImg, smp, coord);

    // Directional differences.
    float4 nablaN = Convolution(srcImg, coord, hN);
    float4 nablaS = Convolution(srcImg, coord, hS);
    float4 nablaE = Convolution(srcImg, coord, hE);
    float4 nablaW = Convolution(srcImg, coord, hW);

    float4 nablaNE = Convolution(srcImg, coord, hNE);
    float4 nablaNW = Convolution(srcImg, coord, hNW);
    float4 nablaSE = Convolution(srcImg, coord, hSE);
    float4 nablaSW = Convolution(srcImg, coord, hSW);

    // Conduction coefficients: exp(-(nabla / k)^2).
    float4 cN  = exp(-(nablaN /k) * (nablaN /k));
    float4 cS  = exp(-(nablaS /k) * (nablaS /k));
    float4 cW  = exp(-(nablaW /k) * (nablaW /k));
    float4 cE  = exp(-(nablaE /k) * (nablaE /k));
    float4 cNE = exp(-(nablaNE/k) * (nablaNE/k));
    float4 cSE = exp(-(nablaSE/k) * (nablaSE/k));
    float4 cSW = exp(-(nablaSW/k) * (nablaSW/k));
    float4 cNW = exp(-(nablaNW/k) * (nablaNW/k));

    // The diagonal weight must scale all four diagonal terms; without the
    // parentheses it only scaled the NE term.
    float4 diagonal = (nablaNE * cNE) + (nablaSE * cSE) + (nablaSW * cSW) + (nablaNW * cNW);
    float4 axial = (nablaN * cN) + (nablaS * cS) + (nablaW * cW) + (nablaE * cE);
    float4 update = delta_t * (diagWeight * diagonal + axial);

    // A diffusion step is "previous value + update". Clamp before converting so
    // a negative result cannot wrap around in the unsigned channel.
    float filtered = clamp(convert_float(bgra.x) + update.x, 0.0f, 255.0f);
    bgra.x = bgra.y = bgra.z = convert_uint_sat(filtered);

    bgra.w = 255;
    write_imageui(dstImg, coord, bgra);
}

这执行了一次各向异性扩散,我希望能够多次应用此过程。我该怎么做呢?

编辑

这是 C# 代码

using System;
using System.Collections;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Runtime.InteropServices;

using Emgu.CV;
using Emgu.Util;
using Emgu;
using Emgu.CV.Structure;

using OpenCL.Net;

namespace HLSLTest
{
    /// <summary>
    /// Builds the "imagingTest" OpenCL kernel once and runs it on Emgu CV images.
    /// Call <see cref="Setup"/> first, then <see cref="ImagingTest"/> as often as needed
    /// (feeding the output of one call into the next gives additional passes), and
    /// finally <see cref="Dispose"/> to release the kernel.
    /// </summary>
    public class Computations : IDisposable
    {
        private Cl.Context _context;
        private Cl.Device _device;
        private Cl.Kernel _kernel;

        // True from the moment Setup creates the kernel until Dispose releases it.
        private bool _kernelCreated;

        /// <summary>
        /// Writes a diagnostic line to the console when <paramref name="err"/> is not Success.
        /// Execution continues afterwards; no exception is thrown.
        /// </summary>
        /// <param name="err">Error code returned by an OpenCL call.</param>
        /// <param name="name">Name of the call, used in the message.</param>
        private void CheckErr(Cl.ErrorCode err, string name)
        {
            if (err != Cl.ErrorCode.Success)
            {
                Console.WriteLine("ERROR: " + name + " (" + err.ToString() + ")");
            }
        }

        /// <summary>
        /// Callback handed to Cl.CreateContext; prints the notification text to the console.
        /// </summary>
        private void ContextNotify(string errInfo, byte[] data, IntPtr cb, IntPtr userData)
        {
            Console.WriteLine("OpenCL Notification: " + errInfo);
        }

        /// <summary>
        /// Selects the first GPU device found, creates a context for it, builds
        /// ImagingTest.cl and creates the "imagingTest" kernel.
        /// On any failure a message is printed and the method returns without a kernel.
        /// </summary>
        public void Setup()
        {
            // Calling Setup again must not leak the kernel created by an earlier call.
            ReleaseKernelIfCreated();

            Cl.ErrorCode error;
            Cl.Platform[] platforms = Cl.GetPlatformIDs(out error);
            List<Cl.Device> devicesList = new List<Cl.Device>();

            CheckErr(error, "Cl.GetPlatformIDs");

            foreach (Cl.Platform platform in platforms)
            {
                string platformName = Cl.GetPlatformInfo(platform, Cl.PlatformInfo.Name, out error).ToString();
                Console.WriteLine("Platform: " + platformName);
                CheckErr(error, "Cl.GetPlatformInfo");

                //We will be looking only for GPU devices
                foreach (Cl.Device device in Cl.GetDeviceIDs(platform, Cl.DeviceType.Gpu, out error))
                {
                    CheckErr(error, "Cl.GetDeviceIDs");
                    Console.WriteLine("Device: " + device.ToString());
                    devicesList.Add(device);
                }
            }

            if (devicesList.Count <= 0)
            {
                Console.WriteLine("No devices found.");
                return;
            }

            _device = devicesList[0];

            if (Cl.GetDeviceInfo(_device, Cl.DeviceInfo.ImageSupport, out error).CastTo<Cl.Bool>() == Cl.Bool.False)
            {
                Console.WriteLine("No image support.");
                return;
            }

            //Second parameter is amount of devices
            _context = Cl.CreateContext(null, 1, new[] { _device }, ContextNotify, IntPtr.Zero, out error);
            CheckErr(error, "Cl.CreateContext");

            //Load and compile kernel source code.
            //The path to the source file may vary
            string programPath = Environment.CurrentDirectory + "/../../../ImagingTest.cl";

            if (!System.IO.File.Exists(programPath))
            {
                Console.WriteLine("Program doesn't exist at path " + programPath);
                return;
            }

            string programSource = System.IO.File.ReadAllText(programPath);

            using (Cl.Program program = Cl.CreateProgramWithSource(_context, 1, new[] { programSource }, null, out error))
            {
                CheckErr(error, "Cl.CreateProgramWithSource");

                //Compile kernel source
                error = Cl.BuildProgram(program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
                CheckErr(error, "Cl.BuildProgram");

                //Check for any compilation errors
                Cl.BuildStatus buildStatus = Cl.GetProgramBuildInfo
                (
                    program,
                    _device,
                    Cl.ProgramBuildInfo.Status,
                    out error
                ).CastTo<Cl.BuildStatus>();

                if (buildStatus != Cl.BuildStatus.Success)
                {
                    CheckErr(error, "Cl.GetProgramBuildInfo");
                    Console.WriteLine("Cl.GetProgramBuildInfo != Success");
                    Console.WriteLine(Cl.GetProgramBuildInfo(program, _device, Cl.ProgramBuildInfo.Log, out error));
                    return;
                }

                //Create the required kernel (entry function)
                _kernel = Cl.CreateKernel(program, "imagingTest", out error);
                CheckErr(error, "Cl.CreateKernel");
                _kernelCreated = (error == Cl.ErrorCode.Success);
            }
        }

        /// <summary>
        /// Runs the kernel once over <paramref name="InputImage"/> and returns the result.
        /// The kernel itself stays alive, so this method can be called repeatedly.
        /// </summary>
        /// <param name="InputImage">Image to process; it is converted to a 32-bit bitmap for upload.</param>
        /// <param name="outputImage">Receives the processed image, same width and height as the input.</param>
        public void ImagingTest(Image<Gray, Single> InputImage, out Image<Gray, Single> outputImage)
        {
            Cl.ErrorCode error;

            int intPtrSize = Marshal.SizeOf(typeof(IntPtr));

            Cl.ImageFormat clImageFormat = new Cl.ImageFormat(Cl.ChannelOrder.RGBA, Cl.ChannelType.Unsigned_Int8);

            int inputImgWidth = InputImage.Width;
            int inputImgHeight = InputImage.Height;

            //Image's raw 32-bit pixel data copied into a managed byte[] array
            byte[] inputByteArray;
            int inputImgBytesSize;
            int inputImgStride;
            int lockedWidth;
            int lockedHeight;

            using (Bitmap bmpImage = InputImage.ToBitmap())
            {
                //Get raw pixel data of the bitmap
                //The format should match the format of clImageFormat
                BitmapData bitmapData = bmpImage.LockBits
                (
                    new Rectangle(0, 0, bmpImage.Width, bmpImage.Height),
                    ImageLockMode.ReadOnly,
                    PixelFormat.Format32bppArgb
                );

                try
                {
                    lockedWidth = bitmapData.Width;
                    lockedHeight = bitmapData.Height;
                    inputImgStride = bitmapData.Stride;
                    inputImgBytesSize = bitmapData.Stride * bitmapData.Height;

                    inputByteArray = new byte[inputImgBytesSize];
                    Marshal.Copy(bitmapData.Scan0, inputByteArray, 0, inputImgBytesSize);
                }
                finally
                {
                    // Every LockBits needs a matching UnlockBits, even if the copy fails.
                    bmpImage.UnlockBits(bitmapData);
                }
            }

            //Allocate OpenCL image memory buffer for the input
            Cl.Mem inputImage2DBuffer = Cl.CreateImage2D
            (
                _context,
                Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly,
                clImageFormat,
                (IntPtr)lockedWidth,
                (IntPtr)lockedHeight,
                (IntPtr)0,
                inputByteArray,
                out error
            );

            CheckErr(error, "Cl.CreateImage2D input");

            //Output image's raw byte[] array
            byte[] outputByteArray = new byte[inputImgBytesSize];

            //Allocate OpenCL image memory buffer for the output
            Cl.Mem outputImage2DBuffer = Cl.CreateImage2D
            (
                _context,
                Cl.MemFlags.CopyHostPtr | Cl.MemFlags.WriteOnly,
                clImageFormat,
                (IntPtr)inputImgWidth,
                (IntPtr)inputImgHeight,
                (IntPtr)0,
                outputByteArray,
                out error
            );

            CheckErr(error, "Cl.CreateImage2D output");

            //Pass the memory buffers to our kernel function
            error = Cl.SetKernelArg(_kernel, 0, (IntPtr)intPtrSize, inputImage2DBuffer);
            error |= Cl.SetKernelArg(_kernel, 1, (IntPtr)intPtrSize, outputImage2DBuffer);
            CheckErr(error, "Cl.SetKernelArg");

            //Create a command queue, where all of the commands for execution will be added
            Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(_context, _device, (Cl.CommandQueueProperties)0, out error);
            CheckErr(error, "Cl.CreateCommandQueue");

            Cl.Event clevent;

            //Copy input image from the host to the GPU.
            IntPtr[] originPtr = new IntPtr[] { (IntPtr)0, (IntPtr)0, (IntPtr)0 };  //x, y, z
            IntPtr[] regionPtr = new IntPtr[] { (IntPtr)inputImgWidth, (IntPtr)inputImgHeight, (IntPtr)1 }; //x, y, z
            //One work-item per pixel; this is the global work size handed to EnqueueNDRangeKernel.
            IntPtr[] globalWorkSizePtr = new IntPtr[] { (IntPtr)inputImgWidth, (IntPtr)inputImgHeight, (IntPtr)1 };

            error = Cl.EnqueueWriteImage(cmdQueue, inputImage2DBuffer, Cl.Bool.True, originPtr, regionPtr, (IntPtr)0, (IntPtr)0, inputByteArray, 0, null, out clevent);
            CheckErr(error, "Cl.EnqueueWriteImage");

            //Execute our kernel (OpenCL code)
            error = Cl.EnqueueNDRangeKernel(cmdQueue, _kernel, 2, null, globalWorkSizePtr, null, 0, null, out clevent);
            CheckErr(error, "Cl.EnqueueNDRangeKernel");

            //Wait for completion of all calculations on the GPU.
            error = Cl.Finish(cmdQueue);
            CheckErr(error, "Cl.Finish");

            //Read the processed image from GPU to the raw byte[] array
            error = Cl.EnqueueReadImage
            (
                cmdQueue,
                outputImage2DBuffer,
                Cl.Bool.True,
                originPtr,
                regionPtr,
                (IntPtr)0,
                (IntPtr)0,
                outputByteArray,
                0,
                null,
                out clevent
            );

            CheckErr(error, "Cl.clEnqueueReadImage");

            //Clean up the per-call objects. The kernel is deliberately NOT released
            //here: it is shared by every call and is released in Dispose.
            Cl.ReleaseCommandQueue(cmdQueue);

            Cl.ReleaseMemObject(inputImage2DBuffer);
            Cl.ReleaseMemObject(outputImage2DBuffer);

            //Pin the output array so a Bitmap can wrap it without copying
            GCHandle pinnedOutputArray = GCHandle.Alloc(outputByteArray, GCHandleType.Pinned);
            try
            {
                IntPtr outputBmpPointer = pinnedOutputArray.AddrOfPinnedObject();

                //Wrap the processed data in a bitmap and convert it to the output image.
                using (Bitmap outputBitmap = new Bitmap(inputImgWidth, inputImgHeight, inputImgStride, PixelFormat.Format32bppArgb, outputBmpPointer))
                {
                    outputImage = new Image<Gray, Single>(outputBitmap);
                }
            }
            finally
            {
                // Always unpin, even if bitmap or image construction throws.
                pinnedOutputArray.Free();
            }
        }

        /// <summary>
        /// Releases the OpenCL kernel created by <see cref="Setup"/>. Safe to call more than once.
        /// </summary>
        public void Dispose()
        {
            ReleaseKernelIfCreated();
        }

        // Releases the kernel exactly once and clears the flag that tracks it.
        private void ReleaseKernelIfCreated()
        {
            if (_kernelCreated)
            {
                Cl.ReleaseKernel(_kernel);
                _kernelCreated = false;
            }
        }
    }
}
1个回答

OpenCL 使用屏障(barrier)

您需要将第一遍(pass)的结果存储在一个变量中,然后调用

barrier(CLK_LOCAL_MEM_FENCE);

一旦所有线程都到达屏障,就可以执行下一段代码。

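这是为了强制保证数据依赖关系。注意:barrier 只能同步同一工作组(work-group)内的工作项;如果下一遍需要读取其他工作组写入的结果(例如整幅图像的卷积),则应在主机端多次将内核加入命令队列,并在两遍之间交换输入图像和输出图像。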