/*******************************************************************************
* Copyright 2016 Intel Corporation.
*
*
* This software and the related documents are Intel copyrighted materials, and your use of them is governed by
* the express license under which they were provided to you ('License'). Unless the License provides otherwise,
* you may not use, modify, copy, publish, distribute, disclose or transmit this software or the related
* documents without Intel's prior written permission.
* This software and the related documents are provided as is, with no express or implied warranties, other than
* those that are expressly stated in the License.
*******************************************************************************/

// Intel(R) Integrated Performance Primitives (Intel(R) IPP)

#include "pifbilateralbrd_tl.h"

/*
 * Decides how the destination ROI is divided into tiles.
 *
 *   dstRoiSize  size of the destination ROI
 *   maskSize    filter mask size (not used by the computation)
 *   numThreads  number of threads available
 *   pTileSize   [out] size of a regular tile
 *   pLastSize   [out] size of the last tile
 *   splitImage  [out] number of tiles along x and y
 *
 * When only one thread is available, or an even share of rows per thread
 * is smaller than TYLE_S, the whole (non-empty) ROI becomes a single tile.
 * Otherwise the ROI is split by ippiSplitToTiles_LT using full-width tiles
 * of TYLE_S rows as the requested tile size.
 */
static void ownGetBilateralSliceSize(IppiSizeL dstRoiSize, IppiSizeL maskSize, Ipp32u numThreads, IppiSizeL* pTileSize, IppiSizeL* pLastSize, IppiPointL* splitImage)
{
    const IppSizeL rowsPerThread = dstRoiSize.height / (IppSizeL)numThreads;
    const int tooSmallToSplit = (numThreads == 1) || (rowsPerThread < TYLE_S);
    IppiSizeL requestedTile;

    /* Default: a 1 x 1 grid. */
    splitImage->x = 1;
    splitImage->y = 1;

    if (tooSmallToSplit && (dstRoiSize.height))
    {
        /* Single tile covering the whole ROI. */
        pTileSize->width  = dstRoiSize.width;
        pLastSize->width  = dstRoiSize.width;
        pTileSize->height = dstRoiSize.height;
        pLastSize->height = dstRoiSize.height;
        return;
    }

    /* Split the image into horizontal stripes. */
    requestedTile.width  = dstRoiSize.width;
    requestedTile.height = TYLE_S;
    ippiSplitToTiles_LT(dstRoiSize, requestedTile, splitImage, pTileSize, pLastSize);
}
/* /////////////////////////////////////////////////////////////////////////////
//                     Bilateral filter functions with Border
// /////////////////////////////////////////////////////////////////////////////
//  Name:       ippiFilterBilateralBorderGetBufferSize_LT
//  Purpose:    to define buffer size for bilateral filter
//  Parameters:
//   filter        Type of bilateral filter. Possible value is ippiFilterBilateralGauss.
//   dstRoiSize    ROI size (in pixels) of the destination image that will be
//                 processed.
//   radius        Radius of the circular neighborhood that defines the pixels used for calculation.
//   dataType      Data type of the source and destination images. Possible values
//                 are Ipp8u and Ipp32f.
//   numChannels   Number of channels in the images. Possible values are 1 and 3.
//   distMethod    The method used to define the distance between pixel intensities.
//                 Possible value is ippDistNormL1.
//   pSpecSize     Pointer to the size (in bytes) of the spec.
//   pBufferSize   Pointer to the size (in bytes) of the external work buffer.
//  Return:
//    ippStsNoErr               OK
//    ippStsNullPtrErr          any pointer is NULL
//    ippStsSizeErr             width or height of dstRoiSize is less than or equal to 0
//    ippStsMaskSizeErr         radius is less than or equal to 0
//    ippStsNotSupportedModeErr filter or distMethod is not supported
//    ippStsDataTypeErr         Indicates an error when dataType has an illegal value.
//    ippStsNumChannelsErr      Indicates an error when numChannels has an illegal value.
*/
IPPFUN(IppStatus, ippiFilterBilateralBorderGetBufferSize_LT, (IppiFilterBilateralType filter, IppiSizeL dstRoiSize, int radius, IppDataType dataType, int numChannels, IppiDistanceMethodType distMethodType, IppSizeL *pSpecSizeL, IppSizeL *pBufferSize))
{
    Ipp32s threadCount;
    IppiSizeL tile = { 0, 0 };
    IppiSizeL lastTile = { 0, 0 };
    IppiPointL grid = { 0, 0 };
    IppiSizeL mask;
    IppSizeL specBytes, tileBufBytes;
    IppStatus sts = ippStsNoErr;

    /* Argument validation done at this level. */
    if (pSpecSizeL == 0 || pBufferSize == 0) return ippStsNullPtrErr;
    if (dstRoiSize.width <= 0 || dstRoiSize.height <= 0) return ippStsSizeErr;

    mask.width  = radius * 2 + 1;
    mask.height = mask.width;

    ippGetNumThreads_LT(&threadCount);

    /* Work out the tile grid, then size the buffers for the larger of the
       regular tile and the last tile in each dimension. */
    ownGetBilateralSliceSize(dstRoiSize, mask, threadCount, &tile, &lastTile, &grid);
    if (lastTile.width < tile.width)
    {
        lastTile.width = tile.width;
    }
    if (lastTile.height < tile.height)
    {
        lastTile.height = tile.height;
    }

    sts = ippiFilterBilateralBorderGetBufferSize_L(filter, lastTile, radius, dataType, numChannels, distMethodType, &specBytes, &tileBufBytes);
    if (sts < 0)
    {
        /* Outputs are left untouched on error. */
        return sts;
    }

    /* The LT spec is the underlying spec prefixed by a BilateralInfo header;
       the work buffer holds one per-tile buffer for every tile of the grid. */
    *pSpecSizeL = specBytes + sizeof(BilateralInfo);
    *pBufferSize = tileBufBytes * ((IppSizeL)grid.y*grid.x);
    return sts;
}

/* /////////////////////////////////////////////////////////////////////////////
//  Name:       ippiFilterBilateralBorderInit_LT
//  Purpose:    initialization of Spec for bilateral filter with border
//  Parameters:
//   filter           Type of bilateral filter. Possible value is ippiFilterBilateralGauss.
//   dstRoiSize   Size of the destination ROI
//   radius           Radius of the circular neighborhood that defines the pixels used for calculation.
//   dataType         Data type of the source and destination images. Possible values
//                    are Ipp8u and Ipp32f.
//   numChannels      Number of channels in the images. Possible values are 1 and 3.
//   distMethodType   The method used to define the distance between pixel intensities.
//                    Possible value is ippDistNormL1.
//   valSquareSigma   square of Sigma for factor function for pixel intensity
//   posSquareSigma   square of Sigma for factor function for pixel position
//    pSpec           pointer to Spec
//  Return:
//    ippStsNoErr               OK
//    ippStsNullPtrErr          pointer to Spec is NULL
//    ippStsSizeErr             width or height of dstRoiSize is less than or equal to 0
//    ippStsMaskSizeErr         radius is less than or equal to 0
//    ippStsNotSupportedModeErr filter or distMethod is not supported
//    ippStsDataTypeErr         Indicates an error when dataType has an illegal value.
//    ippStsNumChannelsErr      Indicates an error when numChannels has an illegal value.
//    ippStsBadArgErr           valSquareSigma or posSquareSigma is less than or equal to 0
*/
IPPFUN(IppStatus, ippiFilterBilateralBorderInit_LT, (IppiFilterBilateralType filter, IppiSizeL dstRoiSize, int radius, IppDataType dataType, int numChannels, IppiDistanceMethodType distMethod, Ipp32f valSquareSigma, Ipp32f posSquareSigma, IppiFilterBilateralSpec_LT *pSpecL))
{
    IppStatus  status = ippStsNoErr;
    Ipp32u numThreads = 1;
    IppiSizeL initRoiSize;                   /* ROI handed to the underlying Init_L */
    BilateralInfo *pBilateralInfo = 0;       /* header stored in front of the spec  */
    IppiSizeL pTileSize = { 0, 0 }, pLastSize = { 0, 0 };
    IppiPointL splitImage = { 0, 0 };
    IppiSizeL maskSize;
    IppSizeL pSpecSize = 0, pBufSize = 0;

    if (pSpecL == 0)     return ippStsNullPtrErr;
    if (dstRoiSize.width <= 0 || dstRoiSize.height <= 0) return ippStsSizeErr;

    maskSize.height = maskSize.width = radius * 2 + 1;
    /* The underlying spec is initialized with a fixed 21x21 ROI rather than
       the caller's ROI. NOTE(review): presumably the spec contents do not
       depend on the ROI size - confirm against ippiFilterBilateralBorderInit_L. */
    initRoiSize.height = initRoiSize.width = 21;

    ippGetNumThreads_LT((int*)&numThreads);

    /* Compute the tile grid and size the per-tile buffer for the larger of
       the regular tile and the last tile in each dimension. */
    ownGetBilateralSliceSize(dstRoiSize, maskSize, numThreads, &pTileSize, &pLastSize, &splitImage);
    if (pLastSize.width < pTileSize.width)   pLastSize.width = pTileSize.width;
    if (pLastSize.height < pTileSize.height)   pLastSize.height = pTileSize.height;
    status = ippiFilterBilateralBorderGetBufferSize_L(ippiFilterBilateralGauss, pLastSize, radius, dataType, numChannels, ippDistNormL1, &pSpecSize, &pBufSize);
    /* Stop on error: pBufSize is not valid in that case and must not be
       recorded in the spec as the per-tile buffer size. */
    if (status < 0) return status;

    /* Layout of the LT spec: [BilateralInfo header][underlying spec]. */
    pBilateralInfo = (BilateralInfo*)pSpecL;
    pSpecL = (IppiFilterBilateralSpec_LT*)((Ipp8u*)pSpecL + sizeof(BilateralInfo));
    status = ippiFilterBilateralBorderInit_L(filter, initRoiSize, radius, dataType, numChannels, distMethod, valSquareSigma, posSquareSigma, (IppiFilterBilateralSpec *)pSpecL);

    /* Record the tiling so the processing functions can reuse it. */
    pBilateralInfo->bufsize = pBufSize;
    pBilateralInfo->lastTile.width = pLastSize.width;
    pBilateralInfo->lastTile.height = pLastSize.height;
    pBilateralInfo->tileSize.width = pTileSize.width;
    pBilateralInfo->tileSize.height = pTileSize.height;
    pBilateralInfo->split.x = splitImage.x;
    pBilateralInfo->split.y = splitImage.y;

    pBilateralInfo->radius = radius;
    return status;
}


/* /////////////////////////////////////////////////////////////////////////////
//  Name:       ippiFilterBilateralBorder_8u_C1R_L
//              ippiFilterBilateralBorder_8u_C3R_L
//              ippiFilterBilateralBorder_32f_C1R_L
//              ippiFilterBilateralBorder_32f_C3R_L
//  Purpose:    bilateral filter
//  Parameters:
//    pSrc         Pointer to the source image
//    srcStep      Step through the source image
//    pDst         Pointer to the destination image
//    dstStep      Step through the destination image
//    dstRoiSize   Size of the destination ROI
//    radius       Radius of the circular neighborhood that defines the pixels used for calculation.
//    borderType   Type of border.
//    borderValue  Pointer to constant value to assign to pixels of the constant border. This parameter is applicable
//                 only to the ippBorderConst border type. If this pointer is NULL then the constant value is equal to 0.
//    pSpec        Pointer to filter spec
//    pBuffer      Pointer to work buffer
//  Return:
//    ippStsNoErr           OK
//    ippStsNullPtrErr      pointer to Src, Dst, Spec or Buffer is NULL
//    ippStsSizeErr         width or height of dstRoiSize is less than or equal to 0
//    ippStsContextMatchErr filter Spec does not match
//    ippStsNotEvenStepErr  Indicates an error when one of the step values is not divisible by 4
//                          for floating-point images.
//    ippStsBorderErr       Indicates an error when borderType has illegal value.
*/

/*
 * Per-tile worker for the 8u C1R bilateral filter.
 *   t    linear tile index (row = t / split.x, column = t % split.x)
 *   arg  pointer to an ippiFilterBilateralBorder_8u_LT_Str task descriptor
 * Returns the status of ippiFilterBilateralBorder_8u_C1R_L for the tile.
 */
IppStatus ippiFilterBilateralBorder_8u_C1R_LT_Fun (IppSizeL t, void *arg)
{
    ippiFilterBilateralBorder_8u_LT_Str *task = (ippiFilterBilateralBorder_8u_LT_Str *)arg;

    /* Unpack the task descriptor. */
    const Ipp8u *srcBase = (const Ipp8u *)task->pSrc;
    IppSizeL srcStride = task->srcStep;
    Ipp8u *dstBase = task->pDst;
    IppSizeL dstStride = task->dstStep;
    IppiBorderType userBorder = task->border;
    Ipp8u *constBorder = task->borderValue;
    const IppiFilterBilateralSpec_LT *spec = (const IppiFilterBilateralSpec_LT *)task->pSpec;
    Ipp8u *workBase = task->pBuffer;
    IppSizeL workStride = task->bufSize;
    IppSizeL channels = task->numChannels;
    IppiPointL grid = task->splitImage;
    IppiSizeL tile = task->pTileSize;
    IppiSizeL lastTile = task->pLastSize;

    /* Position of this tile inside the grid. */
    const IppSizeL row = t / grid.x;
    const IppSizeL col = t % grid.x;

    /* Top-left corner of the tile, assuming regular tile sizes. */
    Ipp8u *srcTile = (Ipp8u *)(srcBase + col * tile.width * channels);
    Ipp8u *dstTile = dstBase + col * tile.width * channels;
    srcTile += row * tile.height * srcStride;
    dstTile += row * tile.height * dstStride;

    IppiSizeL roi;
    roi.width  = tile.width;
    roi.height = tile.height;

    IppSizeL srcShift = 0;
    IppSizeL dstShift = 0;

    /* Number of rows by which the last tile exceeds a regular one. */
    const IppSizeL extraRows = lastTile.height - tile.height;

    if ((extraRows > 0) && (extraRows < grid.y) && (lastTile.height > tile.height))
    {
        /* The surplus rows are spread one per tile over the trailing
           'extraRows' tile rows; each of those tiles is one row taller and
           starts lower by the number of surplus rows already consumed. */
        if (row >= (int)(grid.y - extraRows))
        {
            const IppSizeL rowsAbove = extraRows + row - grid.y;
            roi.height = tile.height + 1;
            srcShift = rowsAbove * srcStride;
            dstShift = rowsAbove * dstStride;
        }
    }
    else if (lastTile.height && (row == (int)(grid.y - 1)))
    {
        /* Otherwise the last tile row simply takes the last-tile height. */
        roi.height = lastTile.height;
    }

    if (lastTile.width && (col == (int)(grid.x - 1)))
    {
        roi.width = lastTile.width;
    }

    /* Flag the sides on which the tile has neighbouring image data in memory. */
    IppiBorderType borderV = userBorder;
    if (grid.y > 1)
    {
        int inMemV = (int)ippBorderInMemBottom | (int)ippBorderInMemTop;
        if (row == 0)
        {
            inMemV = (int)ippBorderInMemBottom;
        }
        else if (row == (int)(grid.y - 1))
        {
            inMemV = (int)ippBorderInMemTop;
        }
        borderV = (IppiBorderType)((int)userBorder | inMemV);
    }

    IppiBorderType borderVH = borderV;
    if (grid.x > 1)
    {
        int inMemH = (int)ippBorderInMemRight | (int)ippBorderInMemLeft;
        if (col == 0)
        {
            inMemH = (int)ippBorderInMemRight;
        }
        else if (col == (int)(grid.x - 1))
        {
            inMemH = (int)ippBorderInMemLeft;
        }
        borderVH = (IppiBorderType)((int)borderV | inMemH);
    }

    /* Each thread works in its own slice of the shared work buffer. */
    int threadIdx = 0;
    ippGetThreadIdx_LT(&threadIdx);
    Ipp8u *work = workBase + workStride * threadIdx;

    srcTile += srcShift;
    dstTile += dstShift;

    /* Intel IPP function call */
    return ippiFilterBilateralBorder_8u_C1R_L(srcTile, srcStride, dstTile, dstStride, roi, borderVH, constBorder, (IppiFilterBilateralSpec*)spec, work);
}

/*
 * Bilateral filter with border, 8u single channel, threading-layer entry.
 *
 * pSpec must point to an LT spec laid out as a BilateralInfo header followed
 * by the underlying filter spec. The header supplies the tile grid, tile
 * sizes and per-tile buffer size used on the threaded path.
 *
 * Returns ippStsNullPtrErr if pSrc, pDst, pSpec or pBuffer is NULL,
 * ippStsSizeErr if roiSize has a non-positive dimension, otherwise the
 * status of the underlying filter call or of ippParallelFor_LT.
 */
IPPFUN(IppStatus, ippiFilterBilateralBorder_8u_C1R_LT, (const Ipp8u *pSrc, IppSizeL srcStep, Ipp8u *pDst, IppSizeL dstStep, IppiSizeL roiSize, IppiBorderType border, Ipp8u *borderValue, const  IppiFilterBilateralSpec_LT *pSpec, Ipp8u* pBuffer))
{
    IppStatus statusAll = ippStsNoErr;
    IppSizeL numChannels = 1;
    Ipp32u numThreads = 1;
    BilateralInfo *pBilateralInfo;           /* Bilateral Info structure */
    IppSizeL bufSize;
    IppiPointL splitImage = { 1, 1 };
    IppiSizeL pTileSize, pLastSize;

    if (pSrc == 0 || pDst == 0)     return ippStsNullPtrErr;
    if (roiSize.width <= 0 || roiSize.height <= 0) return ippStsSizeErr;
    if (pSpec == 0 || pBuffer == 0)     return ippStsNullPtrErr;

    /* Read the tiling header, then step past it to the underlying spec. */
    pBilateralInfo = (BilateralInfo*)pSpec;
    pSpec = (IppiFilterBilateralSpec_LT*)((Ipp8u*)pSpec + sizeof(BilateralInfo));
    bufSize = pBilateralInfo->bufsize;
    splitImage.x = pBilateralInfo->split.x; splitImage.y = pBilateralInfo->split.y;
    pTileSize.width = pBilateralInfo->tileSize.width; pTileSize.height = pBilateralInfo->tileSize.height;
    pLastSize.width = pBilateralInfo->lastTile.width; pLastSize.height = pBilateralInfo->lastTile.height;

    ippGetNumThreads_LT((int*)&numThreads);

    if ((numThreads == 1) || ((roiSize.height / (IppSizeL)numThreads) < TYLE_S))
    {
        /* Too little work to split: process the whole ROI in one call. */
        statusAll = ippiFilterBilateralBorder_8u_C1R_L(pSrc, srcStep, pDst, dstStep, roiSize, border, borderValue, (IppiFilterBilateralSpec*)pSpec, pBuffer);
    }
    else
    {
        /* One parallel task per tile of the grid stored in the spec. */
        IppSizeL numTiles = splitImage.x*splitImage.y;
        ippiFilterBilateralBorder_8u_LT_Str ts;
        fBilateralBrdThreadingStructureEncode_8u ((Ipp8u *)pSrc, srcStep, pDst, dstStep, border, borderValue, (IppiFilterBilateralSpec_LT *)pSpec, pBuffer, bufSize, numChannels, splitImage, pTileSize, pLastSize, &ts);
        statusAll = ippParallelFor_LT(numTiles, (void*)&ts, ippiFilterBilateralBorder_8u_C1R_LT_Fun);
    }
    return statusAll;
}

/*
 * Per-tile worker for the 8u C3R bilateral filter.
 *   t    linear tile index (row = t / split.x, column = t % split.x)
 *   arg  pointer to an ippiFilterBilateralBorder_8u_LT_Str task descriptor
 * Returns the status of ippiFilterBilateralBorder_8u_C3R_L for the tile.
 */
IppStatus ippiFilterBilateralBorder_8u_C3R_LT_Fun (IppSizeL t, void *arg)
{
    ippiFilterBilateralBorder_8u_LT_Str *task = (ippiFilterBilateralBorder_8u_LT_Str *)arg;

    /* Unpack the task descriptor. */
    const Ipp8u *srcBase = (const Ipp8u *)task->pSrc;
    IppSizeL srcStride = task->srcStep;
    Ipp8u *dstBase = task->pDst;
    IppSizeL dstStride = task->dstStep;
    IppiBorderType userBorder = task->border;
    Ipp8u *constBorder = task->borderValue;
    const IppiFilterBilateralSpec_LT *spec = (const IppiFilterBilateralSpec_LT *)task->pSpec;
    Ipp8u *workBase = task->pBuffer;
    IppSizeL workStride = task->bufSize;
    IppSizeL channels = task->numChannels;
    IppiPointL grid = task->splitImage;
    IppiSizeL tile = task->pTileSize;
    IppiSizeL lastTile = task->pLastSize;

    /* Position of this tile inside the grid. */
    const IppSizeL row = t / grid.x;
    const IppSizeL col = t % grid.x;

    /* Top-left corner of the tile, assuming regular tile sizes. */
    Ipp8u *srcTile = (Ipp8u *)(srcBase + col * tile.width * channels);
    Ipp8u *dstTile = dstBase + col * tile.width * channels;
    srcTile += row * tile.height * srcStride;
    dstTile += row * tile.height * dstStride;

    IppiSizeL roi;
    roi.width  = tile.width;
    roi.height = tile.height;

    IppSizeL srcShift = 0;
    IppSizeL dstShift = 0;

    /* Number of rows by which the last tile exceeds a regular one. */
    const IppSizeL extraRows = lastTile.height - tile.height;

    if ((extraRows > 0) && (extraRows < grid.y) && (lastTile.height > tile.height))
    {
        /* The surplus rows are spread one per tile over the trailing
           'extraRows' tile rows; each of those tiles is one row taller and
           starts lower by the number of surplus rows already consumed. */
        if (row >= (int)(grid.y - extraRows))
        {
            const IppSizeL rowsAbove = extraRows + row - grid.y;
            roi.height = tile.height + 1;
            srcShift = rowsAbove * srcStride;
            dstShift = rowsAbove * dstStride;
        }
    }
    else if (lastTile.height && (row == (int)(grid.y - 1)))
    {
        /* Otherwise the last tile row simply takes the last-tile height. */
        roi.height = lastTile.height;
    }

    if (lastTile.width && (col == (int)(grid.x - 1)))
    {
        roi.width = lastTile.width;
    }

    /* Flag the sides on which the tile has neighbouring image data in memory. */
    IppiBorderType borderV = userBorder;
    if (grid.y > 1)
    {
        int inMemV = (int)ippBorderInMemBottom | (int)ippBorderInMemTop;
        if (row == 0)
        {
            inMemV = (int)ippBorderInMemBottom;
        }
        else if (row == (int)(grid.y - 1))
        {
            inMemV = (int)ippBorderInMemTop;
        }
        borderV = (IppiBorderType)((int)userBorder | inMemV);
    }

    IppiBorderType borderVH = borderV;
    if (grid.x > 1)
    {
        int inMemH = (int)ippBorderInMemRight | (int)ippBorderInMemLeft;
        if (col == 0)
        {
            inMemH = (int)ippBorderInMemRight;
        }
        else if (col == (int)(grid.x - 1))
        {
            inMemH = (int)ippBorderInMemLeft;
        }
        borderVH = (IppiBorderType)((int)borderV | inMemH);
    }

    /* Each thread works in its own slice of the shared work buffer. */
    int threadIdx = 0;
    ippGetThreadIdx_LT(&threadIdx);
    Ipp8u *work = workBase + workStride * threadIdx;

    srcTile += srcShift;
    dstTile += dstShift;

    /* Intel IPP function call */
    return ippiFilterBilateralBorder_8u_C3R_L(srcTile, srcStride, dstTile, dstStride, roi, borderVH, constBorder, (IppiFilterBilateralSpec*)spec, work);
}

/*
 * Bilateral filter with border, 8u three channels, threading-layer entry.
 *
 * pSpec must point to an LT spec laid out as a BilateralInfo header followed
 * by the underlying filter spec. The header supplies the tile grid, tile
 * sizes and per-tile buffer size used on the threaded path.
 *
 * Returns ippStsNullPtrErr if pSrc, pDst, pSpec or pBuffer is NULL,
 * ippStsSizeErr if roiSize has a non-positive dimension, otherwise the
 * status of the underlying filter call or of ippParallelFor_LT.
 */
IPPFUN(IppStatus, ippiFilterBilateralBorder_8u_C3R_LT, (const Ipp8u *pSrc, IppSizeL srcStep, Ipp8u *pDst, IppSizeL dstStep, IppiSizeL roiSize, IppiBorderType border, Ipp8u *borderValue, const  IppiFilterBilateralSpec_LT *pSpec, Ipp8u* pBuffer))
{
    IppStatus statusAll = ippStsNoErr;
    IppSizeL numChannels = 3;
    Ipp32u numThreads = 1;                   /* defined even if the query below leaves it untouched */
    BilateralInfo *pBilateralInfo;           /* Bilateral Info structure */
    IppSizeL bufSize;
    IppiPointL splitImage = { 1, 1 };
    IppiSizeL pTileSize, pLastSize;

    if (pSrc == 0 || pDst == 0)     return ippStsNullPtrErr;
    if (roiSize.width <= 0 || roiSize.height <= 0) return ippStsSizeErr;
    if (pSpec == 0 || pBuffer == 0)     return ippStsNullPtrErr;

    /* Read the tiling header, then step past it to the underlying spec. */
    pBilateralInfo = (BilateralInfo*)pSpec;
    pSpec = (IppiFilterBilateralSpec_LT*)((Ipp8u*)pSpec + sizeof(BilateralInfo));
    bufSize = pBilateralInfo->bufsize;
    splitImage.x = pBilateralInfo->split.x; splitImage.y = pBilateralInfo->split.y;
    pTileSize.width = pBilateralInfo->tileSize.width; pTileSize.height = pBilateralInfo->tileSize.height;
    pLastSize.width = pBilateralInfo->lastTile.width; pLastSize.height = pBilateralInfo->lastTile.height;

    ippGetNumThreads_LT((int*)&numThreads);

    if ((numThreads == 1) || ((roiSize.height / (IppSizeL)numThreads) < TYLE_S))
    {
        /* Too little work to split: process the whole ROI in one call. */
        statusAll = ippiFilterBilateralBorder_8u_C3R_L(pSrc, srcStep, pDst, dstStep, roiSize, border, borderValue, (IppiFilterBilateralSpec*)pSpec, pBuffer);
    }
    else
    {
        /* One parallel task per tile of the grid stored in the spec. */
        IppSizeL numTiles = splitImage.x*splitImage.y;
        ippiFilterBilateralBorder_8u_LT_Str ts;
        fBilateralBrdThreadingStructureEncode_8u ((Ipp8u *)pSrc, srcStep, pDst, dstStep, border, borderValue, (IppiFilterBilateralSpec_LT *)pSpec, pBuffer, bufSize, numChannels, splitImage, pTileSize, pLastSize, &ts);
        statusAll = ippParallelFor_LT(numTiles, (void*)&ts, ippiFilterBilateralBorder_8u_C3R_LT_Fun);
    }
    return statusAll;
}
