/*******************************************************************************
* Copyright 2015 Intel Corporation.
*
*
* This software and the related documents are Intel copyrighted materials, and your use of them is governed by
* the express license under which they were provided to you ('License'). Unless the License provides otherwise,
* you may not use, modify, copy, publish, distribute, disclose or transmit this software or the related
* documents without Intel's prior written permission.
* This software and the related documents are provided as is, with no express or implied warranties, other than
* those that are expressly stated in the License.
*******************************************************************************/

/*
//   Purpose: Functions of Sqrt operation
//   Contents:
//       ippiSqrt_16s_C1IRSfs_T
//       ippiSqrt_32s16s_C1RSfs_T
*/

#include "pisimplearithm_t.h"

/*
//   Per-tile worker passed to ippParallelFor_T by ippiSqrt_16s_C1IRSfs_T.
//
//   Parameters:
//     i    - index of the tile to process
//     arg  - pointer to the ippiSimpleArithmetics_16s_T_Str filled in by the caller
//
//   Returns the status reported by ippiSqrt_16s_C1IRSfs for this tile.
*/
static IppStatus ippiSqrt_16s_C1IRSfs_T_Fun(int i, void *arg)
{
    ippiSimpleArithmetics_16s_T_Str *pParams = (ippiSimpleArithmetics_16s_T_Str *)arg;

    Ipp16s *pImage    = pParams->pDst;      /* in-place operation: the image travels in the dst fields */
    int     imageStep = pParams->dstStep;
    Ipp16s *pTile;

    /* Both start from whole-image values and are handed to the tile helper by pointer. */
    IppiSize  tileRoi    = pParams->roiSize;
    IppiPoint tileOffset = { 0, 0 };

    owniGetTileParamsByIndex_T(i, pParams->splitImage, pParams->tileSize, pParams->tailSize,
                               &tileOffset, &tileRoi);

    /* compute pointer to the tile inside the image */
    pTile = owniGetImagePointer_16s_C1(pImage, imageStep, tileOffset.x, tileOffset.y);

    return ippiSqrt_16s_C1IRSfs(pTile, imageStep, tileRoi, pParams->scaleFactor);
}

/*
//   Name: ippiSqrt_16s_C1IRSfs_T
//   Purpose: Threaded wrapper around ippiSqrt_16s_C1IRSfs. The ROI is split
//            into tiles by owniSplitUniform2D_T; a single tile is processed
//            directly, several tiles are dispatched through ippParallelFor_T.
//
//   Parameters:
//     pSrcDst      - pointer to the image processed in place
//     srcDstStep   - image step, passed through unchanged to ippiSqrt_16s_C1IRSfs
//     roiSize      - size of the region to process
//     scaleFactor  - scale factor, passed through unchanged to ippiSqrt_16s_C1IRSfs
//
//   Returns:
//     ippStsNullPtrErr  - pSrcDst is NULL
//     ippStsSizeErr     - roiSize.width or roiSize.height is not positive
//     otherwise the status of ippiSqrt_16s_C1IRSfs (single tile) or of
//     ippParallelFor_T (several tiles)
*/
IPPFUN (IppStatus, ippiSqrt_16s_C1IRSfs_T, (Ipp16s* pSrcDst, int srcDstStep, IppiSize roiSize, int scaleFactor))
{
    if (pSrcDst == 0)
    {
        return ippStsNullPtrErr;
    }
    if (!(roiSize.width > 0 && roiSize.height > 0))
    {
        return ippStsSizeErr;
    }

    int elemBytes   = sizeof (Ipp16s);
    int minTileSize = IPP_MIN (IPP64_MIN_ADD_2D / elemBytes, IPP_MAX_32S / elemBytes);

    IppiPoint splitImage;
    IppiSize  tileSize, tailSize;

    /* split the image to tiles */
    owniSplitUniform2D_T(roiSize, minTileSize, &splitImage, &tileSize, &tailSize);

    /* a 1x1 split means there is nothing to parallelize: run the primitive on the whole ROI */
    if (splitImage.x == 1 && splitImage.y == 1)
    {
        return ippiSqrt_16s_C1IRSfs(pSrcDst, srcDstStep, roiSize, scaleFactor);
    }

    int tileCount = splitImage.x * splitImage.y;
    ippiSimpleArithmetics_16s_T_Str params;

    /* in-place operation: both source slots stay empty, the image goes into the dst slot */
    simpleArithmeticsThreadingStructureEncode_16s ((Ipp16s*)0, 0, (Ipp16s*)0, 0, pSrcDst, srcDstStep, roiSize, scaleFactor,
                                                   splitImage, tileSize, tailSize, &params);

    return ippParallelFor_T(tileCount, (void*)&params, ippiSqrt_16s_C1IRSfs_T_Fun);
}

static IppStatus ippiSqrt_32s16s_C1RSfs_T_Fun(int i, void *arg)
{
	ippiSimpleArithmetics_32s16s_T_Str *ts = (ippiSimpleArithmetics_32s16s_T_Str *)arg;
	IppStatus status = ippStsNoErr;

	Ipp32s* pSrc = ts->pSrc1;
	int srcStep = ts->src1Step;
	Ipp16s* pDst = ts->pDst;
	int dstStep = ts->dstStep;

	IppiSize roiSize;
	roiSize.width = ts->roiSize.width;
	roiSize.height = ts->roiSize.height;
	int scaleFactor = ts->scaleFactor;
	IppiPoint splitImage = ts->splitImage;
	IppiSize tileSize = ts->tileSize;
	IppiSize tailSize = ts->tailSize;

	Ipp32s       *pSrcRoi;
	Ipp16s       *pDstRoi;
	IppiPoint     roiOffset = { 0, 0 };

	owniGetTileParamsByIndex_T(i, splitImage, tileSize, tailSize, &roiOffset, &roiSize);

	/* compute pointer to ROI */
	pSrcRoi = owniGetImagePointer_32s_C1(pSrc, srcStep, roiOffset.x, roiOffset.y);
	pDstRoi = owniGetImagePointer_16s_C1(pDst, dstStep, roiOffset.x, roiOffset.y);

	Ipp32s *pLineSrc = 0;
	Ipp16s *pLineDst = 0;

	for (int i = 0; i < roiSize.height; ++i) {
		pLineSrc = (Ipp32s *)((Ipp8u *)pSrcRoi + i * srcStep);
		pLineDst = (Ipp16s *)((Ipp8u *)pDstRoi + i * dstStep);

		status = ippsSqrt_32s16s_Sfs(pLineSrc, pLineDst, roiSize.width, scaleFactor);
		if (status != ippStsNoErr) return status;
	}

	return status;
}

/*
//   Name: ippiSqrt_32s16s_C1RSfs_T
//   Purpose: Threaded image wrapper built on the row primitive
//            ippsSqrt_32s16s_Sfs. The ROI is split into tiles by
//            owniSplitUniform2D_T; a single tile is processed row by row in
//            the calling thread, several tiles are dispatched through
//            ippParallelFor_T.
//
//   Parameters:
//     pSrc         - pointer to the 32s source image
//     srcStep      - distance in bytes between consecutive source rows
//     pDst         - pointer to the 16s destination image
//     dstStep      - distance in bytes between consecutive destination rows
//     roiSize      - size of the region to process
//     scaleFactor  - scale factor, passed through unchanged to ippsSqrt_32s16s_Sfs
//
//   Returns:
//     ippStsNullPtrErr  - pSrc or pDst is NULL
//     ippStsSizeErr     - roiSize.width or roiSize.height is not positive
//     single tile: the first negative (error) status of a row, in which case
//       processing stops at that row; otherwise the first positive (warning)
//       status seen in any row, or ippStsNoErr
//     several tiles: the status returned by ippParallelFor_T
*/
IPPFUN(IppStatus, ippiSqrt_32s16s_C1RSfs_T, (Ipp32s* pSrc, int srcStep, Ipp16s* pDst, int dstStep, IppiSize roiSize, int scaleFactor))
{
	IppStatus status = ippStsNoErr;

	if (pSrc == 0 || pDst == 0)                    return ippStsNullPtrErr;
	if (roiSize.width <= 0 || roiSize.height <= 0) return ippStsSizeErr;

	int pixelSize = sizeof (Ipp32s);

	IppiPoint splitImage;
	IppiSize  tileSize, tailSize;
	int   minTileSize = IPP_MIN(IPP64_MIN_ADD_2D / pixelSize, IPP_MAX_32S / pixelSize);

	/* split the image to tiles */
	owniSplitUniform2D_T(roiSize, minTileSize, &splitImage, &tileSize, &tailSize);

	if (splitImage.x == 1 && splitImage.y == 1)
	{
		/* one tile only: process the whole ROI here, one row at a time */
		for (int row = 0; row < roiSize.height; ++row) {
			/* widen before multiplying so that row * step cannot overflow int */
			Ipp32s *pLineSrc = (Ipp32s *)((Ipp8u *)pSrc + (long long)row * srcStep);
			Ipp16s *pLineDst = (Ipp16s *)((Ipp8u *)pDst + (long long)row * dstStep);

			IppStatus rowStatus = ippsSqrt_32s16s_Sfs(pLineSrc, pLineDst, roiSize.width, scaleFactor);

			/* IPP convention: negative statuses are errors - stop immediately */
			if (rowStatus < 0) return rowStatus;

			/*
			// Positive statuses are warnings (e.g. negative input samples): the
			// row has been written, so keep going - stopping here would leave
			// the remaining rows of the image unprocessed. Remember the first
			// warning so the caller still sees it.
			*/
			if (status == ippStsNoErr) status = rowStatus;
		}
	}
	else
	{
		int numTiles = splitImage.x * splitImage.y;
		ippiSimpleArithmetics_32s16s_T_Str ts;

		/* single-source operation: the second source slot stays empty */
		simpleArithmeticsThreadingStructureEncode_32s16s(pSrc, srcStep, (Ipp32s*)0, 0, pDst, dstStep, roiSize, scaleFactor,
			splitImage, tileSize, tailSize, &ts);
		status = ippParallelFor_T(numTiles, (void*)&ts, ippiSqrt_32s16s_C1RSfs_T_Fun);
	}

	return status;
}
