/*
 *  Copyright 2008-2013 NVIDIA Corporation
 *  Modifications Copyright© 2019-2025 Advanced Micro Devices, Inc. All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

#include <thrust/count.h>
#include <thrust/execution_policy.h>

#include <unittest/unittest.h>

#ifdef THRUST_TEST_DEVICE_SIDE
// Kernel wrapper that calls thrust::count from device code under the execution
// policy `exec` and stores the number of elements in [first, last) that compare
// equal to `value` through `result`.
// The body does no thread indexing; TestCountDevice launches it as <<<1, 1>>>.
template <typename ExecutionPolicy, typename Iterator, typename T, typename Iterator2>
__global__ void count_kernel(ExecutionPolicy exec, Iterator first, Iterator last, T value, Iterator2 result)
{
  const auto num_matches = thrust::count(exec, first, last, value);
  *result                = num_matches;
}

// Checks that thrust::count invoked from device code (through count_kernel)
// under the execution policy `exec` agrees with a host-side thrust::count over
// the same `n` random samples. Both sides count occurrences of T(5).
template <typename T, typename ExecutionPolicy>
void TestCountDevice(ExecutionPolicy exec, const size_t n)
{
  thrust::host_vector<T> h_data   = unittest::random_samples<T>(n);
  thrust::device_vector<T> d_data = h_data;

  // Single-element device buffer that receives the kernel's answer.
  thrust::device_vector<size_t> d_result(1);

  // Host reference value.
  size_t h_result = thrust::count(h_data.begin(), h_data.end(), T(5));

  count_kernel<<<1, 1>>>(exec, d_data.begin(), d_data.end(), T(5), d_result.begin());

  // A launch that is rejected up front is recorded as the "last error" and is
  // not guaranteed to be reported by the synchronize below, so check it
  // explicitly. Otherwise d_result would keep its initial value and the final
  // comparison could pass or fail for the wrong reason.
  cudaError_t const launch_err = cudaGetLastError();
  ASSERT_EQUAL(cudaSuccess, launch_err);

  // Wait for the kernel and surface any error raised while it executed.
  cudaError_t const err = cudaDeviceSynchronize();
  ASSERT_EQUAL(cudaSuccess, err);

  ASSERT_EQUAL(h_result, d_result[0]);
}

// Runs the device-side thrust::count test with the thrust::seq execution policy.
template <typename T>
void TestCountDeviceSeq(const size_t n)
{
  const auto policy = thrust::seq;
  TestCountDevice<T>(policy, n);
}
DECLARE_VARIABLE_UNITTEST(TestCountDeviceSeq);

// Runs the device-side thrust::count test with the thrust::device execution policy.
template <typename T>
void TestCountDeviceDevice(const size_t n)
{
  const auto policy = thrust::device;
  TestCountDevice<T>(policy, n);
}
DECLARE_VARIABLE_UNITTEST(TestCountDeviceDevice);

// Kernel wrapper that calls thrust::count_if from device code under the
// execution policy `exec` and stores the number of elements in [first, last)
// for which `pred` holds through `result`.
// The body does no thread indexing; TestCountIfDevice launches it as <<<1, 1>>>.
template <typename ExecutionPolicy, typename Iterator, typename Predicate, typename Iterator2>
__global__ void count_if_kernel(ExecutionPolicy exec, Iterator first, Iterator last, Predicate pred, Iterator2 result)
{
  const auto num_satisfying = thrust::count_if(exec, first, last, pred);
  *result                   = num_satisfying;
}

// Unary predicate, callable on host and device, that is true when its
// argument is strictly greater than 5.
template <typename T>
struct greater_than_five
{
  _CCCL_HOST_DEVICE bool operator()(const T& x) const
  {
    const bool exceeds_five = x > 5;
    return exceeds_five;
  }
};

// Checks that thrust::count_if invoked from device code (through
// count_if_kernel) under the execution policy `exec` agrees with a host-side
// thrust::count_if over the same `n` random samples. Both sides use the
// greater_than_five<T> predicate.
template <typename T, typename ExecutionPolicy>
void TestCountIfDevice(ExecutionPolicy exec, const size_t n)
{
  thrust::host_vector<T> h_data   = unittest::random_samples<T>(n);
  thrust::device_vector<T> d_data = h_data;

  // Single-element device buffer that receives the kernel's answer.
  thrust::device_vector<size_t> d_result(1);

  // Host reference value.
  size_t h_result = thrust::count_if(h_data.begin(), h_data.end(), greater_than_five<T>());

  count_if_kernel<<<1, 1>>>(exec, d_data.begin(), d_data.end(), greater_than_five<T>(), d_result.begin());

  // A launch that is rejected up front is recorded as the "last error" and is
  // not guaranteed to be reported by the synchronize below, so check it
  // explicitly. Otherwise d_result would keep its initial value and the final
  // comparison could pass or fail for the wrong reason.
  cudaError_t const launch_err = cudaGetLastError();
  ASSERT_EQUAL(cudaSuccess, launch_err);

  // Wait for the kernel and surface any error raised while it executed.
  cudaError_t const err = cudaDeviceSynchronize();
  ASSERT_EQUAL(cudaSuccess, err);

  ASSERT_EQUAL(h_result, d_result[0]);
}

// Runs the device-side thrust::count_if test with the thrust::seq execution policy.
template <typename T>
void TestCountIfDeviceSeq(const size_t n)
{
  const auto policy = thrust::seq;
  TestCountIfDevice<T>(policy, n);
}
DECLARE_VARIABLE_UNITTEST(TestCountIfDeviceSeq);

// Runs the device-side thrust::count_if test with the thrust::device execution policy.
template <typename T>
void TestCountIfDeviceDevice(const size_t n)
{
  const auto policy = thrust::device;
  TestCountIfDevice<T>(policy, n);
}
DECLARE_VARIABLE_UNITTEST(TestCountIfDeviceDevice);
#endif

// Verifies that thrust::count gives the expected results when executed on a
// user-created CUDA stream through thrust::cuda::par.on(s).
void TestCountCudaStreams()
{
  thrust::device_vector<int> data{1, 1, 0, 0, 1};

  cudaStream_t s;
  // Do not continue with an invalid stream handle if creation failed.
  cudaError_t const create_err = cudaStreamCreate(&s);
  ASSERT_EQUAL(cudaSuccess, create_err);

  // Compute every count before asserting on any of them, so that a failing
  // value assertion cannot skip the cudaStreamDestroy call below.
  const auto zeros = thrust::count(thrust::cuda::par.on(s), data.begin(), data.end(), 0);
  const auto ones  = thrust::count(thrust::cuda::par.on(s), data.begin(), data.end(), 1);
  const auto twos  = thrust::count(thrust::cuda::par.on(s), data.begin(), data.end(), 2);

  cudaError_t const destroy_err = cudaStreamDestroy(s);

  ASSERT_EQUAL(zeros, 2);
  ASSERT_EQUAL(ones, 3);
  ASSERT_EQUAL(twos, 0);
  ASSERT_EQUAL(cudaSuccess, destroy_err);
}
DECLARE_UNITTEST(TestCountCudaStreams);
