subpar/range_8hpp_source.html

#ifndef SUBPAR_RANGE_HPP

#define SUBPAR_RANGE_HPP


#include <limits>

#include <type_traits>


#ifndef SUBPAR_CUSTOM_PARALLELIZE_RANGE

#include <vector>

#include <stdexcept>

#include <thread>

#endif


namespace subpar {


namespace internal {


// Reports whether the number of workers is greater than or equal to the number of tasks.
// Each operand is first cast to the unsigned counterpart of its own type so that the
// comparison never mixes signed and unsigned integers; this is only meaningful because
// callers are expected to have already excluded negative values.
template<typename Task_>
bool ge(int num_workers, Task_ num_tasks) {
    using UnsignedWorkers = std::make_unsigned_t<int>;
    using UnsignedTasks = std::make_unsigned_t<Task_>;

    const UnsignedWorkers workers = static_cast<UnsignedWorkers>(num_workers);
    const UnsignedTasks tasks = static_cast<UnsignedTasks>(num_tasks);

    return !(workers < tasks);
}


// Creates the per-worker storage for captured exceptions.
//
// When 'nothrow_' is false, this returns a vector holding one (initially empty)
// std::exception_ptr per worker. A std::runtime_error is thrown if 'num_workers'
// exceeds the largest size that the vector's size_type can represent.
//
// When 'nothrow_' is true, a plain integer zero is returned as a placeholder,
// so that no vector is instantiated when the caller promises not to throw.
template<bool nothrow_, typename NumWorkers_>
auto create_error_vector(NumWorkers_ num_workers) {
    if constexpr(!nothrow_) {
        using ErrorStore = std::vector<std::exception_ptr>;
        using UnsignedWorkers = typename std::make_unsigned<NumWorkers_>::type;

        constexpr auto capacity_limit = std::numeric_limits<typename ErrorStore::size_type>::max();
        const auto requested = static_cast<UnsignedWorkers>(num_workers);
        if (requested > capacity_limit) {
            throw std::runtime_error("cannot allocate the 'errors' vector");
        }

        return ErrorStore(num_workers);
    } else {
        return 0;
    }
}


}

// Adjusts the requested number of workers so that it is sensible for the number of tasks.
//
// - A non-positive 'num_workers' yields 1 if there is at least one task, and 0 otherwise.
// - A 'num_workers' that is greater than or equal to 'num_tasks' is reduced to 'num_tasks'.
// - Otherwise, 'num_workers' is returned unchanged.
template<typename Task_>
int sanitize_num_workers(int num_workers, Task_ num_tasks) {
    if (num_workers > 0) {
        const bool enough_workers_for_all_tasks = internal::ge(num_workers, num_tasks);
        return enough_workers_for_all_tasks ? static_cast<int>(num_tasks) : num_workers;
    }

    // No usable worker count was requested: fall back to a single worker if there is any work.
    return (num_tasks > 0) ? 1 : 0;
}


// Parallelize a range of tasks across multiple workers.
//
// The tasks [0, num_tasks) are split into contiguous, non-overlapping ranges, one per worker.
// Each worker 'w' calls 'run_task_range(w, start, length)' to process 'length' tasks starting at 'start'.
// If 'num_tasks' is not a multiple of the number of workers, the first 'remainder' workers take one extra task each.
//
// nothrow_: if true, no exception bookkeeping is performed around the calls to 'run_task_range'.
//   If false, an exception thrown inside a worker is captured and, once all workers have finished,
//   the exception from the lowest-indexed failing worker is rethrown to the caller.
// num_workers: requested number of workers. If this is 1 or less, everything runs on the calling thread.
//   It is capped at 'num_tasks' so that no worker is left without a task.
// num_tasks: number of tasks, assumed to be non-negative. Nothing is run when this is zero.
// run_task_range: callable accepting '(int worker, Task_ start, Task_ length)'.
//   The same callable object is shared by all workers, so it must be safe to call concurrently.
//
// If SUBPAR_CUSTOM_PARALLELIZE_RANGE is defined, all arguments are handed to that macro instead
// (or to SUBPAR_CUSTOM_PARALLELIZE_RANGE_NOTHROW when 'nothrow_ = true' and that macro is defined).
template<bool nothrow_ = false, typename Task_, class Run_>
void parallelize_range(int num_workers, Task_ num_tasks, Run_ run_task_range) {
#ifdef SUBPAR_CUSTOM_PARALLELIZE_RANGE
    if constexpr(nothrow_) {
#ifdef SUBPAR_CUSTOM_PARALLELIZE_RANGE_NOTHROW
        SUBPAR_CUSTOM_PARALLELIZE_RANGE_NOTHROW(num_workers, num_tasks, run_task_range);
#else
        SUBPAR_CUSTOM_PARALLELIZE_RANGE(num_workers, num_tasks, run_task_range);
#endif
    } else {
        SUBPAR_CUSTOM_PARALLELIZE_RANGE(num_workers, num_tasks, run_task_range);
    }

#else
    if (num_tasks == 0) {
        return;
    }

    // Serial fallback: no threads are created, and any exception propagates directly to the caller.
    if (num_workers <= 1 || num_tasks == 1) {
        run_task_range(0, 0, num_tasks);
        return;
    }

    // All workers with indices below 'remainder' get an extra task to fill up the remainder.
    Task_ tasks_per_worker;
    int remainder;
    if (internal::ge(num_workers, num_tasks)) {
        // More workers than tasks: shrink the pool so that each worker gets exactly one task.
        num_workers = num_tasks;
        tasks_per_worker = 1;
        remainder = 0;
    } else {
        tasks_per_worker = num_tasks / num_workers;
        remainder = num_tasks % num_workers;
    }

    auto errors = internal::create_error_vector<nothrow_>(num_workers);

#if defined(_OPENMP) && !defined(SUBPAR_NO_OPENMP_RANGE) && !defined(SUBPAR_NO_OPENMP)
#define SUBPAR_USES_OPENMP 1
#define SUBPAR_USES_OPENMP_RANGE 1

    // OpenMP doesn't guarantee that we'll actually start 'num_workers' workers,
    // so we need to do a loop here to ensure that each task range is executed.
    #pragma omp parallel for num_threads(num_workers)
    for (int w = 0; w < num_workers; ++w) {
        Task_ start = w * tasks_per_worker + (w < remainder ? w : remainder); // need to shift the start by the number of previous 'w' that took an extra task from the remainder.
        Task_ length = tasks_per_worker + (w < remainder);

        if constexpr(nothrow_) {
            run_task_range(w, start, length);
        } else {
            // Exceptions must not escape the OpenMP region, so they are stashed for rethrowing later.
            try {
                run_task_range(w, start, length);
            } catch (...) {
                errors[w] = std::current_exception();
            }
        }
    }

#else
// Wiping it out, just in case.
#undef SUBPAR_USES_OPENMP
#undef SUBPAR_USES_OPENMP_RANGE

    Task_ start = 0;
    std::vector<std::thread> workers;
    workers.reserve(num_workers);

    try {
        for (int w = 0; w < num_workers; ++w) {
            Task_ length = tasks_per_worker + (w < remainder);

            // 'run_task_range' is captured by reference in both modes so that it is not copied once per thread.
            // This is safe as every thread is joined before this function returns or throws.
            if constexpr(nothrow_) {
                workers.emplace_back([&run_task_range](int worker, Task_ first, Task_ count) -> void {
                    run_task_range(worker, first, count);
                }, w, start, length);
            } else {
                workers.emplace_back([&run_task_range, &errors](int worker, Task_ first, Task_ count) -> void {
                    try {
                        run_task_range(worker, first, count);
                    } catch (...) {
                        errors[worker] = std::current_exception();
                    }
                }, w, start, length);
            }

            start += length;
        }
    } catch (...) {
        // Starting a thread can fail, e.g., with a std::system_error when resources are exhausted.
        // Destroying a joinable std::thread calls std::terminate(), so we wait for all of the
        // already-started workers to finish before passing the failure on to the caller.
        for (auto& wrk : workers) {
            wrk.join();
        }
        throw;
    }

    for (auto& wrk : workers) {
        wrk.join();
    }
#endif

    // Report the first captured failure, in order of increasing worker index.
    if constexpr(!nothrow_) {
        for (const auto& e : errors) {
            if (e) {
                std::rethrow_exception(e);
            }
        }
    }
#endif
}


// Retained for back-compatibility only.
// Forwards all arguments to parallelize_range() with 'nothrow_ = false',
// i.e., exceptions thrown by 'run_task_range' are handled by parallelize_range().
template<typename Task_, class Run_>
void parallelize(int num_workers, Task_ num_tasks, Run_ run_task_range) {
    constexpr bool assume_nothrow = false;
    parallelize_range<assume_nothrow, Task_, Run_>(
        num_workers,
        num_tasks,
        std::move(run_task_range)
    );
}

}


#endif

subpar
Substitutable parallelization functions.

subpar::sanitize_num_workers
int sanitize_num_workers(int num_workers, Task_ num_tasks)
Adjust the number of workers to the number of tasks in parallelize_range().
Definition range.hpp:67

subpar::parallelize_range
void parallelize_range(int num_workers, Task_ num_tasks, Run_ run_task_range)
Parallelize a range of tasks across multiple workers.
Definition range.hpp:128