I am implementing a parallel quicksort:
- Partition the input range,
- Conquer the left subrange in this thread.
- Conquer the right subrange in a new (single) thread while (2) is running.
In OpenMP, how can I do (3)?
My code broken code follows:
#include <algorithm>
#include <chrono>
#include <iostream>
#include <omp.h>
#define N 50000000
using std::swap;
using std::cout;
typedef unsigned long long data_t;
static std::chrono::milliseconds get_millis() {
return std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::system_clock::now().time_since_epoch());
}
static int partition(data_t* data, int lo, int hi) {
data_t pivot = data[lo];
int i = lo - 1;
int j = hi + 1;
while (true) {
do i++; while (data[i] < pivot);
do j--; while (data[j] > pivot);
if (i >= j) return j;
data_t temp = data[i];
data[i] = data[j];
data[j] = temp;
}
}
static void sortImpl(data_t* data,
int lo,
int hi,
int threads) {
if (threads <= 1 || hi - lo < 1000) {
std::sort(data + lo, data + hi + 1);
return;
}
int pivot = partition(data, lo, hi);
int left_threads = threads / 2;
int right_threads = threads - left_threads;
#pragma omp parallel shared(data) num_threads(1)
{
sortImpl(data,
lo,
pivot,
left_threads);
}
sortImpl(data,
pivot + 1,
hi,
right_threads);
}
void psort(int n, data_t *data) {
sortImpl(data,
0,
n - 1,
omp_get_max_threads());
}
static void ssort(data_t* data, int lo, int hi) {
if (lo < hi) {
int pivot = partition(data, lo, hi);
ssort(data, lo, pivot);
ssort(data, pivot + 1, hi);
}
}
void ssort(int n, data_t* data) {
ssort(data, 0, n - 1);
}
int main() {
std::cout << "Starting benchmark.\n";
data_t* arr = new data_t[N];
data_t* arr2 = new data_t[N];
for (int i = 0; i < N; i++) {
arr[i] = rand();
arr2[i] = arr[i];
}
auto st = get_millis();
psort(N, arr);
auto et = get_millis();
std::cout << "My parallel quicksort duration: " << et - st << "\n";
st = get_millis();
std::sort(arr2, arr2 + N);
et = get_millis();
std::cout << "std::sort duration: " << et - st << "\n";
for (int i = 0; i < N; i++) {
if (arr[i] != arr2[i]) {
std::cout << arr[i] << " != " << arr2[i] << " at " << i << "\n";
break;
}
}
std::cout << "Equals: " << std::boolalpha
<< std::equal(arr2, arr2 + N, arr) << "\n";
return 0;
}
The output is:
Starting benchmark.
My parallel quicksort duration: 11482ms
std::sort duration: 9651ms
Equals: true
Basically, the logic of the parallel quicksort is the same as in the sequential std::sort, yet it is slower by around 19 percent.
#pragma omp parallelthen#pragma omp masterthen run your parallel quick-sort, and finally do#pragma omp task(be careful about variable sharing though).std::sortin parallel. It may or may not scale well regarding the target implementationsortImpl()before exiting the parallel region and execute the other call. So nothing is executed concurently.