admin管理员组

文章数量:1565367

基本思想:对for循环进行任务分担,并通过 schedule(static|dynamic|guided|runtime[, size]) 子句指定迭代的调度方式(其中 runtime 不能带 size 参数)

(1)for的任务分担

#pragma omp parallel
{
#pragma omp for
for(int i=0;i<num/2;i++)//num此为偶数
{
 .....
}
#pragma omp for
for(int i=num/2;i<num;i++) 
{
.......
}
}

测试代码

#include <iostream>
#include <omp.h>
#include<chrono>
#include<vector>
#include<thread>
using namespace std;
using namespace chrono;

// Serial baseline: prints the greeting line `num` times from the calling
// thread, tagging every line with the value of omp_get_thread_num().
void sequentialProgram(int num)
{
    int printed = 0;
    while (printed < num)
    {
        printf("%s the current thread id: %d\n", "hello world", omp_get_thread_num());
        ++printed;
    }
}

// Splits the `num` iterations into two halves and shares each half among the
// threads of one parallel region, using two consecutive `omp for`
// worksharing loops. Lines from the first half are tagged "A hello world",
// lines from the second half "B hello world".
void parallelProgram(int num)
{
    const int half = num / 2;

#pragma omp parallel
    {
        // First worksharing loop: iterations [0, half).
#pragma omp for
        for (int a = 0; a < half; ++a)
        {
            printf("%s the current thread id: %d\n", "A hello world", omp_get_thread_num());
        }

        // Second worksharing loop: iterations [half, num).
#pragma omp for
        for (int b = half; b < num; ++b)
        {
            printf("%s the current thread id: %d\n", "B hello world", omp_get_thread_num());
        }
    }
}

// Runs the serial and the parallel variant back to back with
// num = 2 * omp_get_num_procs() iterations and prints the elapsed time of
// each call, measured with std::chrono::steady_clock.
int main() {
    typedef std::chrono::steady_clock Clock;
    const int num = omp_get_num_procs() * 2;

    const Clock::time_point seqBegin = Clock::now();
    sequentialProgram(num);
    const Clock::time_point seqEnd = Clock::now();
    const std::chrono::duration<double> seqElapsed = seqEnd - seqBegin;
    std::cout << "sequentialProgram elapse time: " << seqElapsed.count() << " seconds" << std::endl;

    const Clock::time_point parBegin = Clock::now();
    parallelProgram(num);
    const Clock::time_point parEnd = Clock::now();
    const std::chrono::duration<double> parElapsed = parEnd - parBegin;
    std::cout << "parallelProgram elapse time: " << parElapsed.count() << " seconds" << std::endl;

    return 0;
}

测试结果,在一个并行域中,对多个for进行制导指令处理,可以使用调度指令简化完成这一操作

F:\OpenMP\cmake-build-debug\OpenMP.exe
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
hello world the current thread id: 0
sequentialProgram elapse time: 0.0776085 seconds
A hello world the current thread id: 1
A hello world the current thread id: 0
A hello world the current thread id: 3
A hello world the current thread id: 5
A hello world the current thread id: 7
A hello world the current thread id: 10
A hello world the current thread id: 9
A hello world the current thread id: 8
A hello world the current thread id: 2
A hello world the current thread id: 4
A hello world the current thread id: 6
A hello world the current thread id: 11
B hello world the current thread id: 1
B hello world the current thread id: 0
B hello world the current thread id: 7
B hello world the current thread id: 9
B hello world the current thread id: 2
B hello world the current thread id: 6
B hello world the current thread id: 4
B hello world the current thread id: 10
B hello world the current thread id: 3
B hello world the current thread id: 8
B hello world the current thread id: 5
B hello world the current thread id: 11
parallelProgram elapse time: 0.0527985 seconds

Process finished with exit code 0

(2)使用for的调度指令schedule

#pragma omp parallel for schedule(static|dynamic|guided|runtime[, size])
 for (int i = 0; i < num; i++) 
{
       .......
    }

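 在本文的测试环境中(未指定 schedule 时的默认调度方式由具体实现决定,常见编译器默认为 static),当写成

#pragma omp parallel for
等价于
#pragma omp parallel for schedule(static)
当线程数等于 omp_get_num_procs() 且 num 能被线程数整除时,又等价于
#pragma omp parallel for schedule(static,num/omp_get_num_procs()) //  num=omp_get_num_procs()*2;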

其中static 表示静态调度:在进入循环时就把迭代按 size 大小划分成块,并按线程号轮流(round-robin)分配给各个线程,运行过程中不再调整

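其中size 为可选项,表示每次分配给线程的连续迭代次数(块大小)。对于 static,不设置size参数时,迭代被划分成大小近似相等的块,每个线程至多分到一块,即大约 num/线程数 次迭代(线程数默认等于 omp_get_num_procs());对于 dynamic 和 guided,不设置size参数时默认值为1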

测试代码

#include <iostream>
#include <omp.h>
#include<chrono>
#include<vector>
#include<thread>
using namespace std;
using namespace chrono;

// Serial baseline: walks the indices 0 .. num-1 in order on the calling
// thread and prints each index together with omp_get_thread_num().
void sequentialProgram(int num)
{
    for (int iteration = 0; iteration < num; ++iteration)
    {
        printf("i=%d the current thread id: %d\n", iteration, omp_get_thread_num());
    }
}

// Shares the `num` iterations among the threads with a static schedule and a
// chunk size of 2, printing each iteration index with the executing thread id.
//
// Alternative directives tried by the original author:
//   #pragma omp parallel for
//   #pragma omp parallel for schedule(static)
// Original note: these two and the active directive below produced the same
// distribution in the author's test; that depends on the value of `num`
// chosen in main, so mind `num` when experimenting.
void parallelProgram(int num)
{
#pragma omp parallel for schedule(static,2)
    for (int idx = 0; idx < num; ++idx)
    {
        printf("i=%d the current thread id: %d\n", idx, omp_get_thread_num());
    }
}

// Times sequentialProgram and parallelProgram, in that order, on
// num = 2 * omp_get_num_procs() iterations and prints both durations.
int main() {
    const int num = omp_get_num_procs() * 2;

    // Runs work(num) once and prints `label`, the elapsed seconds and " seconds".
    auto timed = [num](const char* label, void (*work)(int)) {
        const auto t0 = std::chrono::steady_clock::now();
        work(num);
        const auto t1 = std::chrono::steady_clock::now();
        std::cout << label << std::chrono::duration<double>(t1 - t0).count() << " seconds" << std::endl;
    };

    timed("sequentialProgram elapse time: ", sequentialProgram);
    timed("parallelProgram elapse time: ", parallelProgram);
    return 0;
}

测试结果是相同的

F:\OpenMP\cmake-build-debug\OpenMP.exe
i=0 the current thread id: 0
i=1 the current thread id: 0
i=2 the current thread id: 0
i=3 the current thread id: 0
i=4 the current thread id: 0
i=5 the current thread id: 0
i=6 the current thread id: 0
i=7 the current thread id: 0
i=8 the current thread id: 0
i=9 the current thread id: 0
i=10 the current thread id: 0
i=11 the current thread id: 0
i=12 the current thread id: 0
i=13 the current thread id: 0
i=14 the current thread id: 0
i=15 the current thread id: 0
i=16 the current thread id: 0
i=17 the current thread id: 0
i=18 the current thread id: 0
i=19 the current thread id: 0
i=20 the current thread id: 0
i=21 the current thread id: 0
i=22 the current thread id: 0
i=23 the current thread id: 0
sequentialProgram elapse time: 0.0422739 seconds
i=0 the current thread id: 0
i=1 the current thread id: 0
i=4 the current thread id: 2
i=5 the current thread id: 2
i=14 the current thread id: 7
i=15 the current thread id: 7
i=18 the current thread id: 9
i=19 the current thread id: 9
i=16 the current thread id: 8
i=17 the current thread id: 8
i=12 the current thread id: 6
i=13 the current thread id: 6
i=2 the current thread id: 1
i=3 the current thread id: 1
i=10 the current thread id: 5
i=11 the current thread id: 5
i=6 the current thread id: 3
i=7 the current thread id: 3
i=8 the current thread id: 4
i=9 the current thread id: 4
i=22 the current thread id: 11
i=23 the current thread id: 11
i=20 the current thread id: 10
i=21 the current thread id: 10
parallelProgram elapse time: 0.0412098 seconds

Process finished with exit code 0

(3)虽然参数static能够均衡地分担任务,但是各个线程的处理速度可能存在差异,因此引入dynamic:线程每处理完一块迭代,就再去领取下一块,处理得快的线程会承担更多的迭代

#pragma omp parallel for schedule(dynamic) 
    for (int i = 0; i < num; i++) {
      ......
    }
}

测试代码

#include <iostream>
#include <omp.h>
#include<chrono>
#include<vector>
#include<thread>
using namespace std;
using namespace chrono;

// Serial baseline: prints every index from 0 up to num-1 in order, each with
// the value reported by omp_get_thread_num() on the calling thread.
void sequentialProgram(int num)
{
    int current = 0;
    while (current < num)
    {
        printf("i=%d the current thread id: %d\n", current, omp_get_thread_num());
        ++current;
    }
}

// Runs the `num` iterations as a parallel loop with schedule(dynamic) and
// prints, for every iteration, its index and the id of the executing thread.
void parallelProgram(int num)
{
#pragma omp parallel for schedule(dynamic)
    for (int idx = 0; idx < num; ++idx)
    {
        printf("i=%d the current thread id: %d\n", idx, omp_get_thread_num());
    }
}

// Runs the serial loop, then the dynamically scheduled parallel loop, on
// num = 2 * omp_get_num_procs() iterations and prints the elapsed time of each.
int main() {
    typedef std::chrono::steady_clock Clock;
    const int num = omp_get_num_procs() * 2;

    const Clock::time_point seqBegin = Clock::now();
    sequentialProgram(num);
    const Clock::time_point seqEnd = Clock::now();
    const std::chrono::duration<double> seqElapsed = seqEnd - seqBegin;
    std::cout << "sequentialProgram elapse time: " << seqElapsed.count() << " seconds" << std::endl;

    const Clock::time_point parBegin = Clock::now();
    parallelProgram(num);
    const Clock::time_point parEnd = Clock::now();
    const std::chrono::duration<double> parElapsed = parEnd - parBegin;
    std::cout << "parallelProgram elapse time: " << parElapsed.count() << " seconds" << std::endl;

    return 0;
}

测试结果可以看出,线程id=9处理速度较快,因此承担了更多的任务;当然也可以通过size参数指定线程每次领取的迭代数量(块大小),例如 schedule(dynamic,2)~

F:\OpenMP\cmake-build-debug\OpenMP.exe
i=0 the current thread id: 0
i=1 the current thread id: 0
i=2 the current thread id: 0
i=3 the current thread id: 0
i=4 the current thread id: 0
i=5 the current thread id: 0
i=6 the current thread id: 0
i=7 the current thread id: 0
i=8 the current thread id: 0
i=9 the current thread id: 0
i=10 the current thread id: 0
i=11 the current thread id: 0
i=12 the current thread id: 0
i=13 the current thread id: 0
i=14 the current thread id: 0
i=15 the current thread id: 0
i=16 the current thread id: 0
i=17 the current thread id: 0
i=18 the current thread id: 0
i=19 the current thread id: 0
i=20 the current thread id: 0
i=21 the current thread id: 0
i=22 the current thread id: 0
i=23 the current thread id: 0
sequentialProgram elapse time: 0.041236 seconds
i=0 the current thread id: 2
i=6 the current thread id: 9
i=13 the current thread id: 9
i=14 the current thread id: 9
i=15 the current thread id: 9
i=16 the current thread id: 9
i=17 the current thread id: 9
i=18 the current thread id: 9
i=19 the current thread id: 9
i=20 the current thread id: 9
i=21 the current thread id: 9
i=22 the current thread id: 9
i=23 the current thread id: 9
i=5 the current thread id: 11
i=3 the current thread id: 1
i=4 the current thread id: 8
i=7 the current thread id: 4
i=1 the current thread id: 10
i=2 the current thread id: 3
i=8 the current thread id: 0
i=9 the current thread id: 6
i=10 the current thread id: 7
i=11 the current thread id: 5
i=12 the current thread id: 2
parallelProgram elapse time: 0.0399313 seconds

Process finished with exit code 0

(4)guided 采用启发式(自调度)算法:线程处理完一块后再去领取下一块,开始时分配较大的块,之后块的大小随剩余迭代数逐渐变小,但不会小于size(最后一块可能例外);如果没设置size,块的大小最小减到1

#pragma omp parallel for schedule(guided) 
    for (int i = 0; i < num; i++) {
      .....
    }
}

测试代码

#include <iostream>
#include <omp.h>
#include<chrono>
#include<vector>
#include<thread>
using namespace std;
using namespace chrono;

// Serial baseline: prints the indices 0 .. num-1 one after another from the
// calling thread, each followed by the value of omp_get_thread_num().
void sequentialProgram(int num)
{
    for (int step = 0; step < num; ++step)
    {
        printf("i=%d the current thread id: %d\n", step, omp_get_thread_num());
    }
}

// Runs the `num` iterations as a parallel loop with schedule(guided) and
// prints, for every iteration, its index and the id of the executing thread.
void parallelProgram(int num)
{
#pragma omp parallel for schedule(guided)
    for (int idx = 0; idx < num; ++idx)
    {
        printf("i=%d the current thread id: %d\n", idx, omp_get_thread_num());
    }
}

// Times sequentialProgram and parallelProgram, in that order, and prints
// both durations. The iteration count is 2 * omp_get_num_procs() - 5.
int main() {
    const int num = omp_get_num_procs() * 2 - 5;

    // Runs work(num) once and prints `label`, the elapsed seconds and " seconds".
    auto timed = [num](const char* label, void (*work)(int)) {
        const auto t0 = std::chrono::steady_clock::now();
        work(num);
        const auto t1 = std::chrono::steady_clock::now();
        std::cout << label << std::chrono::duration<double>(t1 - t0).count() << " seconds" << std::endl;
    };

    timed("sequentialProgram elapse time: ", sequentialProgram);
    timed("parallelProgram elapse time: ", parallelProgram);
    return 0;
}

测试结果:开始时每次分配的块包含两次迭代,之后块的大小减小为1,每次只分配一次迭代(先完成的线程会继续领取剩余的块,例如线程0)

F:\OpenMP\cmake-build-debug\OpenMP.exe
i=0 the current thread id: 0
i=1 the current thread id: 0
i=2 the current thread id: 0
i=3 the current thread id: 0
i=4 the current thread id: 0
i=5 the current thread id: 0
i=6 the current thread id: 0
i=7 the current thread id: 0
i=8 the current thread id: 0
i=9 the current thread id: 0
i=10 the current thread id: 0
i=11 the current thread id: 0
i=12 the current thread id: 0
i=13 the current thread id: 0
i=14 the current thread id: 0
i=15 the current thread id: 0
i=16 the current thread id: 0
i=17 the current thread id: 0
i=18 the current thread id: 0
sequentialProgram elapse time: 0.033042 seconds
i=0 the current thread id: 0
i=1 the current thread id: 0
i=16 the current thread id: 0
i=17 the current thread id: 0
i=18 the current thread id: 0
i=6 the current thread id: 5
i=7 the current thread id: 5
i=2 the current thread id: 3
i=3 the current thread id: 3
i=13 the current thread id: 6
i=14 the current thread id: 2
i=15 the current thread id: 11
i=10 the current thread id: 8
i=12 the current thread id: 9
i=8 the current thread id: 7
i=9 the current thread id: 1
i=11 the current thread id: 10
i=4 the current thread id: 4
i=5 the current thread id: 4
parallelProgram elapse time: 0.0334159 seconds

Process finished with exit code 0

(5)runtime 表示调度方式推迟到运行时决定:由环境变量 OMP_SCHEDULE 或 omp_set_schedule() 的设置来选择上述三种方式中的一种,未设置时的默认值由具体实现决定。我测试了一下,在我的环境中好像每次都是以dynamic 的方式调用~~

#pragma omp parallel for schedule(runtime)
    for (int i = 0; i < num; i++) {
       ......
    }
}

测试代码

#include <iostream>
#include <omp.h>
#include<chrono>
#include<vector>
#include<thread>
using namespace std;
using namespace chrono;

// Serial baseline: prints each index in [0, num) in ascending order on the
// calling thread, together with the value of omp_get_thread_num().
void sequentialProgram(int num)
{
    int current = 0;
    while (current < num)
    {
        printf("i=%d the current thread id: %d\n", current, omp_get_thread_num());
        ++current;
    }
}

// Runs the `num` iterations as a parallel loop with schedule(runtime) and
// prints, for every iteration, its index and the id of the executing thread.
void parallelProgram(int num)
{
#pragma omp parallel for schedule(runtime)
    for (int idx = 0; idx < num; ++idx)
    {
        printf("i=%d the current thread id: %d\n", idx, omp_get_thread_num());
    }
}

// Runs the serial loop, then the schedule(runtime) parallel loop, on
// num = 2 * omp_get_num_procs() iterations and prints the elapsed time of each.
int main() {
    typedef std::chrono::steady_clock Clock;
    const int num = omp_get_num_procs() * 2;

    const Clock::time_point seqBegin = Clock::now();
    sequentialProgram(num);
    const Clock::time_point seqEnd = Clock::now();
    const std::chrono::duration<double> seqElapsed = seqEnd - seqBegin;
    std::cout << "sequentialProgram elapse time: " << seqElapsed.count() << " seconds" << std::endl;

    const Clock::time_point parBegin = Clock::now();
    parallelProgram(num);
    const Clock::time_point parEnd = Clock::now();
    const std::chrono::duration<double> parElapsed = parEnd - parBegin;
    std::cout << "parallelProgram elapse time: " << parElapsed.count() << " seconds" << std::endl;

    return 0;
}

测试结果

F:\OpenMP\cmake-build-debug\OpenMP.exe
i=0 the current thread id: 0
i=1 the current thread id: 0
i=2 the current thread id: 0
i=3 the current thread id: 0
i=4 the current thread id: 0
i=5 the current thread id: 0
i=6 the current thread id: 0
i=7 the current thread id: 0
i=8 the current thread id: 0
i=9 the current thread id: 0
i=10 the current thread id: 0
i=11 the current thread id: 0
i=12 the current thread id: 0
i=13 the current thread id: 0
i=14 the current thread id: 0
i=15 the current thread id: 0
i=16 the current thread id: 0
i=17 the current thread id: 0
i=18 the current thread id: 0
i=19 the current thread id: 0
i=20 the current thread id: 0
i=21 the current thread id: 0
i=22 the current thread id: 0
i=23 the current thread id: 0
sequentialProgram elapse time: 0.0410057 seconds
i=0 the current thread id: 1
i=8 the current thread id: 9
i=13 the current thread id: 9
i=14 the current thread id: 9
i=15 the current thread id: 9
i=16 the current thread id: 9
i=17 the current thread id: 9
i=18 the current thread id: 9
i=19 the current thread id: 9
i=20 the current thread id: 9
i=21 the current thread id: 9
i=22 the current thread id: 9
i=23 the current thread id: 9
i=6 the current thread id: 2
i=5 the current thread id: 8
i=7 the current thread id: 11
i=3 the current thread id: 10
i=4 the current thread id: 3
i=2 the current thread id: 4
i=1 the current thread id: 7
i=9 the current thread id: 0
i=10 the current thread id: 6
i=11 the current thread id: 5
i=12 the current thread id: 1
parallelProgram elapse time: 0.042588 seconds

Process finished with exit code 0

本文标签: staticscheduleOpenMPdynamicsize