Following Week 7's implementation of the TASKLOOP, TEAMS, and DISTRIBUTE constructs, Week 8 focused on the schedule clause with its various modes, the num_threads clause, and the atomic construct. Last week I had planned to optimize the earlier constructs, but I extended the scope to these features instead, since they are essential for better loop handling. This week, I completed these implementations in #8039, spending about 29 hours to ensure they integrate well with the existing OpenMP support.
Implementation Details and Bug Fix
This week, I added the schedule clause to control how loop iterations are divided among threads. I support all modes from the OpenMP 6.0 specification: static (iterations are divided into equal chunks assigned to threads in round-robin order), dynamic (threads request new chunks as they finish), guided (like dynamic, but chunk sizes decrease over time), runtime (the schedule is taken from the run-sched-var ICV), and auto (the compiler or runtime decides). I also implemented the num_threads clause to set the number of threads for a parallel region at runtime, and the atomic construct to ensure thread-safe updates.
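To show how these clauses compose, here is a minimal sketch of my own (not one of the merged MREs) that combines num_threads, schedule, and atomic on a single loop; it should sum 1..1000 to 500500:

program demo_clauses
    implicit none
    integer, parameter :: n = 1000
    integer :: i, total

    total = 0
    ! num_threads fixes the team size for this region at runtime;
    ! schedule(dynamic, 8) hands out chunks of 8 iterations on demand.
    !$omp parallel do num_threads(4) schedule(dynamic, 8)
    do i = 1, n
        ! atomic protects just this single update; a critical section
        ! would serialize an arbitrary block instead.
        !$omp atomic
        total = total + i
    end do
    !$omp end parallel do

    print *, "total =", total  ! expected n*(n+1)/2 = 500500
end program demo_clauses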
Additionally, I fixed a bug in ASR generation for nested IF statements inside nested pragmas, which is exercised by existing tests such as openmp_65.f90.
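For reference, the shape of code that triggered the ASR bug looks roughly like the following (a hypothetical reduced form, not the exact failing case): an IF nested inside another IF, inside a worksharing loop that is itself nested in a parallel region:

program demo_nested_if
    implicit none
    integer :: i, count_val

    count_val = 0
    !$omp parallel
    !$omp do
    do i = 1, 100
        ! Nested IFs inside nested pragmas previously broke ASR generation.
        if (mod(i, 2) == 0) then
            if (mod(i, 4) == 0) then
                !$omp atomic
                count_val = count_val + 1
            end if
        end if
    end do
    !$omp end do
    !$omp end parallel
    print *, "multiples of 4:", count_val  ! expected 25
end program demo_nested_if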
Examples: SCHEDULE, NUM_THREADS, and ATOMIC Constructs
Below are the seven MREs I compiled and ran successfully to test the new constructs.
View MRE for SCHEDULE(STATIC) (openmp_63.f90)
program openmp_63
    use omp_lib
    implicit none
    integer, parameter :: n = 100
    integer, parameter :: max_threads = 8
    integer :: i, tid, nthreads
    integer :: thread_iterations(1:max_threads) = 0
    integer :: thread_first(1:max_threads) = 1000
    integer :: thread_last(1:max_threads) = -1
    integer :: expected_chunk_size
    logical :: test_passed = .true.

    call omp_set_num_threads(4)
    nthreads = 0

    !$omp parallel private(tid)
    !$omp single
    nthreads = omp_get_num_threads()
    !$omp end single
    !$omp end parallel
    print *, "Testing STATIC schedule with", nthreads, "threads"

    !$omp parallel do schedule(static) private(tid)
    do i = 1, n
        tid = omp_get_thread_num() + 1
        !$omp critical
        thread_iterations(tid) = thread_iterations(tid) + 1
        if (i < thread_first(tid)) thread_first(tid) = i
        if (i > thread_last(tid)) thread_last(tid) = i
        !$omp end critical
    end do
    !$omp end parallel do

    print *, thread_first(1:nthreads)
    print *, thread_last(1:nthreads)
    print *, thread_iterations(1:nthreads)

    print *, "=== STATIC Schedule Results ==="
    expected_chunk_size = (n + nthreads - 1) / nthreads

    do i = 1, nthreads
        print '(A,I1,A,I3,A,I3,A,I3)', &
            "Thread ", i, ": iterations=", thread_iterations(i), &
            ", first=", thread_first(i), ", last=", thread_last(i)

        if (thread_iterations(i) > 0) then
            if (thread_last(i) - thread_first(i) + 1 /= thread_iterations(i)) then
                print *, "ERROR: Thread", i, "did not get contiguous iterations!"
                test_passed = .false.
            end if
            if (abs(thread_iterations(i) - expected_chunk_size) > 1) then
                print *, "ERROR: Thread", i, "chunk size deviates too much!"
                test_passed = .false.
            end if
        end if
    end do

    if (.not. test_passed) then
        error stop "STATIC schedule test FAILED!"
    end if
    print *, "STATIC schedule test PASSED!"
end program openmp_63
View MRE for SCHEDULE(DYNAMIC) (openmp_64.f90)
program openmp_64
    use omp_lib
    implicit none
    integer, parameter :: n = 100
    integer :: i, tid, j
    real :: delay
    integer :: thread_iterations(0:7) = 0
    integer :: iteration_order(n)
    integer :: order_counter
    integer :: consecutive_count, max_consecutive
    real :: work_array(n)

    call omp_set_num_threads(4)
    order_counter = 0

    print *, "=== DYNAMIC Schedule Test ==="

    !$omp parallel do schedule(dynamic, 1) private(tid, delay)
    do i = 1, n
        tid = omp_get_thread_num()

        if (mod(i, 10) == 0) then
            delay = 0.0
            do j = 1, 1000
                delay = delay + sin(real(j))
            end do
            work_array(i) = delay
        end if

        !$omp critical
        thread_iterations(tid) = thread_iterations(tid) + 1
        order_counter = order_counter + 1
        iteration_order(order_counter) = tid
        !$omp end critical
    end do
    !$omp end parallel do

    print *, "Thread iteration counts:"
    do i = 0, omp_get_max_threads() - 1
        print *, "Thread", i, ":", thread_iterations(i), "iterations"
    end do

    max_consecutive = 0
    consecutive_count = 1

    do i = 2, n
        if (iteration_order(i) == iteration_order(i-1)) then
            consecutive_count = consecutive_count + 1
        else
            if (consecutive_count > max_consecutive) then
                max_consecutive = consecutive_count
            end if
            consecutive_count = 1
        end if
    end do

    print *, "Maximum consecutive iterations by same thread:", max_consecutive

    if (max_consecutive > 10) then
        print *, "WARNING: Dynamic schedule showing large consecutive blocks"
    end if

    print *, "DYNAMIC schedule test completed"
end program openmp_64
View MRE for SCHEDULE(GUIDED) (openmp_65.f90)
program openmp_65
    use omp_lib
    implicit none
    integer, parameter :: n = 1000
    integer :: i, tid, j
    integer :: chunk_count
    integer :: chunk_size_array(100) = 0
    integer :: chunk_thread(100) = -1
    integer :: current_pos
    integer :: thread_iterations(0:7) = 0
    logical :: test_passed = .true.
    logical :: decreasing_trend = .true.
    integer :: last_thread = -1
    integer :: current_chunk_size
    integer :: iterations_done

    call omp_set_num_threads(4)

    print *, "=== GUIDED Schedule Test ==="
    print *, "Iterations:", n, "Threads:", omp_get_max_threads()
    chunk_count = 0
    current_chunk_size = 0
    iterations_done = 0
    current_pos = 1
    !$omp parallel private(tid)
    !$omp do schedule(guided)
    do i = 1, n
        tid = omp_get_thread_num()

        !$omp critical
        if (i == current_pos) then
            chunk_count = chunk_count + 1
            chunk_thread(chunk_count) = tid
            do j = i, n
                if (j == n) then
                    chunk_size_array(chunk_count) = j - i + 1
                    current_pos = n + 1
                    exit
                end if
            end do
        end if
        thread_iterations(tid) = thread_iterations(tid) + 1
        !$omp end critical
    end do
    !$omp end do
    !$omp end parallel

    chunk_count = 0
    current_pos = 1

    !$omp parallel private(tid, i)
    tid = omp_get_thread_num()
    if (tid == 0) then
        do while (iterations_done < n)
            chunk_count = chunk_count + 1
            current_chunk_size = max(1, (n - iterations_done) / (2 * omp_get_num_threads()))
            chunk_size_array(chunk_count) = current_chunk_size
            iterations_done = iterations_done + current_chunk_size
        end do
    end if
    !$omp end parallel

    print *, "Expected guided chunk sizes (first 10):"
    do i = 1, min(10, chunk_count)
        print *, "Chunk ", i, ": size = ", chunk_size_array(i)
    end do

    do i = 2, min(chunk_count - 1, 20)
        if (chunk_size_array(i) > chunk_size_array(i-1) * 1.5) then
            decreasing_trend = .false.
        end if
    end do

    if (chunk_count > 5) then
        if (chunk_size_array(1) < chunk_size_array(chunk_count-2) * 2) then
            print *, "ERROR: Guided schedule not showing expected decreasing chunk sizes!"
            test_passed = .false.
        end if
    end if

    if (.not. test_passed) then
        error stop "GUIDED schedule test FAILED!"
    end if

    if (.not. decreasing_trend) then
        print *, "WARNING: Guided chunks did not show clear decreasing trend"
    else
        print *, "Guided schedule showing expected decreasing chunk pattern"
    end if

    print *, "GUIDED schedule test completed"
end program openmp_65
View MRE for ATOMIC (openmp_66.f90)
program openmp_66
    implicit none
    integer, parameter :: N = 100
    integer :: i, sum_expected, sum_actual

    sum_actual = 0
    sum_expected = (N*(N+1)) / 2

    !$omp parallel do private(i)
    do i = 1, N
        !$omp atomic
        sum_actual = sum_actual + i
        !$omp end atomic
    end do
    !$omp end parallel do

    if (sum_actual /= sum_expected) then
        print *, 'Error: Incorrect result from atomic addition.'
        print *, 'Expected:', sum_expected, ' Got:', sum_actual
        error stop
    else
        print *, 'Test passed: atomic addition is correct. Sum =', sum_actual
    end if
end program openmp_66
View MRE for Schedule Comparison (openmp_67.f90)
program openmp_67
    use omp_lib
    implicit none
    integer, parameter :: n = 10000
    integer :: i, j
    real :: static_time, dynamic_time, guided_time
    double precision :: start_time
    real :: a(n), b(n), c(n)

    call omp_set_num_threads(4)

    print *, "=== Schedule Comparison Test ==="
    print *, "Comparing performance of different schedules"
    print *, "Array size:", n, "Threads:", omp_get_max_threads()

    do i = 1, n
        b(i) = real(i)
        c(i) = real(n - i + 1)
    end do

    start_time = omp_get_wtime()
    !$omp parallel do schedule(static)
    do i = 1, n
        a(i) = sqrt(b(i)) + log(abs(c(i)) + 1.0)
    end do
    !$omp end parallel do
    static_time = omp_get_wtime() - start_time

    start_time = omp_get_wtime()
    !$omp parallel do schedule(dynamic)
    do i = 1, n
        a(i) = sqrt(b(i)) + log(abs(c(i)) + 1.0)
    end do
    !$omp end parallel do
    dynamic_time = omp_get_wtime() - start_time

    start_time = omp_get_wtime()
    !$omp parallel do schedule(guided)
    do i = 1, n
        a(i) = sqrt(b(i)) + log(abs(c(i)) + 1.0)
    end do
    !$omp end parallel do
    guided_time = omp_get_wtime() - start_time

    print '(A,F8.6,A)', "STATIC time: ", static_time, " seconds"
    print '(A,F8.6,A)', "DYNAMIC time: ", dynamic_time, " seconds"
    print '(A,F8.6,A)', "GUIDED time: ", guided_time, " seconds"

    if (static_time < dynamic_time * 0.9 .and. static_time < guided_time * 0.9) then
        print *, "✓ STATIC is fastest for uniform workload"
    else if (dynamic_time < static_time * 0.9) then
        print *, "! DYNAMIC is fastest"
    else if (guided_time < static_time * 0.9) then
        print *, "! GUIDED is fastest"
    else
        print *, "- All schedules perform similarly"
    end if

    do i = 1, n
        if (mod(i, 10) == 0) then
            b(i) = b(i) * 1000.0
        end if
    end do

    start_time = omp_get_wtime()
    !$omp parallel do schedule(static)
    do i = 1, n
        if (mod(i, 10) == 0) then
            a(i) = 0.0
            do j = 1, 100
                a(i) = a(i) + sqrt(b(i)) + log(abs(c(i)) + 1.0)
            end do
        else
            a(i) = sqrt(b(i)) + log(abs(c(i)) + 1.0)
        end if
    end do
    !$omp end parallel do
    static_time = omp_get_wtime() - start_time

    start_time = omp_get_wtime()
    !$omp parallel do schedule(dynamic)
    do i = 1, n
        if (mod(i, 10) == 0) then
            a(i) = 0.0
            do j = 1, 100
                a(i) = a(i) + sqrt(b(i)) + log(abs(c(i)) + 1.0)
            end do
        else
            a(i) = sqrt(b(i)) + log(abs(c(i)) + 1.0)
        end if
    end do
    !$omp end parallel do
    dynamic_time = omp_get_wtime() - start_time

    print *, ""
    print *, "Non-uniform workload results:"
    print '(A,F8.6,A)', "STATIC time: ", static_time, " seconds"
    print '(A,F8.6,A)', "DYNAMIC time: ", dynamic_time, " seconds"
end program openmp_67
View MRE for SCHEDULE(DYNAMIC, 5) (openmp_68.f90)
program openmp_68
    use omp_lib
    implicit none
    integer, parameter :: n = 100
    integer :: i, tid, j
    integer :: thread_chunks(0:7) = 0
    integer :: chunk_sizes(100) = 0
    integer :: chunk_count
    integer :: current_iteration
    integer :: chunk_start(100), chunk_thread(100)
    logical :: test_passed = .true.
    logical :: looks_like_static = .true.

    current_iteration = 1
    chunk_count = 0
    call omp_set_num_threads(4)

    print *, "=== DYNAMIC Schedule with chunk=", 5, "==="

    !$omp parallel private(tid)
    !$omp do schedule(dynamic, 5)
    do i = 1, n
        tid = omp_get_thread_num()

        !$omp critical
        if (i == current_iteration) then
            chunk_count = chunk_count + 1
            chunk_start(chunk_count) = i
            chunk_thread(chunk_count) = tid
            thread_chunks(tid) = thread_chunks(tid) + 1
            if (i + 5 - 1 <= n) then
                chunk_sizes(chunk_count) = 5
                current_iteration = i + 5
            else
                chunk_sizes(chunk_count) = n - i + 1
                current_iteration = n + 1
            end if
        end if
        !$omp end critical
    end do
    !$omp end do
    !$omp end parallel

    print *, "Total chunks distributed:", chunk_count
    print *, "Chunks per thread:"
    do i = 0, omp_get_max_threads() - 1
        print *, "Thread", i, ":", thread_chunks(i), "chunks"
    end do

    do i = 1, chunk_count - 1
        if (chunk_sizes(i) /= 5) then
            print *, "ERROR: Chunk", i, "has size", chunk_sizes(i), "expected", 5
            test_passed = .false.
        end if
    end do

    if (chunk_sizes(chunk_count) > 5) then
        print *, "ERROR: Last chunk too large!"
        test_passed = .false.
    end if

    do i = 2, min(chunk_count, 8)
        if (chunk_thread(i) /= mod(chunk_thread(1) + i - 1, omp_get_max_threads())) then
            looks_like_static = .false.
            exit
        end if
    end do

    if (looks_like_static .and. chunk_count > 4) then
        print *, "WARNING: Dynamic schedule showing static-like round-robin pattern!"
    end if

    if (.not. test_passed) then
        error stop "DYNAMIC chunk schedule test FAILED!"
    end if
    print *, "DYNAMIC chunk schedule test completed"
end program openmp_68
View MRE for NUM_THREADS (openmp_69.f90)
program openmp_69
    use omp_lib
    implicit none
    integer :: flags(4)
    integer :: i

    flags = 0

    !$omp parallel num_threads(4) private(i)
    i = omp_get_thread_num()
    !$omp critical
    flags(i+1) = 1
    !$omp end critical
    !$omp end parallel

    do i = 1, 4
        if (flags(i) /= 1) then
            print *, 'Error: Thread ', i-1, ' did not execute!'
            error stop
        end if
    end do

    print *, 'Test passed: num_threads(', 4, ') used correctly.'
end program openmp_69
Next Steps
For Week 9, I plan to work on Target Offloading and get it tested in CI.