Linux arch 5.5.9-arch1-2 #1 SMP PREEMPT Thu, 12 Mar 2020 23:01:33 +0000 x86_64 GNU/Linux
I have been experimenting with CPU affinity and scheduling policies, and one thing has surprised me. My machine has 12 cores. I have set the affinity for the program to the last 2 cores only. The only thing my application does is create multiple threads, generate some fake load, and dump the timing results. The main part of the thread function looks as follows:
/*
 * Pin the calling thread to the last `number_of_separated_cores` CPUs
 * (indices number_of_cpus-1 down to number_of_cpus-number_of_separated_cores).
 * Both counters are file-level globals defined elsewhere.
 * Exits the whole process on failure.
 */
void set_affinity_real_time_thread(void)
{
    cpu_set_t set;
    CPU_ZERO(&set);
    uint32_t i;
    for (i = 1; i <= number_of_separated_cores; i++) { //number_of_separated_cores = 2
        CPU_SET(number_of_cpus - i, &set);
    }
    /* sched_setaffinity() returns int (-1 on error, errno set); the original
     * stored it in a uint32_t, so the != 0 check only worked via unsigned
     * wraparound of -1. Use int. */
    int err = sched_setaffinity(0, sizeof(set), &set);
    if (err != 0) {
        fprintf(stderr,
                "%s line %d %s\n", /* newline was missing */
                __func__,
                __LINE__,
                strerror(errno));
        exit(EXIT_FAILURE);
    }
}
// #define NUMBER_OF_TIMING_SAMPLES 10000
// #define NUMBER_OF_FOR_LOOP_ITERATIONS 1000000
// #define BASE_SLEEP_TIME_US 20
/*
 * Worker thread body: pins itself to the isolated cores, switches to
 * SCHED_FIFO, then repeatedly times a busy loop with gettimeofday(),
 * sleeping between samples. After all samples are collected, it waits
 * on the shared barrier and dumps its timings.
 *
 * arg: pointer to a uint32_t thread number (must remain valid until the
 *      thread has copied it).
 * Returns NULL. The original fell off the end of a non-void function,
 * which is undefined behavior if the pthread_join() result is used.
 */
void *real_time_thread(void *arg)
{
    const char *thread_type = "real_time";
    uint32_t thread_number = *(uint32_t *)arg;
    char thread_name[64];
    /* snprintf bounds the write (sprintf did not); %u matches the unsigned
     * uint32_t argument, where the original %d mixed signedness. */
    snprintf(thread_name, sizeof(thread_name), "%s_thread_%u",
             thread_type, thread_number);
    set_affinity_real_time_thread();
    set_policy_and_priotiy_real_time_thread();
    uint32_t timings[NUMBER_OF_TIMING_SAMPLES];
    memset(timings, 0, sizeof(timings));
    volatile int dummy = 0; /* Used to simulate processing delay. */
    struct timeval start_time, end_time;
    unsigned long processing_time;
    /* NOTE(review): thread_number == 0 gives sleep_time == 0, i.e. a
     * max-priority SCHED_FIFO thread that never yields -- confirm intended. */
    useconds_t sleep_time = BASE_SLEEP_TIME_US * (10 * thread_number);
    printf("%s started!\n", thread_name);
    uint32_t i;
    for (i = 0; i < NUMBER_OF_TIMING_SAMPLES; i++) {
        uint32_t j;
        gettimeofday(&start_time, NULL);
        for (j = 0; j < NUMBER_OF_FOR_LOOP_ITERATIONS; j++) {
            dummy++;
        }
        gettimeofday(&end_time, NULL);
        /* Elapsed microseconds. 1000000UL forces unsigned long arithmetic
         * so 1000000 * tv_sec cannot overflow int on 32-bit time_t. */
        processing_time = 1000000UL * end_time.tv_sec
                        + end_time.tv_usec
                        - 1000000UL * start_time.tv_sec
                        - start_time.tv_usec;
        if (processing_time > UINT32_MAX) {
            fprintf(stderr,
                    "Processing time is too long!\n"
                    "Decrease number of for loop iterations.\n"); /* was "\n." */
        }
        timings[i] = processing_time;
        usleep(sleep_time);
    }
    /* Synchronize with the other workers before dumping, so output from
     * different threads does not interleave with ongoing measurements. */
    pthread_barrier_wait(&barrier);
    dump_thread_results(thread_name, timings);
    printf("%s thread %u finished!\n", thread_type, thread_number);
    return NULL; /* was missing: UB to fall off the end of a non-void fn */
}
I create 10 threads for the tests. When I do not change the scheduling policy, the behavior is what I expect: 10 threads run on 2 cores and the system remains fully interactive. However, when I set the policy to SCHED_FIFO in the following way:
/*
 * Switch the calling thread to SCHED_FIFO at the maximum priority for
 * that policy. Exits the whole process on failure.
 * (Function name keeps the original "priotiy" spelling: callers use it.)
 */
void set_policy_and_priotiy_real_time_thread(void)
{
    int max_pri = sched_get_priority_max(SCHED_FIFO);
    /* sched_get_priority_max() can fail with -1/errno; the original fed
     * the unchecked value straight into sched_priority. */
    if (max_pri == -1) {
        fprintf(stderr, "%s line %d %s\n", __func__, __LINE__, strerror(errno));
        exit(EXIT_FAILURE);
    }
    struct sched_param param;
    param.sched_priority = max_pri;
    /* "&param" was mis-encoded as "¶m" in the original paste and
     * would not compile. */
    int err = sched_setscheduler(0, SCHED_FIFO, &param);
    if (err != 0) {
        /* Errors go to stderr (original wrote to stdout) with a newline. */
        fprintf(stderr, "%s line %d %s\n", __func__, __LINE__, strerror(errno));
        exit(EXIT_FAILURE);
    }
}
the system behavior gets really strange. It becomes completely non-interactive during the test: I can't type characters or move the mouse cursor, and the display is frozen. When the processing finishes, everything goes back to normal. I do not understand this behavior. Only 2 cores are used for this processing (this is what I observe under the conventional scheduling policy), and there are still 10 cores left. Can anyone explain this?