/*
 * This file contains a copy of cputime_adjust() and related functions from
 * linux-next (next-20150610), together with stubs and a driver to verify
 * and demonstrate some problems in the scaling mechanism as well as some
 * concurrency problems.
 *
 * The copied functions are instrumented with a call to emulate_preemption()
 * so that the concurrency problem can easily be demonstrated; other than
 * that they are identical to the ones in the kernel.
 *
 * Compile with: gcc -o testadjust testadjust.c
 * Run with:     ./testadjust
 * And expect:
 * 0.0 sum_exec=100000000000 utime=0   stime=1   => 0.0 tot=10000 user=0     sys=10000 =====> OK
 * 0.1 sum_exec=101000000000 utime=100 stime=1   => 0.1 tot=20000 user=10000 sys=10000 =====> FAIL
 *
 * 1.0 sum_exec=100000000000 utime=1   stime=0   => 1.0 tot=10000 user=10000 sys=0     =====> OK
 * 1.1 sum_exec=101000000000 utime=1   stime=100 => 1.1 tot=20000 user=10000 sys=10000 =====> FAIL
 *
 * 2.0 sum_exec=100000000000 utime=1   stime=1   => 2.0 tot=10000 user=5000  sys=5000  =====> OK
 * 2.1 sum_exec=101000000000 utime=100 stime=1   => 2.1 tot=15000 user=10000 sys=5000  =====> FAIL
 *
 * 3.0 sum_exec=100000000000 utime=1   stime=1   => <>
 * 3.1 sum_exec=101000000000 utime=100 stime=1   => 3.1 tot=10100 user=10000 sys=100   =====> OK
 * <>                                               3.0 tot=15000 user=10000 sys=5000  =====> FAIL
 *
 */

#include <stdio.h>
#include <stdint.h>

/* Use (or don't use) the fix proposed by peterz on lkml */
#define PETERZ_FIX 0

/*****************************************************************************
 *
 * The code below is stubs and implementations of macros and functions used
 * by cputime_adjust() and friends.
 *
 *****************************************************************************
 */

typedef uint32_t u32;
typedef uint64_t u64;
typedef uint64_t cputime_t;

/* Some macro definitions used by cputime_adjust() and friends, copied or stubbed */
#define __force

#define swap(a, b) \
	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)

/* No real concurrency, so we can use a trivial implementation. */
#define READ_ONCE(x) (x)

/* Structs copied from kernel code */
struct cputime {
	cputime_t utime;
	cputime_t stime;
};

struct task_cputime {
	cputime_t utime;
	cputime_t stime;
	unsigned long long sum_exec_runtime;
};

/* We are running in a standard glibc execution environment, so just rely on
 * the standard 64 bit arithmetic. */
u64 div_u64(u64 a, u64 b)
{
	return a / b;
}

/* We have no real concurrency in the test, so a simple non-atomic version
 * of cmpxchg is good enough. Like the real cmpxchg() it returns the
 * previous value of *counter. */
cputime_t cmpxchg_cputime(cputime_t *counter, cputime_t old, cputime_t new)
{
	cputime_t ret = *counter;

	(void)old;
	*counter = new;
	return ret;
}

/* Convert nanoseconds to jiffies, this assumes HZ=100 */
cputime_t nsecs_to_cputime(u64 ns)
{
	return (cputime_t)(ns / (1000000000 / 100));
}

void emulate_preemption(int preemption_position);
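/*
 * Note on units (worked out from the stubs above): with the HZ=100
 * assumption, one cputime unit is 10 ms, so
 * nsecs_to_cputime(100 * 10^9 ns) = 10000 and
 * nsecs_to_cputime(101 * 10^9 ns) = 10100. Those are the "tot" values a
 * correct adjustment should report for the test cases listed in the
 * header comment.
 */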
/*****************************************************************************
 *
 * The code below is copied verbatim from the kernel, except:
 *
 * - The additional call to emulate_preemption() from cputime_adjust()
 * - The code within #if PETERZ_FIX in cputime_adjust() is a fix proposed by peterz
 *
 *****************************************************************************
 */

/*
 * Perform (stime * rtime) / total, but avoid multiplication overflow by
 * losing precision when the numbers are big.
 */
static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
{
	u64 scaled;

	for (;;) {
		/* Make sure "rtime" is the bigger of stime/rtime */
		if (stime > rtime)
			swap(rtime, stime);

		/* Make sure 'total' fits in 32 bits */
		if (total >> 32)
			goto drop_precision;

		/* Does rtime (and thus stime) fit in 32 bits? */
		if (!(rtime >> 32))
			break;

		/* Can we just balance rtime/stime rather than dropping bits? */
		if (stime >> 31)
			goto drop_precision;

		/* We can grow stime and shrink rtime and try to make them both fit */
		stime <<= 1;
		rtime >>= 1;
		continue;

drop_precision:
		/* We drop from rtime, it has more bits than stime */
		rtime >>= 1;
		total >>= 1;
	}

	/*
	 * Make sure gcc understands that this is a 32x32->64 multiply,
	 * followed by a 64/32->64 divide.
	 */
	scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
	return (__force cputime_t) scaled;
}

/*
 * Atomically advance counter to the new value. Interrupts, vcpu
 * scheduling, and scaling inaccuracies can cause cputime_advance
 * to be occasionally called with a new value smaller than counter.
 * Let's enforce atomicity.
 *
 * Normally a caller will only go through this loop once, or not
 * at all in case a previous caller updated counter the same jiffy.
 */
static void cputime_advance(cputime_t *counter, cputime_t new)
{
	cputime_t old;

	while (new > (old = READ_ONCE(*counter)))
		cmpxchg_cputime(counter, old, new);
}

/*
 * Adjust tick based cputime random precision against scheduler
 * runtime accounting.
 */
static void cputime_adjust(struct task_cputime *curr,
			   struct cputime *prev,
			   cputime_t *ut, cputime_t *st)
{
	cputime_t rtime, stime, utime;

	/*
	 * Tick based cputime accounting depend on random scheduling
	 * timeslices of a task to be interrupted or not by the timer.
	 * Depending on these circumstances, the number of these interrupts
	 * may be over or under-optimistic, matching the real user and system
	 * cputime with a variable precision.
	 *
	 * Fix this by scaling these tick based values against the total
	 * runtime accounted by the CFS scheduler.
	 */
	rtime = nsecs_to_cputime(curr->sum_exec_runtime);

	/*
	 * Update userspace visible utime/stime values only if actual execution
	 * time is bigger than already exported. Note that can happen, that we
	 * provided bigger values due to scaling inaccuracy on big numbers.
	 */
	if (prev->stime + prev->utime >= rtime)
		goto out;

	stime = curr->stime;
	utime = curr->utime;

	if (utime == 0) {
		stime = rtime;
	} else if (stime == 0) {
		utime = rtime;
	} else {
		cputime_t total = stime + utime;

		stime = scale_stime((__force u64)stime,
				    (__force u64)rtime, (__force u64)total);
#if PETERZ_FIX
		/* PeterZ's proposed fix */
		if (stime < prev->stime)
			stime = prev->stime;
#endif
		emulate_preemption(0);
		utime = rtime - stime;
	}

	cputime_advance(&prev->stime, stime);
	cputime_advance(&prev->utime, utime);

out:
	*ut = prev->utime;
	*st = prev->stime;
}
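/*
 * Worked example of the scaling problem (test case 0 above): step 0.0 has
 * utime == 0, so prev->stime is advanced all the way to rtime = 10000.
 * In step 0.1 rtime = 10100, utime = 100, stime = 1, so scale_stime()
 * yields stime = 1 * 10100 / 101 = 100 and utime = 10100 - 100 = 10000.
 * cputime_advance() never lets a counter go backwards, so prev->stime
 * stays at 10000 while prev->utime jumps to 10000, and the reported total
 * becomes 20000 even though only 10100 cputime units were actually spent.
 */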
/*****************************************************************************
 *
 * The code below is the actual test driver. The tests struct holds a number
 * of test cases, each with a sequence of inputs to cputime_adjust(); after
 * each step we verify that the reported system plus user time equals the
 * provided sum_exec_runtime.
 *
 *****************************************************************************
 */

#define NSEC_PER_SEC 1000000000ULL

static const struct {
	unsigned long long sum_exec_runtime;
	cputime_t utime;
	cputime_t stime;
	int preempt;
} tests[][10] = {
	{{ .sum_exec_runtime = 100*NSEC_PER_SEC, .utime = 0,   .stime = 1 },
	 { .sum_exec_runtime = 101*NSEC_PER_SEC, .utime = 100, .stime = 1 },
	 { .sum_exec_runtime = 0 },
	},
	{{ .sum_exec_runtime = 100*NSEC_PER_SEC, .utime = 1,   .stime = 0 },
	 { .sum_exec_runtime = 101*NSEC_PER_SEC, .utime = 1,   .stime = 100 },
	 { .sum_exec_runtime = 0 },
	},
	{{ .sum_exec_runtime = 100*NSEC_PER_SEC, .utime = 1,   .stime = 1 },
	 { .sum_exec_runtime = 101*NSEC_PER_SEC, .utime = 100, .stime = 1 },
	 { .sum_exec_runtime = 0 },
	},
	{{ .sum_exec_runtime = 100*NSEC_PER_SEC, .utime = 1,   .stime = 1, .preempt = 1 },
	 { .sum_exec_runtime = 101*NSEC_PER_SEC, .utime = 100, .stime = 1 },
	 { .sum_exec_runtime = 0 },
	}
};

/* The global state of the test machinery */
static int testno;
static int step;
static struct cputime prev;

/* Execute the current testno/step and validate the results */
void do_step(void)
{
	struct task_cputime t;
	cputime_t ut, st;
	int tno, s;

	tno = testno;
	s = step;
	t.sum_exec_runtime = tests[tno][s].sum_exec_runtime;
	t.utime = tests[tno][s].utime;
	t.stime = tests[tno][s].stime;

	printf("%d.%d sum_exec=%-10llu utime=%-5llu stime=%-5llu => ", tno, s,
	       (unsigned long long)t.sum_exec_runtime,
	       (unsigned long long)t.utime,
	       (unsigned long long)t.stime);

	cputime_adjust(&t, &prev, &ut, &st);

	printf(" %d.%d tot=%-10llu user=%-5llu sys=%-5llu", tno, s,
	       (unsigned long long)(ut + st),
	       (unsigned long long)ut,
	       (unsigned long long)st);

	if ((ut + st) != nsecs_to_cputime(t.sum_exec_runtime))
		printf(" =====> FAIL\n");
	else
		printf(" =====> OK\n");
}

/* Called from the preemption point in cputime_adjust(). If the current
 * step is marked .preempt, run the next step in the middle of this one
 * to emulate a preempting task, marking the interleaving with "<>". */
void emulate_preemption(int pos)
{
	if ((1 << pos) && tests[testno][step].preempt) {
		printf("<>\n");
		step++;
		do_step();
		printf("<>%38s", "");
	}
}

int main(void)
{
	for (testno = 0; testno < (int)(sizeof(tests)/sizeof(tests[0])); testno++) {
		prev.utime = 0;
		prev.stime = 0;
		step = 0;
		printf("\n");
		while (tests[testno][step].sum_exec_runtime != 0) {
			do_step();
			step++;
		}
	}
	return 0;
}
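/*
 * Worked example of the concurrency problem (test case 3 above): step 3.0
 * computes stime = scale_stime(1, 10000, 2) = 5000 and is then "preempted"
 * before utime is derived. The nested step 3.1 runs cputime_adjust() to
 * completion against the still-empty prev, reporting tot = 10100 (OK) and
 * leaving prev = { utime = 10000, stime = 100 }. When step 3.0 resumes it
 * sets utime = 10000 - 5000 = 5000; cputime_advance() then raises
 * prev->stime to 5000 but cannot lower prev->utime below 10000, so the
 * resumed step reports tot = 15000 against an rtime of only 10000 (FAIL).
 */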