// -*- Mode: c -*-
//
/* This is a replacement for the standard library profiling functions. I
 * have copied most of this code from the cygwin distribution. The "mcount"
 * function is new, and so is the startup/exit mechanism.
 *
 * Overview of what this file contains:
 *  - rangetable / findInTable: a table of PC ranges ("skip ranges") and a
 *    binary search over it.
 *  - initProfiler: loads the skip ranges from the file named by the
 *    PROFILE_SKIP_RANGE_FILE environment variable.
 *  - MONSTARTUP / my_moncontrol / the gmon.out writer: set up, start/stop
 *    and dump the profiling data.
 *  - _mcount / mcount: record call arcs; mcount re-attributes calls made
 *    from inside a skip range to the caller of that range.
 *
 * NOTE(review): this copy of the file is damaged. Every span that started
 * with a less-than sign directly followed by a name and ran up to the next
 * greater-than sign appears to have been dropped (header names after
 * #include, two loop headers and whatever followed them). The damaged
 * spots are marked below; restore them from a pristine copy before
 * building. */
#include   /* NOTE(review): header name missing in this copy */
#include   /* NOTE(review): header name missing in this copy */
#include   /* NOTE(review): header name missing in this copy */

// We must tell GCC not to instrument this function for profiling
// NOTE(review): nothing in the visible text follows the comment above;
// whatever it referred to is not shown here.

// Put here the profiling code
#include   /* NOTE(review): header name missing in this copy */
#include   /* NOTE(review): header name missing in this copy */
#include   /* NOTE(review): header name missing in this copy */
#include   /* NOTE(review): header name missing in this copy */
#include   /* NOTE(review): header name missing in this copy */

// If DEBUG == 1 we print some start-up and exit info
// If DEBUG == 2 we print also information on some mcount invocations
//               when something interesting happens
// If DEBUG >= 3 we print information on all mcount invocations
#define DEBUG 1

// Fallback for platforms whose open() has no binary-mode flag.
#ifndef O_BINARY
#define O_BINARY 0
#endif

// Value stored in the profrate field of the gmon.out header.
#ifndef PROF_HZ
#define PROF_HZ 100
#endif

/* On Linux if we re-define the __monstartup and the _mcleanup symbols then
 * these take over the ones defined in the library. On other architectures,
 * we hook these functions in ourselves in initProfiler */
#if defined(x86_LINUX)
#define MONSTARTUP __monstartup
#define MONCLEANUP _mcleanup
#define GMONOUT "gmon.out"
#elif defined(x86_WIN32)
#define MONSTARTUP my_monstartup
#define MONCLEANUP my_mcleanup
#define MUST_INIT_PROFILER 1
#define GMONOUT "gmon.out0"
void MONSTARTUP(u_long low, u_long high);
void MONCLEANUP(void);
// Bounds passed to MONSTARTUP by initProfiler: &mainCRTStartup is used as
// the low PC and &etext as the high PC.
extern u_char etext asm ("etext");
extern u_char mainCRTStartup;
extern void _monstartup(void);
#else
#error Unknown architecture
#endif

extern void aftermcount(void);
extern void mcount(void);

static int profilerInitialized = 0; // Whether we have loaded the table

// The table contains the ranges we must skip.
// Each entry is a half-open PC interval [low, high); findInTable treats
// pc == high as outside the range.
#define MAX_RANGE 128
struct range {
  unsigned long low, high;
} rangetable[MAX_RANGE];
int nextIdx = 0; // The index of the next free element

// Emit a label in the text section so that the address of this point in
// the file can be taken as &start_this_file.
extern u_char start_this_file;
asm(".text");
asm("start_this_file:");

/* Binary search of rangetable[0 .. nextIdx) for the range containing PC.
 *
 * Returns a pointer to the entry with low <= pc < high, or 0 when no entry
 * matches. The search is only meaningful if the entries are sorted by
 * address and do not overlap; the comments in initProfiler ("Insert it
 * sorted", "Coalesce") say that is how the table is built, but that code is
 * partly missing in this copy. */
static struct range* findInTable(unsigned long pc) {
  int l = 0, h = nextIdx; // If the element exists its index is >= l and < h
  while(l < h) {
    int m = (l + h) >> 1; // l <= m < h
    if(pc < rangetable[m].low) {
      h = m; // make progress because m < h
    } else if(pc >= rangetable[m].high) {
      l = m + 1; // make progress because m + 1 > l
    } else {
      // m is the one we need
      return & rangetable[m];
    }
  }
  return 0;
}

void my_mcleanup(void);

/* One-time profiler initialization; mcount calls it on its first invocation
 * (while profilerInitialized is still 0).
 *
 * If the environment variable PROFILE_SKIP_RANGE_FILE names a readable
 * file, entries of the form "low-high" are read from it into rangetable,
 * stopping at MAX_RANGE entries or at the first entry that does not parse.
 * When MUST_INIT_PROFILER is defined it then calls MONSTARTUP and registers
 * MONCLEANUP with atexit.
 *
 * NOTE(review): the middle and the end of this function are damaged in this
 * copy (see the markers below); the brace nesting shown here is a best
 * effort and the function's closing brace is inside a lost span. */
void initProfiler() {
  // See if we have defined the magic environment variable
  char *f = getenv("PROFILE_SKIP_RANGE_FILE");
  assert(! profilerInitialized);
  profilerInitialized = 1;
#if DEBUG >= 1
  fprintf(stderr, "Initializing the profiler\n");
#endif
  nextIdx = 0;
  if(f) {
    FILE *ff = fopen(f, "r");
    // NOTE(review): no fclose(ff) appears in the visible text; it may be in
    // one of the lost spans - confirm against a pristine copy.
    if(ff) {
      while(! feof(ff)) {
        unsigned long low, high;
        int i;
        if(nextIdx >= MAX_RANGE) {
          // NOTE(review): this message has no trailing newline, unlike the
          // other diagnostics in this file.
          fprintf(stderr, "Too many profile ranges");
          break;
        }
        // The trailing space in the format skips whitespace (including the
        // newline) after each entry.
        // NOTE(review): %ld is the conversion for signed long, but low and
        // high are unsigned long; %lu would match the declared type.
        if(2 != fscanf(ff, "%ld-%ld ", &low, &high)) {
          fprintf(stderr, "Invalid line in profile ranges file %s\n", f);
          break;
        }
#if DEBUG >= 3
        // NOTE(review): same signed/unsigned conversion mismatch as above.
        fprintf(stderr, "Loaded profile skip range %ld-%ld\n", low, high);
#endif
        // Insert it sorted in the table
        /* NOTE(review): DAMAGED TEXT. The rest of the insertion loop, the
         * end of the reading loop and the beginning of the coalescing pass
         * (the declarations of "current" and "next" and the loop header)
         * are missing; the next line splices the start of one statement
         * onto the tail of another. What survives of the coalescing pass
         * extends rangetable[current] when its high bound reaches the low
         * bound of rangetable[next], and otherwise compacts
         * rangetable[next] down to the slot after "current". */
        for(i=0;i= rangetable[next].low) {
          // Coalesce
          rangetable[current].high = rangetable[next].high;
          next ++;
        } else {
          // Do not coalesce
          current ++;
          if(current != next) {
            rangetable[current] = rangetable[next];
          }
          next ++;
        }
      }
      // After coalescing, "current" is the index of the last entry kept.
      nextIdx = current + 1;
    }
  }
}
/* Now we must call the initialization functions, if we are not on Linux */
#ifdef MUST_INIT_PROFILER
#if DEBUG >= 1
  fprintf(stderr, " calling the standard profiler startup function\n");
#endif
  MONSTARTUP((u_long) & mainCRTStartup, (u_long)&etext);
  atexit(& MONCLEANUP);
#endif
#if DEBUG >= 2
  {
    int i;
    fprintf(stderr, "Profiler skip table is:\n");
    /* NOTE(review): DAMAGED TEXT. Everything from the header of this loop
     * up to the "#if DEBUG >= 1" that guarded the two declarations below is
     * missing. The lost span apparently held the end of initProfiler, the
     * definitions this file uses later without any visible declaration
     * (_my_gmonparam, s_scale, fake_sbrk, ERR, PCTOIDX, IDXTOPC, ...) and
     * the start of the function that writes the profile file, including its
     * other local variables (fd, hz, p, hdr, gmonhdr, rawarc, proffile,
     * fromindex, endfrom, frompc, toindex, ...). */
    for(i=0;i= 1
  int log, len;
  char dbuf[200];
#endif
  /* From here to the closing brace after close(fd) is the visible remainder
   * of the routine that dumps the profile - presumably MONCLEANUP, going by
   * the "_mcleanup"/"[mcleanup1]" strings; its header is lost. It:
   *   1. stops profiling with my_moncontrol(0) and clears
   *      profilerInitialized,
   *   2. opens GMONOUT (and, when DEBUG >= 1, the text log "gmon.log"),
   *   3. writes the header, then the kcount array after zeroing the slots
   *      between mcount and aftermcount,
   *   4. writes one rawarc record per entry on every froms[] chain.
   * NOTE(review): none of the write() results are checked. */
#if DEBUG >= 1
  fprintf(stderr, "Saving profile output in %s\n", GMONOUT);
#endif
  // _mcount leaves the state at GMON_PROF_ERROR when it runs out of tos
  // entries.
  if (p->state == GMON_PROF_ERROR)
    ERR("_mcleanup: tos overflow\n");
  hz = PROF_HZ;
  my_moncontrol(0);
  // Cleared so that mcount would call initProfiler again if it were invoked
  // after this point.
  profilerInitialized = 0;
#ifdef nope
  /* Disabled code ("nope" is not defined anywhere in the visible text):
   * builds the output path from $PROFDIR, the process id and the program
   * name, copying into buf up to "limit". */
  if ((profdir = getenv("PROFDIR")) != NULL) {
    extern char *__progname;
    char *s, *t, *limit;
    pid_t pid;
    long divisor;
    /* If PROFDIR contains a null value, no profiling output is produced */
    if (*profdir == '\0') {
      return;
    }
    limit = buf + sizeof buf - 1 - 10 - 1 - strlen(__progname) - 1;
    t = buf;
    s = profdir;
    while((*t = *s) != '\0' && t < limit) {
      t++;
      s++;
    }
    *t++ = '/';
    /*
     * Copy and convert pid from a pid_t to a string. For
     * best performance, divisor should be initialized to
     * the largest power of 10 less than PID_MAX.
     */
    pid = getpid();
    divisor=10000;
    while (divisor > pid) divisor /= 10; /* skip leading zeros */
    do {
      *t++ = (pid/divisor) + '0';
      pid %= divisor;
    } while (divisor /= 10);
    *t++ = '.';
    s = __progname;
    while ((*t++ = *s++) != '\0')
      ;
    proffile = buf;
  } else {
    proffile = GMONOUT;
  }
#else
  {
    // NOTE(review): gmon_out is local to this inner block, yet proffile is
    // used by open() and perror() after the block has ended, so those calls
    // read an array whose lifetime is over.
    char gmon_out[] = GMONOUT;
    proffile = gmon_out;
  }
#endif
  fd = open(proffile , O_CREAT|O_TRUNC|O_WRONLY|O_BINARY, 0666);
  if (fd < 0) {
    perror( proffile );
    return;
  }
#if DEBUG >= 1
  log = open("gmon.log", O_CREAT|O_TRUNC|O_WRONLY, 0664);
  if (log < 0) {
    perror("mcount: gmon.log");
    // NOTE(review): returns with fd still open and the profile file empty;
    // a failure to open the debug log aborts the real output.
    return;
  }
  len = sprintf(dbuf, "[mcleanup1] kcount 0x%lx ssiz %ld\n", (u_long)p->kcount, p->kcountsize);
  write(log, dbuf, len);
#endif
  // File header: PC bounds, byte count of histogram plus header, format
  // version and profrate.
  hdr = (struct gmonhdr *)&gmonhdr;
  hdr->lpc = p->lowpc;
  hdr->hpc = p->highpc;
  hdr->ncnt = p->kcountsize + sizeof(gmonhdr);
  hdr->version = GMONVERSION;
  hdr->profrate = hz;
  write(fd, (char *)hdr, sizeof *hdr);
  /* Clear out the counts associated with PCs in this file.
   * As written, the cleared range is the histogram slots from the one for
   * &mcount through the one for &aftermcount, inclusive. */
  {
    unsigned int first_idx = PCTOIDX(&mcount, p->lowpc);
    unsigned int last_idx = PCTOIDX(&aftermcount, p->lowpc);
    /*fprintf(stderr, "s_scale = %d, base = 0x%lx\n", s_scale, p->lowpc);
      fprintf(stderr, "findInTable at 0x%lx, mcount at 0x%lx, after 0x%lx\n",
      (u_long)&start_this_file, (u_long)&mcount, (u_long)&aftermcount);*/
    // Both indices must fall inside kcount (kcountsize is a byte count).
    if(first_idx >= p->kcountsize / sizeof(p->kcount[0]) || last_idx >= p->kcountsize / sizeof(p->kcount[0])) {
      fprintf(stderr, "Invalid indices into the profile table\n");
    } else {
#if DEBUG >= 2
      fprintf(stderr, "Will clear from %d-%d\n", first_idx, last_idx);
#endif
      for(;first_idx<=last_idx;first_idx++) {
#if DEBUG >= 2
        fprintf(stderr, "Clear index %d in count table: 0x%lx-0x%lx.\n", first_idx, IDXTOPC(first_idx,p->lowpc), IDXTOPC(first_idx+1,p->lowpc));
#endif
        p->kcount[first_idx] = 0;
      }
    }
  }
  write(fd, p->kcount, p->kcountsize);
  // Dump the call arcs: each non-zero froms[] slot heads a chain of tos[]
  // entries linked through .link, terminated by index 0.
  endfrom = p->fromssize / sizeof(*p->froms);
  for (fromindex = 0; fromindex < endfrom; fromindex++) {
    if (p->froms[fromindex] == 0)
      continue;
    // Rebuild the caller PC from the slot index (inverse of the indexing
    // done in _mcount).
    frompc = p->lowpc;
    frompc += fromindex * p->hashfraction * sizeof(*p->froms);
    for (toindex = p->froms[fromindex]; toindex != 0; toindex = p->tos[toindex].link) {
#if DEBUG >= 1
      len = sprintf(dbuf, "[mcleanup2] frompc 0x%lx selfpc 0x%lx count %ld\n" , frompc, p->tos[toindex].selfpc, p->tos[toindex].count);
      write(log, dbuf, len);
#endif
      rawarc.raw_frompc = frompc;
      rawarc.raw_selfpc = p->tos[toindex].selfpc;
      rawarc.raw_count = p->tos[toindex].count;
      write(fd, &rawarc, sizeof rawarc);
    }
  }
  // NOTE(review): when DEBUG >= 1 the "log" descriptor is never closed in
  // the visible code.
  close(fd);
}

// This will be called from the main library
/* Set up profiling for the text range [lowpc, highpc).
 *
 * Rounds the bounds to multiples of HISTFRACTION * sizeof(HISTCOUNTER),
 * derives the sizes of the three tables (kcount histogram, froms index,
 * tos arc records), obtains one chunk from fake_sbrk and carves it up in
 * the order tos, kcount, froms, computes s_scale and finally starts
 * profiling with my_moncontrol(1). If fake_sbrk returns (char *)-1 it
 * reports the error through ERR and returns without starting. */
void MONSTARTUP(lowpc, highpc)
     u_long lowpc;
     u_long highpc;
{
  register int o;
  char *cp;
  struct gmonparam *p = &_my_gmonparam;
#if DEBUG >= 1
  fprintf(stderr, "You called monstartup(LOW=0x%lx, HIGH=0x%lx)\n", lowpc, highpc);
#endif
  /*
   * round lowpc and highpc to multiples of the density we're using
   * so the rest of the scaling (here and in gprof) stays in ints.
   */
  p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER));
  p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER));
  p->textsize = p->highpc - p->lowpc;
  p->kcountsize = p->textsize / HISTFRACTION;
  p->hashfraction = HASHFRACTION;
  p->fromssize = p->textsize / p->hashfraction;
  // Number of arc records: ARCDENSITY percent of the text size, clamped to
  // [MINARCS, MAXARCS].
  p->tolimit = p->textsize * ARCDENSITY / 100;
  if (p->tolimit < MINARCS)
    p->tolimit = MINARCS;
  else if (p->tolimit > MAXARCS)
    p->tolimit = MAXARCS;
  p->tossize = p->tolimit * sizeof(struct tostruct);
  cp = fake_sbrk(p->kcountsize + p->fromssize + p->tossize);
  if (cp == (char *)-1) {
    ERR("monstartup: out of memory\n");
    return;
  }
  // NOTE(review): the explicit clearing below is compiled out, so the
  // tables rely on fake_sbrk handing back zeroed memory - its definition is
  // not visible here; confirm.
#ifdef notdef
  bzero(cp, p->kcountsize + p->fromssize + p->tossize);
#endif
  p->tos = (struct tostruct *)cp;
  cp += p->tossize;
  p->kcount = (u_short *)cp;
  cp += p->kcountsize;
  p->froms = (u_short *)cp;
  /* XXX minbrk needed? */
  //minbrk = fake_sbrk(0);
  // tos[0] is never used as an arc: its link field counts the entries
  // allocated so far (see _mcount).
  p->tos[0].link = 0;
  o = p->highpc - p->lowpc;
  // s_scale is the scale argument later handed to profil(): the ratio of
  // histogram bytes to text bytes, in units of SCALE_1_TO_1.
  if (p->kcountsize < o) {
#ifndef notdef
    s_scale = ((float)p->kcountsize / o ) * SCALE_1_TO_1;
#else /* avoid floating point */
    int quot = o / p->kcountsize;
    if (quot >= 0x10000)
      s_scale = 1;
    else if (quot >= 0x100)
      s_scale = 0x10000 / quot;
    else if (o >= 0x800000)
      s_scale = 0x1000000 / (o / (p->kcountsize >> 8));
    else
      s_scale = 0x1000000 / ((o << 8) / p->kcountsize);
#endif
  } else
    s_scale = SCALE_1_TO_1;
  my_moncontrol(1);
}

/*
 * Control profiling
 * profiling is what mcount checks to see if
 * all the data structures are ready.
 *
 * A non-zero MODE calls profil() with the kcount buffer, its size, lowpc
 * and s_scale, then sets the state to GMON_PROF_ON. A zero MODE calls
 * profil() with all-zero arguments and sets the state to GMON_PROF_OFF.
 */
void my_moncontrol(mode)
     int mode;
{
  struct gmonparam *p = &_my_gmonparam;
  if (mode) {
    /* start */
    profil((u_short *)p->kcount, p->kcountsize, p->lowpc, s_scale);
    p->state = GMON_PROF_ON;
  } else {
    /* stop */
    profil((u_short *)0, 0, 0, 0);
    p->state = GMON_PROF_OFF;
  }
}

/* Record one traversal of the call arc FROMPC -> SELFPC in the tables of P.
 *
 * froms[] is indexed by (frompc - lowpc) scaled down by
 * hashfraction * sizeof(*froms); each slot holds the index of the first
 * tos[] record of a chain of callees seen from that slot (0 = empty). The
 * record whose selfpc matches gets its count incremented and is moved to
 * the head of its chain; if none matches, a new record is allocated by
 * bumping tos[0].link.
 *
 * On return the state is GMON_PROF_ON, or GMON_PROF_ERROR when the arc
 * table is full (the new index reached tolimit).
 *
 * The caller must already have checked that frompc lies inside the profiled
 * range; mcount does so before calling. */
inline static void _mcount(p, frompc, selfpc) /* _mcount; may be static, inline, etc */
     register struct gmonparam *p;
     register u_long frompc, selfpc;
{
  register u_short *frompcindex;
  register struct tostruct *top, *prevtop;
  register long toindex;
  frompc -= p->lowpc;
  // When HASHFRACTION is a power of two, divide by the constant on the
  // common path; otherwise always use the run-time hashfraction.
#if (HASHFRACTION & (HASHFRACTION - 1)) == 0
  if (p->hashfraction == HASHFRACTION)
    frompcindex = &p->froms[frompc / (HASHFRACTION * sizeof(*p->froms))];
  else
#endif
    frompcindex = &p->froms[frompc / (p->hashfraction * sizeof(*p->froms))];
  toindex = *frompcindex;
  if (toindex == 0) {
    /*
     * first time traversing this arc
     */
    toindex = ++p->tos[0].link;
    if (toindex >= p->tolimit)
      /* halt further profiling */
      goto overflow;
    *frompcindex = toindex;
    top = &p->tos[toindex];
    top->selfpc = selfpc;
    top->count = 1;
    top->link = 0;
    goto done;
  }
  top = &p->tos[toindex];
  if (top->selfpc == selfpc) {
    /*
     * arc at front of chain; usual case.
     */
    top->count++;
    goto done;
  }
  /*
   * have to go looking down chain for it.
   * top points to what we are looking at,
   * prevtop points to previous top.
   * we know it is not at the head of the chain.
   */
  for (; /* goto done */; ) {
    if (top->link == 0) {
      /*
       * top is end of the chain and none of the chain
       * had top->selfpc == selfpc.
       * so we allocate a new tostruct
       * and link it to the head of the chain.
       */
      toindex = ++p->tos[0].link;
      if (toindex >= p->tolimit)
        goto overflow;
      top = &p->tos[toindex];
      top->selfpc = selfpc;
      top->count = 1;
      top->link = *frompcindex;
      *frompcindex = toindex;
      goto done;
    }
    /*
     * otherwise, check the next arc on the chain.
     */
    prevtop = top;
    top = &p->tos[top->link];
    if (top->selfpc == selfpc) {
      /*
       * there it is.
       * increment its count
       * move it to the head of the chain.
       */
      top->count++;
      toindex = prevtop->link;
      prevtop->link = top->link;
      top->link = *frompcindex;
      *frompcindex = toindex;
      goto done;
    }
  }
done:
  p->state = GMON_PROF_ON;
  return;
overflow:
  p->state = GMON_PROF_ERROR;
  return;
}

/* I have no idea how to find the backtrace in the Ocaml native code
 * compilation. So, we maintain a stack ourselves. We put on the stack only
 * the functions we must skip. */
static struct stackentry {
  struct range range;    // Range of the PC for the function
  u_long frompc;         // The address it was called from
  u_long frame_address;  // The frame address for this function
} stack[128];
static struct stackentry *stackTop = &stack[0]; /* The top of the stack. There is always an
                                                 * element with 0 */

/* Profiling hook. It reads its own return address as SELFPC and the return
 * address one frame further up as FROMPC.
 *
 * Steps:
 *  1. Initialize the profiler on first use.
 *  2. Return at once unless the state is GMON_PROF_ON; otherwise mark it
 *     GMON_PROF_BUSY for the duration of the call.
 *  3. Pop entries of the private stack whose recorded frame address is not
 *     above the current frame address.
 *  4. If FROMPC lies inside the range on top of the stack, replace it with
 *     the frompc saved in that entry, so the call is attributed to whoever
 *     called the skipped function.
 *  5. If SELFPC lies inside a skip range, push an entry for it (unless the
 *     top entry already has the same range and frompc).
 *  6. Discard the arc if FROMPC is outside the profiled text; otherwise
 *     hand it to _mcount. */
void mcount() {
  register struct gmonparam *p;
  register u_long frompc, selfpc;
  register u_long frame_address;
  struct range *selfrange;
  if(! profilerInitialized)
    initProfiler ();
  // This code was copied from _mcount. We moved it here because we want
  // to avoid expensive table lookups if not necessary
  p = &_my_gmonparam;
  /*
   * check that we are profiling
   * and that we aren't recursively invoked.
   */
  if (p->state != GMON_PROF_ON) {
    // Any state other than ON takes this path, not only BUSY, so the
    // message below is also printed when profiling is off or in error.
#if DEBUG >= 2
    fprintf(stderr, "already profiling\n");
#endif
    return;
  }
  p->state = GMON_PROF_BUSY;
  selfpc = (unsigned long)__builtin_return_address(0);
  frompc = (unsigned long)__builtin_return_address(1);
#if DEBUG >= 3
  fprintf(stderr, "mcount 0x%lx -> 0x%lx.\n", frompc, selfpc);
#endif
  // See if we need to pop some things off the stack
  // (stack[0] itself is never popped: the loop stops at stackTop == stack)
  frame_address = (u_long)__builtin_frame_address(0);
  while(stackTop > stack && frame_address >= stackTop->frame_address) {
#if DEBUG >= 2
    fprintf(stderr, "Poping profiling stack entry 0x%lx\n", stackTop->frame_address);
#endif
    stackTop --;
  }
  /* See if we need to change the frompc to pretend we are coming from
   * somewhere else */
  if(frompc >= stackTop->range.low && frompc < stackTop->range.high) {
    // This means we are calling from a function that we have to skip
#if DEBUG >= 2
    fprintf(stderr, "Changing FROM=0x%lx (0x%lx->0x%lx).\n", stackTop->frompc, frompc, selfpc);
#endif
    // Pretend we are coming directly from grandparent
    frompc = stackTop->frompc;
  }
  /* See if we must push a new entry on the stack, for a SELFPC that we must
   * skip */
  selfrange = findInTable(selfpc);
  if(selfrange) {
    if(stackTop->frompc != frompc || stackTop->range.low != selfrange->low || stackTop->range.high != selfrange->high) {
#if DEBUG >= 2
      fprintf(stderr, "Pushing profiling entry (0x%lx-0x%lx) frame 0x%lx, from 0x%lx\n", selfrange->low, selfrange->high, frame_address, frompc);
#endif
      stackTop ++;
      if(stackTop - stack >= sizeof(stack) / sizeof(stack[0])) {
        fprintf(stderr, "Profiling stack overflow\n");
        // NOTE(review): after this reset the three stores below overwrite
        // stack[0], the entry the declaration of stackTop describes as
        // always being the zero element.
        stackTop = stack;
      }
      stackTop->range = *selfrange;
      stackTop->frompc = frompc;
      stackTop->frame_address = frame_address;
    }
  }
  /* Finally, ignore those cases when the FROMPC is not in the range that we
   * care about */
  // A single comparison covers both ends provided u_long is unsigned
  // (presumably - its definition is not visible here): a frompc below lowpc
  // wraps around to a huge difference.
  if (frompc - p->lowpc > p->textsize) {
#if DEBUG >= 2
    fprintf(stderr, "ignoring FROMPC 0x%lx out of range (SELF=0x%lx)\n", frompc, selfpc);
#endif
    p->state = GMON_PROF_ON;
    return;
  }
  _mcount(p, frompc, selfpc); // Will reset p->state
}

/* Marker function: its address is used, together with &mcount, to compute
 * the span of histogram slots cleared before the profile is written. */
void aftermcount(void) {
  // Define this function only to be able to get a PC that is after mcount
  return;
}