X-Git-Url: https://oss.titaniummirror.com/gitweb?a=blobdiff_plain;f=gcc%2Fconfig%2Fh8300%2Flib1funcs.asm;fp=gcc%2Fconfig%2Fh8300%2Flib1funcs.asm;h=1b75b73269df9f63a7226aa9a4d56a348a6c3167;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=a5a9f9559d58ce4970986765aff0b8d4a9475f88;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git diff --git a/gcc/config/h8300/lib1funcs.asm b/gcc/config/h8300/lib1funcs.asm index a5a9f955..1b75b732 100644 --- a/gcc/config/h8300/lib1funcs.asm +++ b/gcc/config/h8300/lib1funcs.asm @@ -1,31 +1,28 @@ -;; libgcc routines for the Hitachi H8/300 CPU. +;; libgcc routines for the Renesas H8/300 CPU. ;; Contributed by Steve Chamberlain +;; Optimizations by Toshiyasu Morita -/* Copyright (C) 1994, 2000, 2001 Free Software Foundation, Inc. +/* Copyright (C) 1994, 2000, 2001, 2002, 2003, 2004, 2009 + Free Software Foundation, Inc. This file is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +Free Software Foundation; either version 3, or (at your option) any later version. -In addition to the permissions in the GNU General Public License, the -Free Software Foundation gives you unlimited permission to link the -compiled version of this file into combinations with other programs, -and to distribute those combinations without any restriction coming -from the use of this file. (The General Public License restrictions -do apply in other respects; for example, they cover modification of -the file, and distribution when not linked into a combine -executable.) - This file is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with this program; see the file COPYING. If not, write to -the Free Software Foundation, 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ /* Assembler register definitions. */ @@ -58,9 +55,6 @@ Boston, MA 02111-1307, USA. */ #define S2H r6h #ifdef __H8300__ -#define MOVP mov.w /* pointers are 16 bits */ -#define ADDP add.w -#define CMPP cmp.w #define PUSHP push #define POPP pop @@ -73,10 +67,7 @@ Boston, MA 02111-1307, USA. */ #define S2P r6 #endif -#if defined (__H8300H__) || defined (__H8300S__) -#define MOVP mov.l /* pointers are 32 bits */ -#define ADDP add.l -#define CMPP cmp.l +#if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__) #define PUSHP push.l #define POPP pop.l @@ -95,12 +86,27 @@ Boston, MA 02111-1307, USA. */ #endif #ifdef __H8300H__ +#ifdef __NORMAL_MODE__ + .h8300hn +#else .h8300h #endif +#endif #ifdef __H8300S__ +#ifdef __NORMAL_MODE__ + .h8300sn +#else .h8300s #endif +#endif +#ifdef __H8300SX__ +#ifdef __NORMAL_MODE__ + .h8300sxn +#else + .h8300sx +#endif +#endif #ifdef L_cmpsi2 #ifdef __H8300__ @@ -108,24 +114,21 @@ Boston, MA 02111-1307, USA. */ .align 2 .global ___cmpsi2 ___cmpsi2: - cmp.w A2,A0 - bne .L2 - cmp.w A3,A1 + cmp.w A0,A2 bne .L2 + cmp.w A1,A3 + bne .L4 mov.w #1,A0 rts .L2: - cmp.w A0,A2 - bgt .L4 - bne .L3 - cmp.w A1,A3 - bls .L3 -.L4: - sub.w A0,A0 - rts + bgt .L5 .L3: mov.w #2,A0 + rts +.L4: + bls .L3 .L5: + sub.w A0,A0 rts .end #endif @@ -137,24 +140,21 @@ ___cmpsi2: .align 2 .global ___ucmpsi2 ___ucmpsi2: - cmp.w A2,A0 - bne .L2 - cmp.w A3,A1 + cmp.w A0,A2 bne .L2 + cmp.w A1,A3 + bne .L4 mov.w #1,A0 rts .L2: - cmp.w A0,A2 - bhi .L4 - bne .L3 - cmp.w A1,A3 - bls .L3 -.L4: - sub.w A0,A0 - rts + bhi .L5 .L3: mov.w #2,A0 + rts +.L4: + bls .L3 .L5: + sub.w A0,A0 rts .end #endif @@ -167,7 +167,7 @@ ___ucmpsi2: ;; "supporting routines". ; general purpose normalize routine -; +; ; divisor in A0 ; dividend in A1 ; turns both into +ve numbers, and leaves what the answer sign @@ -177,32 +177,30 @@ ___ucmpsi2: .section .text .align 2 divnorm: - mov.b #0x0,A2L or A0H,A0H ; is divisor > 0 - bge _lab1 + stc ccr,A2L + bge _lab1 not A0H ; no - then make it +ve not A0L - adds #1,A0 - xor #0x1,A2L ; and remember that in A2L + adds #1,A0 _lab1: or A1H,A1H ; look at dividend - bge _lab2 + bge _lab2 not A1H ; it is -ve, make it positive not A1L adds #1,A1 - xor #0x1,A2L; and toggle sign of result + xor #0x8,A2L; and toggle sign of result _lab2: rts ;; Basically the same, except that the sign of the divisor determines ;; the sign. modnorm: - mov.b #0x0,A2L or A0H,A0H ; is divisor > 0 - bge _lab7 + stc ccr,A2L + bge _lab7 not A0H ; no - then make it +ve not A0L - adds #1,A0 - xor #0x1,A2L ; and remember that in A2L + adds #1,A0 _lab7: or A1H,A1H ; look at dividend - bge _lab8 + bge _lab8 not A1H ; it is -ve, make it positive not A1L adds #1,A1 @@ -214,12 +212,12 @@ _lab8: rts ___divhi3: bsr divnorm bsr ___udivhi3 -negans: or A2L,A2L ; should answer be negative ? +negans: btst #3,A2L ; should answer be negative ? beq _lab4 not A0H ; yes, so make it so not A0L adds #1,A0 -_lab4: rts +_lab4: rts ; A0=A0%A1 signed @@ -256,13 +254,13 @@ ___umodhi3: .global ___udivhi3 ___udivhi3: - ; A0 A1 A2 A3 + ; A0 A1 A2 A3 ; Nn Dd P - sub.w A3,A3 ; Nn Dd xP 00 - or A1H,A1H + sub.w A3,A3 ; Nn Dd xP 00 + or A1H,A1H bne divlongway - or A0H,A0H - beq _lab6 + or A0H,A0H + beq _lab6 ; we know that D == 0 and N is != 0 mov.b A0H,A3L ; Nn Dd xP 0N @@ -274,7 +272,7 @@ _lab6: mov.b A0L,A3L ; n mov.b A3L,A0L ; Qq mov.b A3H,A3L ; m mov.b #0x0,A3H ; Qq 0m - rts + rts ; D != 0 - which means the denominator is ; loop around to get the result. @@ -285,19 +283,19 @@ divlongway: mov.b #0x8,A2H ; 8 div8: add.b A0L,A0L ; n*=2 rotxl A3L ; Make remainder bigger - rotxl A3H + rotxl A3H sub.w A1,A3 ; Q-=N bhs setbit ; set a bit ? add.w A1,A3 ; no : too far , Q+=N - dec A2H - bne div8 ; next bit - rts + dec A2H + bne div8 ; next bit + rts setbit: inc A0L ; do insert bit - dec A2H - bne div8 ; next bit - rts + dec A2H + bne div8 ; next bit + rts #endif /* __H8300__ */ #endif /* L_divhi3 */ @@ -306,7 +304,7 @@ setbit: inc A0L ; do insert bit ;; 4 byte integer divides for the H8/300. ;; -;; We have one routine which does all the work and lots of +;; We have one routine which does all the work and lots of ;; little ones which prepare the args and massage the sign. ;; We bunch all of this into one object file since there are several ;; "supporting routines". @@ -320,8 +318,8 @@ setbit: inc A0L ; do insert bit #ifdef __H8300__ divnorm: - mov.b #0,S2L ; keep the sign in S2 mov.b A0H,A0H ; is the numerator -ve + stc ccr,S2L ; keep the sign in bit 3 of S2L bge postive ; negate arg @@ -334,28 +332,26 @@ divnorm: addx #0,A1H addx #0,A0L addx #0,A0H - - mov.b #1,S2L ; the sign will be -ve postive: mov.b A2H,A2H ; is the denominator -ve bge postive2 - not A2L + not A2L not A2H not A3L not A3H - add.b #1,A3L + add.b #1,A3L addx #0,A3H addx #0,A2L addx #0,A2H - xor #1,S2L ; toggle result sign + xor.b #0x08,S2L ; toggle the result sign postive2: rts ;; Basically the same, except that the sign of the divisor determines ;; the sign. modnorm: - mov.b #0,S2L ; keep the sign in S2 mov.b A0H,A0H ; is the numerator -ve + stc ccr,S2L ; keep the sign in bit 3 of S2L bge mpostive ; negate arg @@ -368,16 +364,14 @@ modnorm: addx #0,A1H addx #0,A0L addx #0,A0H - - mov.b #1,S2L ; the sign will be -ve mpostive: mov.b A2H,A2H ; is the denominator -ve bge mpostive2 - not A2L + not A2L not A2H not A3L not A3H - add.b #1,A3L + add.b #1,A3L addx #0,A3H addx #0,A2L addx #0,A2H @@ -387,19 +381,18 @@ mpostive2: #else /* __H8300H__ */ divnorm: - mov.b #0,S2L ; keep the sign in S2 mov.l A0P,A0P ; is the numerator -ve + stc ccr,S2L ; keep the sign in bit 3 of S2L bge postive neg.l A0P ; negate arg - mov.b #1,S2L ; the sign will be -ve postive: mov.l A1P,A1P ; is the denominator -ve bge postive2 neg.l A1P ; negate arg - xor.b #1,S2L ; toggle result sign + xor.b #0x08,S2L ; toggle the result sign postive2: rts @@ -407,12 +400,11 @@ postive2: ;; Basically the same, except that the sign of the divisor determines ;; the sign. modnorm: - mov.b #0,S2L ; keep the sign in S2 mov.l A0P,A0P ; is the numerator -ve + stc ccr,S2L ; keep the sign in bit 3 of S2L bge mpostive neg.l A0P ; negate arg - mov.b #1,S2L ; the sign will be -ve mpostive: mov.l A1P,A1P ; is the denominator -ve @@ -429,60 +421,70 @@ mpostive2: ; denominator in A2/A3 .global ___modsi3 ___modsi3: - PUSHP S2P +#ifdef __H8300__ + PUSHP S2P PUSHP S0P PUSHP S1P - bsr modnorm bsr divmodsi4 -#ifdef __H8300__ mov S0,A0 mov S1,A1 + bra exitdiv #else - mov.l S0P,A0P -#endif + PUSHP S2P + bsr modnorm + bsr ___udivsi3 + mov.l er3,er0 bra exitdiv +#endif + ;; H8/300H and H8S version of ___udivsi3 is defined later in + ;; the file. +#ifdef __H8300__ .global ___udivsi3 ___udivsi3: PUSHP S2P PUSHP S0P PUSHP S1P - mov.b #0,S2L ; keep sign low bsr divmodsi4 - bra exitdiv + bra reti +#endif .global ___umodsi3 ___umodsi3: +#ifdef __H8300__ PUSHP S2P PUSHP S0P PUSHP S1P - mov.b #0,S2L ; keep sign low bsr divmodsi4 -#ifdef __H8300__ mov S0,A0 mov S1,A1 + bra reti #else - mov.l S0P,A0P + bsr ___udivsi3 + mov.l er3,er0 + rts #endif - bra exitdiv - + .global ___divsi3 ___divsi3: +#ifdef __H8300__ PUSHP S2P PUSHP S0P PUSHP S1P jsr divnorm jsr divmodsi4 +#else + PUSHP S2P + jsr divnorm + bsr ___udivsi3 +#endif ; examine what the sign should be exitdiv: - POPP S1P - POPP S0P - - or S2L,S2L + btst #3,S2L beq reti - + ; should be -ve #ifdef __H8300__ not A0H @@ -499,14 +501,18 @@ exitdiv: #endif reti: +#ifdef __H8300__ + POPP S1P + POPP S0P +#endif POPP S2P - rts + rts - ; takes A0/A1 numerator (A0P for 300H) - ; A2/A3 denominator (A1P for 300H) - ; returns A0/A1 quotient (A0P for 300H) - ; S0/S1 remainder (S0P for 300H) - ; trashes S2 + ; takes A0/A1 numerator (A0P for H8/300H) + ; A2/A3 denominator (A1P for H8/300H) + ; returns A0/A1 quotient (A0P for H8/300H) + ; S0/S1 remainder (S0P for H8/300H) + ; trashes S2H #ifdef __H8300__ @@ -516,7 +522,7 @@ divmodsi4: mov.b A2H,S2H or A2L,S2H or A3H,S2H - bne DenHighZero + bne DenHighNonZero mov.b A0H,A0H bne NumByte0Zero mov.b A0L,A0L @@ -543,10 +549,10 @@ NumByte3Zero: mov.b S1H,S1L mov.b #0x0,S1H - rts + rts ; have to do the divide by shift and test -DenHighZero: +DenHighNonZero: mov.b A0H,S1L mov.b A0L,A0H mov.b A1H,A0L @@ -567,7 +573,7 @@ nextbit: sub.w A3,S1 ; does it all fit subx A2L,S0L subx A2H,S0H - bhs setone + bhs setone add.w A3,S1 ; no, restore mistake addx A2L,S0L @@ -575,20 +581,21 @@ nextbit: dec S2H bne nextbit - rts - + rts + setone: inc A1L dec S2H bne nextbit - rts + rts #else /* __H8300H__ */ -divmodsi4: - sub.l S0P,S0P ; zero play area + ;; This function also computes the remainder and stores it in er3. + .global ___udivsi3 +___udivsi3: mov.w A1E,A1E ; denominator top word 0? - bne DenHighZero + bne DenHighNonZero ; do it the easy way, see page 107 in manual mov.w A0E,A2 @@ -596,36 +603,65 @@ divmodsi4: divxu.w A1,A2P mov.w A2E,A0E divxu.w A1,A0P - mov.w A0E,S0 + mov.w A0E,A3 mov.w A2,A0E - extu.l S0P + extu.l A3P rts -DenHighZero: - mov.w A0E,A2 - mov.b A2H,S0L - mov.b A2L,A2H - mov.b A0H,A2L - mov.w A2,A0E - mov.b A0L,A0H - mov.b #0,A0L - mov.b #24,S2H ; only do 24 iterations - -nextbit: - shll.l A0P ; double the answer guess - rotxl.l S0P ; double remainder - sub.l A1P,S0P ; does it all fit? - bhs setone - - add.l A1P,S0P ; no, restore mistake - dec S2H - bne nextbit - rts - -setone: - inc A0L - dec S2H - bne nextbit + ; er0 = er0 / er1 + ; er3 = er0 % er1 + ; trashes er1 er2 + ; expects er1 >= 2^16 +DenHighNonZero: + mov.l er0,er3 + mov.l er1,er2 +#ifdef __H8300H__ +divmod_L21: + shlr.l er0 + shlr.l er2 ; make divisor < 2^16 + mov.w e2,e2 + bne divmod_L21 +#else + shlr.l #2,er2 ; make divisor < 2^16 + mov.w e2,e2 + beq divmod_L22A +divmod_L21: + shlr.l #2,er0 +divmod_L22: + shlr.l #2,er2 ; make divisor < 2^16 + mov.w e2,e2 + bne divmod_L21 +divmod_L22A: + rotxl.w r2 + bcs divmod_L23 + shlr.l er0 + bra divmod_L24 +divmod_L23: + rotxr.w r2 + shlr.l #2,er0 +divmod_L24: +#endif + ;; At this point, + ;; er0 contains shifted dividend + ;; er1 contains divisor + ;; er2 contains shifted divisor + ;; er3 contains dividend, later remainder + divxu.w r2,er0 ; r0 now contains the approximate quotient (AQ) + extu.l er0 + beq divmod_L25 + subs #1,er0 ; er0 = AQ - 1 + mov.w e1,r2 + mulxu.w r0,er2 ; er2 = upper (AQ - 1) * divisor + sub.w r2,e3 ; dividend - 65536 * er2 + mov.w r1,r2 + mulxu.w r0,er2 ; compute er3 = remainder (tentative) + sub.l er2,er3 ; er3 = dividend - (AQ - 1) * divisor +divmod_L25: + cmp.l er1,er3 ; is divisor < remainder? + blo divmod_L26 + adds #1,er0 + sub.l er1,er3 ; correct the remainder +divmod_L26: rts #endif @@ -636,11 +672,11 @@ setone: ;; HImode multiply. ; The H8/300 only has an 8*8->16 multiply. ; The answer is the same as: -; +; ; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256 ; (we can ignore A1.h * A0.h cause that will all off the top) ; A0 in -; A1 in +; A1 in ; A0 answer #ifdef __H8300__ @@ -649,7 +685,7 @@ setone: .global ___mulhi3 ___mulhi3: mov.b A1L,A2L ; A2l gets srcb.l - mulxu A0L,A2 ; A2 gets first sub product + mulxu A0L,A2 ; A2 gets first sub product mov.b A0H,A3L ; prepare for mulxu A1L,A3 ; second sub product @@ -657,7 +693,7 @@ ___mulhi3: add.b A3L,A2H ; sum first two terms mov.b A1H,A3L ; third sub product - mulxu A0L,A3 + mulxu A0L,A3 add.b A3L,A2H ; almost there mov.w A2,A0 ; that is @@ -669,7 +705,7 @@ ___mulhi3: #ifdef L_mulsi3 ;; SImode multiply. -;; +;; ;; I think that shift and add may be sufficient for this. Using the ;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way ;; the inner loop uses maybe 20 cycles + overhead, but terminates @@ -678,7 +714,7 @@ ___mulhi3: ;; A0/A1 src_a ;; A2/A3 src_b ;; -;; while (a) +;; while (a) ;; { ;; if (a & 1) ;; r += b; @@ -695,11 +731,10 @@ ___mulhi3: ___mulsi3: PUSHP S0P PUSHP S1P - PUSHP S2P - + sub.w S0,S0 sub.w S1,S1 - + ; while (a) _top: mov.w A0,A0 bne _more @@ -718,7 +753,7 @@ _nobit: rotxr A0L rotxr A1H rotxr A1L - + ; b <<= 1 add.w A3,A3 addx A2L,A2L @@ -726,41 +761,50 @@ _nobit: bra _top _done: - mov.w S0,A0 + mov.w S0,A0 mov.w S1,A1 - POPP S2P POPP S1P POPP S0P rts #else /* __H8300H__ */ +; +; mulsi3 for H8/300H - based on Renesas SH implementation +; +; by Toshiyasu Morita +; +; Old code: +; +; 16b * 16b = 372 states (worst case) +; 32b * 32b = 724 states (worst case) +; +; New code: +; +; 16b * 16b = 48 states +; 16b * 32b = 72 states +; 32b * 32b = 92 states +; + .global ___mulsi3 ___mulsi3: - sub.l A2P,A2P + mov.w r1,r2 ; ( 2 states) b * d + mulxu r0,er2 ; (22 states) - ; while (a) -_top: mov.l A0P,A0P - beq _done + mov.w e0,r3 ; ( 2 states) a * d + beq L_skip1 ; ( 4 states) + mulxu r1,er3 ; (22 states) + add.w r3,e2 ; ( 2 states) - ; if (a & 1) - bld #0,A0L - bcc _nobit +L_skip1: + mov.w e1,r3 ; ( 2 states) c * b + beq L_skip2 ; ( 4 states) + mulxu r0,er3 ; (22 states) + add.w r3,e2 ; ( 2 states) - ; r += b - add.l A1P,A2P - -_nobit: - ; a >>= 1 - shlr.l A0P - - ; b <<= 1 - shll.l A1P - bra _top - -_done: - mov.l A2P,A0P - rts +L_skip2: + mov.l er2,er0 ; ( 2 states) + rts ; (10 states) #endif #endif /* L_mulsi3 */ @@ -770,10 +814,10 @@ _done: space. For the H8/300H and H8S, the C version is good enough. */ #ifdef __H8300__ /* We still treat NANs different than libgcc2.c, but then, the - behaviour is undefined anyways. */ + behavior is undefined anyways. */ .global ___fixunssfsi ___fixunssfsi: - cmp.b #0x47,r0h + cmp.b #0x4f,r0h bge Large_num jmp @___fixsfsi Large_num: