X-Git-Url: https://oss.titaniummirror.com/gitweb?a=blobdiff_plain;f=zlib%2Fcontrib%2Fmasm686%2Fmatch.asm;fp=zlib%2Fcontrib%2Fmasm686%2Fmatch.asm;h=4b03a71abd5998f095e500f6bbf6ddcca9a1eaad;hb=6fed43773c9b0ce596dca5686f37ac3fc0fa11c0;hp=0000000000000000000000000000000000000000;hpb=27b11d56b743098deb193d510b337ba22dc52e5c;p=msp430-gcc.git diff --git a/zlib/contrib/masm686/match.asm b/zlib/contrib/masm686/match.asm new file mode 100644 index 00000000..4b03a71a --- /dev/null +++ b/zlib/contrib/masm686/match.asm @@ -0,0 +1,413 @@ + +; match.asm -- Pentium-Pro optimized version of longest_match() +; +; Updated for zlib 1.1.3 and converted to MASM 6.1x +; Copyright (C) 2000 Dan Higdon +; and Chuck Walbourn +; Corrections by Cosmin Truta +; +; This is free software; you can redistribute it and/or modify it +; under the terms of the GNU General Public License. + +; Based on match.S +; Written for zlib 1.1.2 +; Copyright (C) 1998 Brian Raiter +; +; Modified by Gilles Vollant (2005) for add gzhead and gzindex + + .686P + .MODEL FLAT + +;=========================================================================== +; EQUATES +;=========================================================================== + +MAX_MATCH EQU 258 +MIN_MATCH EQU 3 +MIN_LOOKAHEAD EQU (MAX_MATCH + MIN_MATCH + 1) +MAX_MATCH_8 EQU ((MAX_MATCH + 7) AND (NOT 7)) + +;=========================================================================== +; STRUCTURES +;=========================================================================== + +; This STRUCT assumes a 4-byte alignment + +DEFLATE_STATE STRUCT +ds_strm dd ? +ds_status dd ? +ds_pending_buf dd ? +ds_pending_buf_size dd ? +ds_pending_out dd ? +ds_pending dd ? +ds_wrap dd ? +; gzhead and gzindex are added in zlib 1.2.2.2 (see deflate.h) +ds_gzhead dd ? +ds_gzindex dd ? +ds_data_type db ? +ds_method db ? + db ? ; padding + db ? ; padding +ds_last_flush dd ? +ds_w_size dd ? ; used +ds_w_bits dd ? +ds_w_mask dd ? ; used +ds_window dd ? ; used +ds_window_size dd ? +ds_prev dd ? ; used +ds_head dd ? +ds_ins_h dd ? +ds_hash_size dd ? +ds_hash_bits dd ? +ds_hash_mask dd ? +ds_hash_shift dd ? +ds_block_start dd ? +ds_match_length dd ? ; used +ds_prev_match dd ? ; used +ds_match_available dd ? +ds_strstart dd ? ; used +ds_match_start dd ? ; used +ds_lookahead dd ? ; used +ds_prev_length dd ? ; used +ds_max_chain_length dd ? ; used +ds_max_laxy_match dd ? +ds_level dd ? +ds_strategy dd ? +ds_good_match dd ? ; used +ds_nice_match dd ? ; used + +; Don't need anymore of the struct for match +DEFLATE_STATE ENDS + +;=========================================================================== +; CODE +;=========================================================================== +_TEXT SEGMENT + +;--------------------------------------------------------------------------- +; match_init +;--------------------------------------------------------------------------- + ALIGN 4 +PUBLIC _match_init +_match_init PROC + ; no initialization needed + ret +_match_init ENDP + +;--------------------------------------------------------------------------- +; uInt longest_match(deflate_state *deflatestate, IPos curmatch) +;--------------------------------------------------------------------------- + ALIGN 4 + +PUBLIC _longest_match +_longest_match PROC + +; Since this code uses EBP for a scratch register, the stack frame must +; be manually constructed and referenced relative to the ESP register. + +; Stack image +; Variables +chainlenwmask = 0 ; high word: current chain len + ; low word: s->wmask +window = 4 ; local copy of s->window +windowbestlen = 8 ; s->window + bestlen +scanend = 12 ; last two bytes of string +scanstart = 16 ; first two bytes of string +scanalign = 20 ; dword-misalignment of string +nicematch = 24 ; a good enough match size +bestlen = 28 ; size of best match so far +scan = 32 ; ptr to string wanting match +varsize = 36 ; number of bytes (also offset to last saved register) + +; Saved Registers (actually pushed into place) +ebx_save = 36 +edi_save = 40 +esi_save = 44 +ebp_save = 48 + +; Parameters +retaddr = 52 +deflatestate = 56 +curmatch = 60 + +; Save registers that the compiler may be using + push ebp + push edi + push esi + push ebx + +; Allocate local variable space + sub esp,varsize + +; Retrieve the function arguments. ecx will hold cur_match +; throughout the entire function. edx will hold the pointer to the +; deflate_state structure during the function's setup (before +; entering the main loop). + + mov edx, [esp+deflatestate] +ASSUME edx:PTR DEFLATE_STATE + + mov ecx, [esp+curmatch] + +; uInt wmask = s->w_mask; +; unsigned chain_length = s->max_chain_length; +; if (s->prev_length >= s->good_match) { +; chain_length >>= 2; +; } + + mov eax, [edx].ds_prev_length + mov ebx, [edx].ds_good_match + cmp eax, ebx + mov eax, [edx].ds_w_mask + mov ebx, [edx].ds_max_chain_length + jl SHORT LastMatchGood + shr ebx, 2 +LastMatchGood: + +; chainlen is decremented once beforehand so that the function can +; use the sign flag instead of the zero flag for the exit test. +; It is then shifted into the high word, to make room for the wmask +; value, which it will always accompany. + + dec ebx + shl ebx, 16 + or ebx, eax + mov [esp+chainlenwmask], ebx + +; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + + mov eax, [edx].ds_nice_match + mov ebx, [edx].ds_lookahead + cmp ebx, eax + jl SHORT LookaheadLess + mov ebx, eax +LookaheadLess: + mov [esp+nicematch], ebx + +;/* register Bytef *scan = s->window + s->strstart; */ + + mov esi, [edx].ds_window + mov [esp+window], esi + mov ebp, [edx].ds_strstart + lea edi, [esi+ebp] + mov [esp+scan],edi + +;/* Determine how many bytes the scan ptr is off from being */ +;/* dword-aligned. */ + + mov eax, edi + neg eax + and eax, 3 + mov [esp+scanalign], eax + +;/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ +;/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ + + mov eax, [edx].ds_w_size + sub eax, MIN_LOOKAHEAD + sub ebp, eax + jg SHORT LimitPositive + xor ebp, ebp +LimitPositive: + +;/* int best_len = s->prev_length; */ + + mov eax, [edx].ds_prev_length + mov [esp+bestlen], eax + +;/* Store the sum of s->window + best_len in %esi locally, and in %esi. */ + + add esi, eax + mov [esp+windowbestlen], esi + +;/* register ush scan_start = *(ushf*)scan; */ +;/* register ush scan_end = *(ushf*)(scan+best_len-1); */ +;/* Posf *prev = s->prev; */ + + movzx ebx, WORD PTR[edi] + mov [esp+scanstart], ebx + movzx ebx, WORD PTR[eax+edi-1] + mov [esp+scanend], ebx + mov edi, [edx].ds_prev + +;/* Jump into the main loop. */ + + mov edx, [esp+chainlenwmask] + jmp SHORT LoopEntry + +;/* do { +; * match = s->window + cur_match; +; * if (*(ushf*)(match+best_len-1) != scan_end || +; * *(ushf*)match != scan_start) continue; +; * [...] +; * } while ((cur_match = prev[cur_match & wmask]) > limit +; * && --chain_length != 0); +; * +; * Here is the inner loop of the function. The function will spend the +; * majority of its time in this loop, and majority of that time will +; * be spent in the first ten instructions. +; * +; * Within this loop: +; * %ebx = scanend +; * %ecx = curmatch +; * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) +; * %esi = windowbestlen - i.e., (window + bestlen) +; * %edi = prev +; * %ebp = limit +; */ + + ALIGN 4 +LookupLoop: + and ecx, edx + movzx ecx, WORD PTR[edi+ecx*2] + cmp ecx, ebp + jbe LeaveNow + sub edx, 000010000H + js LeaveNow + +LoopEntry: + movzx eax, WORD PTR[esi+ecx-1] + cmp eax, ebx + jnz SHORT LookupLoop + + mov eax, [esp+window] + movzx eax, WORD PTR[eax+ecx] + cmp eax, [esp+scanstart] + jnz SHORT LookupLoop + +;/* Store the current value of chainlen. */ + + mov [esp+chainlenwmask], edx + +;/* Point %edi to the string under scrutiny, and %esi to the string we */ +;/* are hoping to match it up with. In actuality, %esi and %edi are */ +;/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ +;/* initialized to -(MAX_MATCH_8 - scanalign). */ + + mov esi, [esp+window] + mov edi, [esp+scan] + add esi, ecx + mov eax, [esp+scanalign] + mov edx, -MAX_MATCH_8 + lea edi, [edi+eax+MAX_MATCH_8] + lea esi, [esi+eax+MAX_MATCH_8] + +;/* Test the strings for equality, 8 bytes at a time. At the end, +; * adjust %edx so that it is offset to the exact byte that mismatched. +; * +; * We already know at this point that the first three bytes of the +; * strings match each other, and they can be safely passed over before +; * starting the compare loop. So what this code does is skip over 0-3 +; * bytes, as much as necessary in order to dword-align the %edi +; * pointer. (%esi will still be misaligned three times out of four.) +; * +; * It should be confessed that this loop usually does not represent +; * much of the total running time. Replacing it with a more +; * straightforward "rep cmpsb" would not drastically degrade +; * performance. +; */ + +LoopCmps: + mov eax, DWORD PTR[esi+edx] + xor eax, DWORD PTR[edi+edx] + jnz SHORT LeaveLoopCmps + + mov eax, DWORD PTR[esi+edx+4] + xor eax, DWORD PTR[edi+edx+4] + jnz SHORT LeaveLoopCmps4 + + add edx, 8 + jnz SHORT LoopCmps + jmp LenMaximum + ALIGN 4 + +LeaveLoopCmps4: + add edx, 4 + +LeaveLoopCmps: + test eax, 00000FFFFH + jnz SHORT LenLower + + add edx, 2 + shr eax, 16 + +LenLower: + sub al, 1 + adc edx, 0 + +;/* Calculate the length of the match. If it is longer than MAX_MATCH, */ +;/* then automatically accept it as the best possible match and leave. */ + + lea eax, [edi+edx] + mov edi, [esp+scan] + sub eax, edi + cmp eax, MAX_MATCH + jge SHORT LenMaximum + +;/* If the length of the match is not longer than the best match we */ +;/* have so far, then forget it and return to the lookup loop. */ + + mov edx, [esp+deflatestate] + mov ebx, [esp+bestlen] + cmp eax, ebx + jg SHORT LongerMatch + mov esi, [esp+windowbestlen] + mov edi, [edx].ds_prev + mov ebx, [esp+scanend] + mov edx, [esp+chainlenwmask] + jmp LookupLoop + ALIGN 4 + +;/* s->match_start = cur_match; */ +;/* best_len = len; */ +;/* if (len >= nice_match) break; */ +;/* scan_end = *(ushf*)(scan+best_len-1); */ + +LongerMatch: + mov ebx, [esp+nicematch] + mov [esp+bestlen], eax + mov [edx].ds_match_start, ecx + cmp eax, ebx + jge SHORT LeaveNow + mov esi, [esp+window] + add esi, eax + mov [esp+windowbestlen], esi + movzx ebx, WORD PTR[edi+eax-1] + mov edi, [edx].ds_prev + mov [esp+scanend], ebx + mov edx, [esp+chainlenwmask] + jmp LookupLoop + ALIGN 4 + +;/* Accept the current string, with the maximum possible length. */ + +LenMaximum: + mov edx, [esp+deflatestate] + mov DWORD PTR[esp+bestlen], MAX_MATCH + mov [edx].ds_match_start, ecx + +;/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ +;/* return s->lookahead; */ + +LeaveNow: + mov edx, [esp+deflatestate] + mov ebx, [esp+bestlen] + mov eax, [edx].ds_lookahead + cmp ebx, eax + jg SHORT LookaheadRet + mov eax, ebx +LookaheadRet: + +; Restore the stack and return from whence we came. + + add esp, varsize + pop ebx + pop esi + pop edi + pop ebp + ret + +_longest_match ENDP + +_TEXT ENDS +END