13.19 Texthash Interface

ORIGIN '~beta/basiclib/betaenv';
LIB_DEF 'texthash' '../lib';
--- lib:attributes ---
(* This is a pattern implementing a hash of a text into an integer.
 * 
 * Usage:
 *      ph: @honeyman;
 *      ...
 * do ph.init;
 *    ...
 *    'Some text' -> ph.hash -> hashValue
 * 
 * This version is a BETA implementation of a hash function found in:
 *
 *       C News Source, which contains the following statement:
 *
 * "dbz.c  V3.2
 *
 * Copyright 1988 Jon Zeeff (zeeff@b-tech.ann-arbor.mi.us)
 * You can use this code in any manner, as long as you leave my name on it
 * and don't hold me responsible for any problems with it.
 *
 * Hacked on by gdb@ninja.UUCP (David Butler); Sun Jun  5 00:27:08 CDT 1988
 *
 * Various improvments + INCORE by moraes@ai.toronto.edu (Mark Moraes)
 * 
 * Major reworking by Henry Spencer as part of the C News project."
 *
 * The following text is the original comment before the hash function:
 *
 * This is a simplified version of the pathalias hashing function.
 * Thanks to Steve Belovin and Peter Honeyman
 *
 * hash a string into a long int.  31 bit crc (from andrew appel).
 * the crc table is computed at run time by crcinit() -- we could
 * precompute, but it takes 1 clock tick on a 750.
 *
 * This fast table calculation works only if POLY is a prime polynomial
 * in the field of integers modulo 2.  Since the coefficients of a
 * 32-bit polynomial won't fit in a 32-bit word, the high-order bit is
 * implicit.  IT MUST ALSO BE THE CASE that the coefficients of orders
 * 31 down to 25 are zero.  Happily, we have candidates, from
 * E. J.  Watson, "Primitive Polynomials (Mod 2)", Math. Comp. 16 (1962):
 *      x^32 + x^7 + x^5 + x^3 + x^2 + x^1 + x^0
 *      x^31 + x^3 + x^0
 *
 * We reverse the bits to get:
 *      111101010000000000000000000000001 but drop the last 1
 *         f   5   0   0   0   0   0   0
 *      010010000000000000000000000000001 ditto, for 31-bit crc
 *         4   8   0   0   0   0   0   0
 *)

honeyman: (* BETA VERSION *)
  (# POLY: (# exit 0x48000000 #);
     CrcTable: [128]@Integer;

     init:
       (# j,sum: @Integer;
       do 
          (for i:128 repeat
               0->sum; 6->j;
               loop:
                 (#
                 do (if ((i-1) %Band (1 %sll j)) then
                        (sum %Bxor (POLY %srl j))->sum;
                    if);
                    (if j>0 then j-1->j; restart loop if);
                 #);
               sum->CrcTable[i];
          for);
       #);

     hash: @  
       (# t: ^Text; sum: @Integer;
       enter t[]
       do 0->sum;
          (for i:t.lgth repeat
               (sum %srl 7) %Bxor CrcTable[((sum %Bxor t.T[i]) %Band 0x7f) + 1]
                 ->sum;
          for);
       exit sum
       #);
  #)


13.19 Texthash Interface
© 1990-2004 Mjølner Informatics
[Modified: Saturday January 22nd 2000 at 0:18]