![]() |
|
|||
File indexing completed on 2025-05-11 08:24:18
0001 /* SPDX-License-Identifier: BSD-2-Clause */ 0002 0003 /** 0004 * @file 0005 * 0006 * @ingroup rtems_rfs 0007 * 0008 * @brief RTEMS File Systems Directory Hash function 0009 */ 0010 0011 /* 0012 * Copyright (C) 2010 Chris Johns <chrisj@rtems.org> 0013 * 0014 * Redistribution and use in source and binary forms, with or without 0015 * modification, are permitted provided that the following conditions 0016 * are met: 0017 * 1. Redistributions of source code must retain the above copyright 0018 * notice, this list of conditions and the following disclaimer. 0019 * 2. Redistributions in binary form must reproduce the above copyright 0020 * notice, this list of conditions and the following disclaimer in the 0021 * documentation and/or other materials provided with the distribution. 0022 * 0023 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 0024 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 0025 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 0026 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 0027 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 0028 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 0029 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 0030 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 0031 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 0032 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 0033 * POSSIBILITY OF SUCH DAMAGE. 
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <rtems/rfs/rtems-rfs-dir-hash.h>

#ifdef __rtems__
# include <machine/endian.h> /* attempt to define endianness */
#endif
#ifdef linux
# include <endian.h>         /* attempt to define endianness */
#endif

/*
 * My best guess at if you are big-endian or little-endian.  This may
 * need adjustment.
 *
 * NOTE(review): this is only an optimisation switch.  When neither macro
 * ends up set to 1, the hash falls back to the byte-at-a-time path below,
 * which composes words in little-endian order and therefore produces the
 * same hash values — just more slowly.  A wrong little-endian guess on a
 * big-endian machine, however, would change the on-disk hash values.
 */
#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
     __BYTE_ORDER == __LITTLE_ENDIAN) || \
    (defined(i386) || defined(__i386__) || defined(__i486__) || \
     defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
# define HASH_LITTLE_ENDIAN 1
# define HASH_BIG_ENDIAN 0
#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
       __BYTE_ORDER == __BIG_ENDIAN) || \
      (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 1
#else
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 0
#endif

/*
 * hashsize/hashmask size a power-of-two hash table; rot is a 32-bit
 * rotate-left.  rot(x,k) would be undefined for k == 0 or k >= 32, but
 * every use in mix()/final() below passes a constant in the range 4..27,
 * so it is safe here.
 */
#define hashsize(n) ((uint32_t)1<<(n))
#define hashmask(n) (hashsize(n)-1)
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))

/*
-------------------------------------------------------------------------------
mix -- mix 3 32-bit values reversibly.

This is reversible, so any information in (a,b,c) before mix() is still in
(a,b,c) after mix().

If four pairs of (a,b,c) inputs are run through mix(), or through mix() in
reverse, there are at least 32 bits of the output that are sometimes the same
for one pair and different for another pair.  This was tested for:

* pairs that differed by one bit, by two bits, in any combination of top bits
  of (a,b,c), or in any combination of bottom bits of (a,b,c).

* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed the
  output delta to a Gray code (a^(a>>1)) so a string of 1's (as is commonly
  produced by subtraction) look like a single 1-bit difference.

* the base values were pseudorandom, all zero but one bit set, or all zero
  plus a counter that starts at zero.

Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that satisfy this
are:

    4  6  8 16 19  4
    9 15  3 18 27 15
   14  9  3  7 17  3

Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing for "differ" defined
as + with a one-bit base and a two-bit delta.  I used
http://burtleburtle.net/bob/hash/avalanche.html to choose the operations,
constants, and arrangements of the variables.

This does not achieve avalanche.  There are input bits of (a,b,c) that fail
to affect some output bits of (a,b,c), especially of a.  The most thoroughly
mixed value is c, but it doesn't really even achieve avalanche in c.

This allows some parallelism.  Read-after-writes are good at doubling the
number of bits affected, so the goal of mixing pulls in the opposite
direction as the goal of parallelism.  I did what I could.  Rotates seem to
cost as much as shifts on every machine I could lay my hands on, and rotates
are much kinder to the top and bottom bits, so I used rotates.
-------------------------------------------------------------------------------
*/
#define mix(a,b,c) \
{ \
  a -= c;  a ^= rot(c, 4);  c += b; \
  b -= a;  b ^= rot(a, 6);  a += c; \
  c -= b;  c ^= rot(b, 8);  b += a; \
  a -= c;  a ^= rot(c,16);  c += b; \
  b -= a;  b ^= rot(a,19);  a += c; \
  c -= b;  c ^= rot(b, 4);  b += a; \
}

/*
-------------------------------------------------------------------------------
final -- final mixing of 3 32-bit values (a,b,c) into c

Pairs of (a,b,c) values differing in only a few bits will usually produce
values of c that look totally different.  This was tested for

* pairs that differed by one bit, by two bits, in any combination of top bits
  of (a,b,c), or in any combination of bottom bits of (a,b,c).

* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed the
  output delta to a Gray code (a^(a>>1)) so a string of 1's (as is commonly
  produced by subtraction) look like a single 1-bit difference.

* the base values were pseudorandom, all zero but one bit set, or all zero
  plus a counter that starts at zero.

These constants passed:
  14 11 25 16 4 14 24
  12 14 25 16 4 14 24
and these came close:
   4  8 15 26 3 22 24
  10  8 15 26 3 22 24
  11  8 15 26 3 22 24
-------------------------------------------------------------------------------
*/
#define final(a,b,c) \
{ \
  c ^= b; c -= rot(b,14); \
  a ^= c; a -= rot(c,11); \
  b ^= a; b -= rot(a,25); \
  c ^= b; c -= rot(b,16); \
  a ^= c; a -= rot(c,4);  \
  b ^= a; b -= rot(a,14); \
  c ^= b; c -= rot(b,24); \
}

/**
 * The following is the documentation from Bob Jenkins' hash function:
 *
 *   http://burtleburtle.net/bob/hash/doobs.html
 *
 * The function hashlittle() has been renamed (to rtems_rfs_dir_hash).
 *
 * hashlittle() -- hash a variable-length key into a 32-bit value
 *
 *   k       : the key (the unaligned variable-length array of bytes)
 *   length  : the length of the key, counting by bytes
 *   initval : can be any 4-byte value
 *
 * Returns a 32-bit value.  Every bit of the key affects every bit of the
 * return value.  Two keys differing by one or two bits will have totally
 * different hash values.
 *
 * The best hash table sizes are powers of 2.  There is no need to do mod a
 * prime (mod is sooo slow!).  If you need less than 32 bits, use a bitmask.
 * For example, if you need only 10 bits, do h = (h & hashmask(10)); In which
 * case, the hash table should have hashsize(10) elements.
 *
 * If you are hashing n strings (uint8_t **)k, do it like this: for (i=0, h=0;
 * i<n; ++i) h = hashlittle( k[i], len[i], h);
 *
 * By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
 * code any way you wish, private, educational, or commercial.  It's free.
 *
 * Use for hash table lookup, or anything where one collision in 2^^32 is
 * acceptable.  Do NOT use for cryptographic purposes.
0193 */ 0194 0195 #define initval (20010928) 0196 uint32_t 0197 rtems_rfs_dir_hash (const void *key, size_t length) 0198 { 0199 uint32_t a,b,c; /* internal state */ 0200 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ 0201 0202 /* Set up the internal state */ 0203 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; 0204 0205 u.ptr = key; 0206 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { 0207 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ 0208 /*const uint8_t *k8;*/ 0209 0210 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ 0211 while (length > 12) 0212 { 0213 a += k[0]; 0214 b += k[1]; 0215 c += k[2]; 0216 mix(a,b,c); 0217 length -= 12; 0218 k += 3; 0219 } 0220 0221 /*----------------------------- handle the last (probably partial) block */ 0222 /* 0223 * "k[2]&0xffffff" actually reads beyond the end of the string, but 0224 * then masks off the part it's not allowed to read. Because the 0225 * string is aligned, the masked-off tail is in the same word as the 0226 * rest of the string. Every machine with memory protection I've seen 0227 * does it on word boundaries, so is OK with this. But VALGRIND will 0228 * still catch it and complain. The masking trick does make the hash 0229 * noticably faster for short strings (like English words). 
0230 */ 0231 #ifndef VALGRIND 0232 0233 switch(length) 0234 { 0235 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 0236 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; 0237 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; 0238 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; 0239 case 8 : b+=k[1]; a+=k[0]; break; 0240 case 7 : b+=k[1]&0xffffff; a+=k[0]; break; 0241 case 6 : b+=k[1]&0xffff; a+=k[0]; break; 0242 case 5 : b+=k[1]&0xff; a+=k[0]; break; 0243 case 4 : a+=k[0]; break; 0244 case 3 : a+=k[0]&0xffffff; break; 0245 case 2 : a+=k[0]&0xffff; break; 0246 case 1 : a+=k[0]&0xff; break; 0247 case 0 : return c; /* zero length strings require no mixing */ 0248 } 0249 0250 #else /* make valgrind happy */ 0251 0252 k8 = (const uint8_t *)k; 0253 switch(length) 0254 { 0255 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 0256 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ 0257 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ 0258 case 9 : c+=k8[8]; /* fall through */ 0259 case 8 : b+=k[1]; a+=k[0]; break; 0260 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ 0261 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ 0262 case 5 : b+=k8[4]; /* fall through */ 0263 case 4 : a+=k[0]; break; 0264 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ 0265 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ 0266 case 1 : a+=k8[0]; break; 0267 case 0 : return c; 0268 } 0269 0270 #endif /* !valgrind */ 0271 0272 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { 0273 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ 0274 const uint8_t *k8; 0275 0276 /*--------------- all but last block: aligned reads and different mixing */ 0277 while (length > 12) 0278 { 0279 a += k[0] + (((uint32_t)k[1])<<16); 0280 b += k[2] + (((uint32_t)k[3])<<16); 0281 c += k[4] + (((uint32_t)k[5])<<16); 0282 mix(a,b,c); 0283 length -= 12; 0284 k += 6; 0285 } 0286 0287 /*----------------------------- handle the last (probably partial) block */ 0288 k8 = (const uint8_t *)k; 
0289 switch(length) 0290 { 0291 case 12: c+=k[4]+(((uint32_t)k[5])<<16); 0292 b+=k[2]+(((uint32_t)k[3])<<16); 0293 a+=k[0]+(((uint32_t)k[1])<<16); 0294 break; 0295 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ 0296 case 10: c+=k[4]; 0297 b+=k[2]+(((uint32_t)k[3])<<16); 0298 a+=k[0]+(((uint32_t)k[1])<<16); 0299 break; 0300 case 9 : c+=k8[8]; /* fall through */ 0301 case 8 : b+=k[2]+(((uint32_t)k[3])<<16); 0302 a+=k[0]+(((uint32_t)k[1])<<16); 0303 break; 0304 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ 0305 case 6 : b+=k[2]; 0306 a+=k[0]+(((uint32_t)k[1])<<16); 0307 break; 0308 case 5 : b+=k8[4]; /* fall through */ 0309 case 4 : a+=k[0]+(((uint32_t)k[1])<<16); 0310 break; 0311 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ 0312 case 2 : a+=k[0]; 0313 break; 0314 case 1 : a+=k8[0]; 0315 break; 0316 case 0 : return c; /* zero length requires no mixing */ 0317 } 0318 0319 } else { /* need to read the key one byte at a time */ 0320 const uint8_t *k = (const uint8_t *)key; 0321 0322 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ 0323 while (length > 12) 0324 { 0325 a += k[0]; 0326 a += ((uint32_t)k[1])<<8; 0327 a += ((uint32_t)k[2])<<16; 0328 a += ((uint32_t)k[3])<<24; 0329 b += k[4]; 0330 b += ((uint32_t)k[5])<<8; 0331 b += ((uint32_t)k[6])<<16; 0332 b += ((uint32_t)k[7])<<24; 0333 c += k[8]; 0334 c += ((uint32_t)k[9])<<8; 0335 c += ((uint32_t)k[10])<<16; 0336 c += ((uint32_t)k[11])<<24; 0337 mix(a,b,c); 0338 length -= 12; 0339 k += 12; 0340 } 0341 0342 /*-------------------------------- last block: affect all 32 bits of (c) */ 0343 switch(length) /* all the case statements fall through */ 0344 { 0345 case 12: c+=((uint32_t)k[11])<<24; 0346 case 11: c+=((uint32_t)k[10])<<16; 0347 case 10: c+=((uint32_t)k[9])<<8; 0348 case 9 : c+=k[8]; 0349 case 8 : b+=((uint32_t)k[7])<<24; 0350 case 7 : b+=((uint32_t)k[6])<<16; 0351 case 6 : b+=((uint32_t)k[5])<<8; 0352 case 5 : b+=k[4]; 0353 case 4 : a+=((uint32_t)k[3])<<24; 0354 
case 3 : a+=((uint32_t)k[2])<<16; 0355 case 2 : a+=((uint32_t)k[1])<<8; 0356 case 1 : a+=k[0]; 0357 break; 0358 case 0 : return c; 0359 } 0360 } 0361 0362 final(a,b,c); 0363 return c; 0364 } 0365
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |