tpl_linHash.H

00001 
00002 /*
00003   This file is part of Aleph system
00004 
00005   Copyright (c) 2002, 2003, 2004, 2005, 2006
00006   UNIVERSITY LOS ANDES (ULA) Merida - REPÚBLICA BOLIVARIANA DE VENEZUELA  
00007 
00008   - Center of Studies in Microelectronics & Distributed Systems (CEMISID) 
00009   - ULA Computer Science Department
00010   - FUNDACITE Mérida - Ministerio de Ciencia y Tecnología
00011 
00012   PERMISSION TO USE, COPY, MODIFY AND DISTRIBUTE THIS SOFTWARE AND ITS
00013   DOCUMENTATION IS HEREBY GRANTED, PROVIDED THAT BOTH THE COPYRIGHT
00014   NOTICE AND THIS PERMISSION NOTICE APPEAR IN ALL COPIES OF THE
00015   SOFTWARE, DERIVATIVE WORKS OR MODIFIED VERSIONS, AND ANY PORTIONS
00016   THEREOF, AND THAT BOTH NOTICES APPEAR IN SUPPORTING DOCUMENTATION.
00017 
00018   Aleph is distributed in the hope that it will be useful, but WITHOUT
00019   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
00020   or FITNESS FOR A PARTICULAR PURPOSE.
00021 
00022   UNIVERSIDAD DE LOS ANDES requests users of this software to return to 
00023 
00024   Leandro Leon
00025   CEMISID 
00026   Ed La Hechicera 
00027   3er piso, ala sur
00028   Facultad de Ingenieria 
00029   Universidad de Los Andes 
00030   Merida - REPÚBLICA BOLIVARIANA DE VENEZUELA    or
00031 
00032   lrleon@ula.ve
00033 
00034   any improvements or extensions that they make and grant Universidad 
00035   de Los Andes (ULA) the rights to redistribute these changes. 
00036 
00037   Aleph is (or was) granted by: 
00038   - Consejo de Desarrollo Cientifico, Humanistico, Tecnico de la ULA 
00039     (CDCHT)
00040   - Fundacite Mérida
00041 */
00042 
00043 
00044 # ifndef TPL_LINHASH_H
00045 # define TPL_LINHASH_H
00046 # include <primes.H>
00047 # include <tpl_dynArray.H>
00048 # include <tpl_lhash.H>
00049 
00050 # ifdef N
00051 # define NBACKUP N
00052 # undef N
00053 # endif
00054 
00055 # ifdef M
00056 # define MBACKUP M
00057 # undef M
00058 # endif
00059 
00060 using namespace Aleph;
00061 
00062 namespace Aleph {
00063 
00089     template <typename Key, 
00090               template <class> class BucketType, 
00091               class Cmp = Aleph::equal_to<Key> >
00092 class GenLinearHashTable 
00093 {
00094 
00095 public:
00096   
00099   typedef size_t (*Hash_Fct)(const Key &);
00100 
00102   typedef BucketType<Key> Bucket;
00103 
00105   static const float default_upper_alpha;
00106 
00108   static const float default_lower_alpha;
00109 
00110 private:
00111 
00112   typedef Dnode<Key> BucketList;
00113 
00114   typedef typename Dnode<Key>::Iterator BucketItor;
00115 
00116   static size_t multiply_by_two(const size_t & n) { return n << 1; }
00117 
00118   static size_t divide_by_two(const size_t & n) { return n >> 1; }
00119 
00120 
00121   DynArray<BucketList>   table;
00122   Hash_Fct               hash_fct; // puntero a función hash
00123   size_t                 M; // Tamaño de la tabla
00124   size_t                 N; // Número de elementos que tiene la tabla
00125   size_t                 busy_slots_counter; // Cantidad de entradas del
00126                                              // arreglo ocupadas 
00127   bool                   remove_all_buckets; // Indica si se deben liberar
00128                                              // las cubetas cuando se
00129                                              // llame al destructor 
00130   float upper_alpha; // factor de carga superior
00131   float lower_alpha; // factor de carga inferior
00132   size_t p; // índice de la lista que se particiona (o aumenta)
00133   size_t l; // cantidad de veces que se ha duplicado la tabla
00134   size_t MP; // guarda el valor p + M
00135   size_t MM; // producto 2*M
00136   const size_t len;
00137   size_t call_hash_fct(const Key & key) const
00138   {
00139     const size_t hash = (*hash_fct)(key);
00140 
00141     const size_t i = hash % M;
00142 
00143     return i < p ? hash % MM : i;
00144   }
00145   void expand()
00146   {
00147         // expandir la tabla hasta que la carga esté debajo de upper_alpha
00148     for (float alpha = 1.0*N/MP; alpha >= upper_alpha; alpha = 1.0*N/MP)
00149       {
00150         BucketList * src_list_ptr = table.test(p);
00151 
00152         if (src_list_ptr != NULL) // ¿table[p] está escrita?
00153           if (not src_list_ptr->is_empty()) // ¿table[p] no está vacía'
00154             {
00155               BucketList * tgt_list_ptr = NULL; 
00156          
00157                   // recorrer toda la lista de colisiones
00158               for (BucketItor it(*src_list_ptr); it.has_current(); /* nothing */)
00159                 {
00160                   Bucket * bucket = static_cast<Bucket*>(it.get_current());
00161 
00162                   it.next(); // avance al siguiente elemento de la lista
00163             
00164                   const Key & key = bucket->get_key();
00165 
00166                   const int i = (*hash_fct)(key) % MM;
00167 
00168                   if (i == p) // ¿pertenece esta clave a table[p]?
00169                     continue; // sí ==> clave permanece en table[p]
00170 
00171                   if (tgt_list_ptr == NULL)
00172                     tgt_list_ptr = &table.touch(MP);
00173 
00174                       // bucket no pertenece a table[p]] sino a table[p+m] ==>
00175                       // eliminar bucket de table[i] e insertarlo en table[p+m]
00176                   bucket->del();
00177                   tgt_list_ptr->append(bucket);
00178                 }
00179 
00180               if (src_list_ptr->is_empty()) // ¿table[p] quedó vacía?
00181                 --busy_slots_counter; // sí ==> un slot vacío 
00182 
00183               ++busy_slots_counter; // uno nuevo por table[p+M]
00184             } 
00185         ++p;
00186         ++MP;
00187         if (p == M) // (p == 2*M) ¿debe duplicarse el tamaño de la tabla?
00188           {     // sí ==> modificar el tamaño de la tabla a 2*M
00189             ++l;   // Cantidad de veces que se ha duplicado la tabla
00190             p = 0;
00191             MP = M = MM; // se les asigna 2*M
00192             MM = multiply_by_two(MM);
00193           }
00194       } 
00195   }
00196   void contract()
00197   {
00198         // contraer la tabla hasta que la carga esté por debajo de lower_alpha
00199     for (float alpha = (1.0*N)/MP; alpha <= lower_alpha and MP > len; 
00200          alpha = (1.0*N)/MP)
00201       {
00202         if (p == 0) // ¿debe dividirse entre 2 el tamaño de la tabla?
00203           {     // sí ==> actualizar tamaño de la tabla a M/2
00204             --l; // Cantidad de veces que se ha duplicado la tabla disminuye
00205             MM = M; // se divide entre dos
00206             M = divide_by_two(M);
00207             p = M - 1;
00208           }
00209         else
00210           // no ==> sólo reducir índice p
00211           --p;
00212 
00213         --MP;
00214         if (MP < table.size()) // ¿Existe table[MP]]? 
00215           {
00216             BucketList * src_list_ptr = table.test(MP);
00217 
00218             if (src_list_ptr != NULL) // ¿existe entrada para table[p+M]?
00219               {
00220                 if (not src_list_ptr->is_empty()) // ¿está vacía table[p+M]?
00221                   {     // no ==> fusionar las listas
00222                     BucketList & tgt_list = table.touch(p); // asegura table[p]
00223                     
00224                     tgt_list.concat_list(src_list_ptr);
00225 
00226                     --busy_slots_counter; // table[p+M] devino vacía
00227                   }
00228 
00229                 table.cut(MP); // eventualmente liberar memoria de table[p+M]
00230               }
00231           }
00232       }
00233   }
00234 
00235 public:
00236 
00256   GenLinearHashTable(Hash_Fct       __hash_fct, 
00257                      const size_t & __len, 
00258                      const float &  __lower_alpha,
00259                      const float &  __upper_alpha,
00260                      const bool &   __remove_all_buckets) 
00261     throw(std::exception, std::length_error, std::domain_error,
00262           std::bad_alloc, std::overflow_error) 
00263     : table(__len), hash_fct(__hash_fct), M(__len), N(0), 
00264       busy_slots_counter(0), remove_all_buckets(__remove_all_buckets), 
00265       upper_alpha(__upper_alpha), lower_alpha(__lower_alpha),
00266       p(0), l(0), MP(M), MM(multiply_by_two(M)), len(__len)
00267   {
00268     if (M == 0)
00269       std::length_error("table's length is zero");
00270 
00271     if (MM > table.max_size())
00272       throw std::length_error("table's length too big");
00273       
00274     if (upper_alpha <= lower_alpha)
00275       throw std::domain_error("lower alpha is greater than lower alpha");
00276   }
00277 
00281   void set_upper_alpha(const float &  __upper_alpha)
00282   {
00283     if (__upper_alpha <= lower_alpha)
00284       throw std::domain_error("upper_alpha lower than lower_alpha");
00285 
00286     upper_alpha = __upper_alpha;
00287   }
00288 
00292   void set_lower_alpha(const float &  __lower_alpha)
00293   {
00294     if (__lower_alpha >= upper_alpha)
00295       throw std::domain_error("lower_alpha greater than upper_alpha");
00296 
00297     lower_alpha = __lower_alpha;
00298   }
00299 
00302   void empty()
00303   {
00304         // Recorrer cada una de la listas y liberar sus cubetas
00305     for (int i = 0; i < MP; ++i)
00306       {
00307         BucketList * list = table.test(i);
00308 
00309         if (list != NULL)
00310           list->remove_all_and_delete();
00311       }
00312     
00313     M = MP = len;
00314     MM = multiply_by_two(M);
00315     N = p = l = 0;
00316 
00317     table.cut(len); 
00318   }
00319 
00320   ~GenLinearHashTable()
00321   {
00322     if (remove_all_buckets)
00323       empty();
00324   }
00328   Bucket * search(const Key & key)
00329   {
00330     const int i = call_hash_fct(key);
00331 
00332     BucketList * list = table.test(i);
00333 
00334     if (list == NULL) // ¿Ha sido escrita alguna vez table[i]?
00335       return NULL; // No ==> el elemento no se encuentra en la tabla
00336 
00337     if (list->is_empty()) 
00338       return NULL;
00339 
00340         // buscar key en la lista de cubetas
00341     for (BucketItor it(*list); it.has_current(); it.next())
00342       {
00343         Bucket * bucket = static_cast<Bucket*>(it.get_current());
00344         
00345         if (Cmp() (key, bucket->get_key()))
00346           return bucket; 
00347       }
00348     
00349     return NULL; 
00350   }
00351 
00352 
00353 
00355   const size_t & size() const { return N; }
00356 
00358   const size_t & capacity() const { return MP; }
00359 
00361   const size_t & busy_slots() const { return busy_slots_counter; }
00362 
00365   const size_t & expansions() const { return l; }
00366 
00368   Bucket* insert(Bucket * bucket)
00369   {
00370     const int i = call_hash_fct(bucket->get_key());
00371 
00372     BucketList & list = table.touch(i); // asegura memoria para table[i]
00373 
00374     if (list.is_empty())
00375       ++busy_slots_counter;
00376 
00377     list.append(bucket);
00378 
00379     ++N;
00380 
00381     expand();
00382 
00383     return bucket;
00384   }
00387   Bucket * remove(Bucket * bucket)
00388   {
00389     Bucket * next = static_cast<Bucket*>(bucket->get_next());
00390 
00391     bucket->del(); // elimine de lista de colisiones
00392 
00393     if (next->is_empty()) // ¿lista de colisiones quedó vacía?
00394       --busy_slots_counter; // sí ==> un slot vacío
00395 
00396     --N;
00397 
00398     contract();
00399 
00400     return bucket;
00401   }
00402   void print()
00403   {
00404     for (int i = 0; i < MP; ++i)
00405       {
00406         cout << "table[" << i << "] = [ ";
00407         
00408         if (table.exist(i))
00409           {
00410             BucketList & list = table.access(i);
00411 
00412             if (not list.is_empty())
00413               for (BucketItor it(list); it.has_current(); it.next())
00414                 {
00415                   Bucket * bucket = static_cast<Bucket*>(it.get_current());
00416 
00417                   const Key & key = bucket->get_key();
00418 
00419                   cout << key << ",";
00420                 }
00421           }
00422         cout << "]" << endl;
00423       }
00424   }
00425 };
00426 
00427     template <typename Key, template <class> class BucketType, class Cmp>
00428   const float
00429 GenLinearHashTable<Key, BucketType, Cmp>::default_upper_alpha = 0.95;
00430 
00431     template <typename Key, template <class> class BucketType, class Cmp>
00432   const float 
00433 GenLinearHashTable<Key, BucketType, Cmp>::default_lower_alpha = 0.25;
00434 
00435 
00455     template <typename Key, class Cmp = Aleph::equal_to<Key> >
00456 class LinearHashTable : public GenLinearHashTable<Key, LhashBucket, Cmp>
00457 {
00458   // ...
00459 
00460 typedef GenLinearHashTable<Key, LhashBucket, Cmp> Base;
00461 
00462 public:
00463 
00465   typedef typename GenLinearHashTable<Key, LhashBucket, Cmp>::Bucket Bucket;
00466 
00468   typedef typename GenLhashTable<Key, LhashBucket<Key>, Cmp>::Hash_Fct Hash_Fct;
00469 
00491   LinearHashTable(Hash_Fct       hash_fct, 
00492                   const size_t & len,
00493                   const float &  lower_alpha = Base::default__lower_alpha,
00494                   const float &  upper_alpha = Base::default_upper_alpha,
00495                   const bool &   remove_all_buckets = true)
00496     throw(std::exception, std::length_error, std::runtime_error,
00497           std::bad_alloc, std::overflow_error)
00498     : Base(hash_fct, len, lower_alpha, upper_alpha, remove_all_buckets)
00499   {
00500     // Empty
00501   }
00502 
00503 };
00504 
00524   template <typename Key, class Cmp = Aleph::equal_to<Key> >
00525 class LinearHashTableVtl : public GenLinearHashTable<Key, LhashBucketVtl, Cmp>
00526 {
00527   // ...
00528 
00529   typedef GenLinearHashTable<Key, LhashBucketVtl, Cmp> Base;
00530 
00531 public:
00532 
00534   typedef typename GenLinearHashTable<Key, LhashBucketVtl, Cmp>::Bucket Bucket;
00535 
00537       typedef typename 
00538   GenLhashTable<Key, LhashBucketVtl<Key>, Cmp >::Hash_Fct Hash_Fct;
00539 
00561   LinearHashTableVtl(Hash_Fct       hash_fct, 
00562                      const size_t & len,
00563                      const float &  lower_alpha = Base::default_lower_alpha,
00564                      const float &  upper_alpha = Base::default_upper_alpha,
00565                      const bool &   remove_all_buckets = true)
00566     throw(std::exception, std::length_error, std::runtime_error,
00567           std::bad_alloc, std::overflow_error)
00568     : Base(hash_fct, len, lower_alpha, upper_alpha, remove_all_buckets)
00569   {
00570     // Empty
00571   }
00572 
00573 };
00574 
00575 } // end namespace Aleph
00576 
00577 # ifdef NBACKUP
00578 # define N NBACKUP
00579 # undef NBACKUP
00580 # endif
00581 
00582 # ifdef MBACKUP
00583 # define M MBACKUP
00584 # undef MBACKUP
00585 # endif
00586 
00587 # endif // TPL_LINHASH_H
00588 

Leandro R. León