c - 使用动态内存分配和重新散列哈希表在 C 编程中实现哈希表

标签 c memory-management memory-leaks hashtable

我有一项 C 语言编程作业,需要实现哈希表。我试图让设置的函数正常工作,并在每次负载因子超过 1 时重新哈希。另外我应该如何释放代码中的内存空间?

//代码如下:

hashtable.h代码:

/* This header is intended to be used with a chained
   hash-table implementation.  Correspondingly, there
   are two structure definitions -- one for the table
   itself, and another for the nodes of the individual
   chains. */

#ifndef HASHTABLE_H
#define HASHTABLE_H
#include <stddef.h> // size_t

/// structure for the nodes of the chains
struct node_s {
    char *key;
    int value;
    struct node_s *link;
};

/// This is the main structure for the overall table.
struct table_s {
    /// This should be used as a pointer to a dynamically
    /// allocated array of pointers to node structures.
    struct node_s **table;

    /// This is for storing the maximum number of buckets/lists in the table.
    size_t bins;

    /// This is for storing the current number of elements in the table
    size_t size;
};

/// A convenience declaration for referring to a pointer to a HT..
typedef struct table_s *hash_t;

/// Allocate a table with some initial empty bins.
/// @param bins -- the number of bins in the table (initally empty)
/// @return -- a pointer to a dynamically allocated hash table
hash_t create_table(int bins);

/// Set the value for a key in a given hash table.
/// @note -- if this is the first time setting this key, then the
///          table must make a dynamic copy of the string.  This
///          copy must be freed when the table is freed.
/// @note -- if the table exceeds a load factor of 1 after setting
///          the key/value pair, then this function should trigger
///          rehashing into a larger table.  It will then deallocate
///          the table field in the table_s structure, but it will
///          NOT free the table address in the table parameter.
/// @param table -- a pointer to a hash table
/// @param key -- a key
/// @param value -- a value
/// @return nothing
void set(hash_t table, char *key, int value);

/// Get the value for a key in a given hash table,
///   returning a default value if the key is not in the table.
/// @param table -- a pointer to a hash table
/// @param key -- a key
/// @param defval -- the default value for nonexistent keys
/// @return -- the value associated with the key (or the default value)
int get(hash_t table, char *key, int defval);


/// Free the table and all associated keys.
/// @param table -- a pointer to a hash table
/// @return nothing
void free_table(hash_t table);
#endif

hashtable.c代码:

    // Include statements
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <limits.h>
    #include "hashtable.h"
    #include "hash.h"

    /* Functions */

    /* Function get 
       Get the value for a key in a given hash table,
       returning a default value if the key is not in the table.
       @param table -- a pointer to a hash table
       @param key -- a key
       @param defval -- the default value for nonexistent keys
       @return -- the value associated with the key (or the default value)
    */

    int get(hash_t table, char *key, int defval) {

    // set key value to default value
    int keyValue;

    // Obtain the unsigned int of the hash key
    unsigned int hashValue = hash(key);

    // Obtain the bin index by finding the moduluus of hash value and number of bins
    int bin_index = hashValue % table->bins;

    // while loop
    while(table->table[bin_index] != NULL) {

        if(strcmp(key, table->table[bin_index]->key) == 0) {
            // Set the current node to the adjacent one
            keyValue = table->table[bin_index]->value;

        } else if(strcmp(key, table->table[bin_index]->key) != 0) {

            // Create a temporary struct node
            struct node_s *temp_Node = table->table[bin_index];

            while(temp_Node->link != NULL) {
                if(strcmp(key, temp_Node->key) != 0){
                    // Set the temp node to the adjacent linked node
                    temp_Node = temp_Node->link;
                } else {
                    keyValue = temp_Node->value;
                }
            }

        } else {
            // Key value equals default value
            keyValue = defval;
        } 

    }

    return keyValue;

}

/* Function create_table */
hash_t create_table(int bins) {

    size_t numberOfBins = bins;
    int index;

    // Declare a hash table
    hash_t hashTable = malloc(sizeof(hash_t));

    // Callocing the table pointer
    hashTable->table = calloc(numberOfBins, sizeof(struct node_s*) * bins);

    // Fill in the bins
    hashTable->bins = bins;

    for(index = 0; index < bins; index++) {
        hashTable->table[index] = NULL;
    }

    // Initialize the size to 0
    hashTable->size = 0;

    return hashTable;

}

/* Function set */
void set(hash_t table, char *key, int value) {

    unsigned int hash_number = hash(key);
      int bin_index = hash_number % table->bins;

      printf("Index Number: %d ", bin_index);

    // Create pointer current_Node
      //struct node_s *current_Node = malloc(sizeof(struct node_s));
      struct node_s *current_Node = table->table[bin_index];

     // Create pointer updated_Node
    struct node_s *updated_Node = malloc(sizeof(struct node_s));
      updated_Node->key = key;
      updated_Node->value = value;
      updated_Node->link = NULL;

      printf("Updated node key: %s ", updated_Node->key);
      printf("Updated node value: %d ", updated_Node->value);


    // If else statement

      if(current_Node == NULL) {
            table->table[bin_index] = updated_Node;

            // Increase the size of the table by 1
            table->size += 1;
      } else if(current_Node != NULL) {
            /*While the current node's adjacent one is not null
             Assign the current node to it's adjacent one.
        */

            while(current_Node->link != NULL) {
                current_Node = current_Node->link;
            }

            // Assign the node's adjacent one to the updated node
            current_Node->link = updated_Node;
      }

      printf("\n");


      // Calculate the load factor
    int count = 0;
    double load_factor = count;

    for(unsigned int j = 0; j < table->bins; j++) {

            current_Node = table->table[j];

            // While loop
            while(current_Node) {
                count++;
                // Update the reference node
                current_Node = current_Node->link;
            }   

            if(count > load_factor) {
                load_factor = count;
            }

            count = 0; // reset the count

    } 

     // Nested for loop for each particular bin
     if(load_factor > 1) {
         //size_t reference_size = table->size;

         // Double the size of the table and rehash each element
         table->size = table->size * 2;

         //for(int k = 0; k < 
         // Rehashing each element

     }



}



/* Function free_table - Free the table and all associated keys.
   @param table -- a pointer to a hash table
   @return nothing  
*/
void free_table(hash_t table) {

    // Traverse each individual node
    for(unsigned int pos = 0; pos < table->size; pos++) {
        struct node_s *currentNode = table->table[pos];

        // Free the elements of the node
        free(currentNode->key);

        while(currentNode->link != NULL) {
          currentNode = currentNode->link;
          free(currentNode->link);
        }
        // Free the current node
        free(currentNode);
    }  

    // Free the table and the table inside
    free(table->table);
    free(table);


}

hash.c代码:

#include "hash.h"

/// Calculate a hash function.
/// @param key -- a null-teriminated character string
/// @return -- the hash value
unsigned int hash(char *key) {
    unsigned int sum = 0;
    while (*key != '\0')
    sum += *(key++);
    return sum;
}

hash.h代码:

/// Defines a declaration for a hash function.

#ifndef HASH_H
#define HASH_H

/// Calculate a hash function.
/// @param key -- a null-teriminated character string
/// @return -- the hash value
unsigned int hash(char *key);
#endif

driver.c代码:

#include "hashtable.h"

#include <assert.h> // assert
#include <stdint.h> // uint64_t
#include <stdio.h>
#include <stdlib.h> // rand
#include <time.h> // clock, CLOCKS_PER_SEC

// The maximum length of a key for THIS DRIVER ONLY.
// The general implementation should support arbitrary length keys!
// If you do not support arbitrary length keys, then you should lose points.
static const int max_key = 11; // strlen("4294967295") + 1;

// Generate random key/value pairs, and check they are set correctly.
// @param max_num -- the largest number to for a key
// @param trials -- the number of trials
static void checkit(int max_num, int trials) {
    char key[max_key];
    hash_t table = create_table(10);
    for (int i = 0; i < trials; i++) {
        int sample = rand() % max_num;
        sprintf(key, "%d", sample);
        set(table, key, sample);
    }
    unsigned int failures = 0;
    for (int i = 0; i < max_num; i++) {
        sprintf(key, "%d", i);
        int value = get(table, key, i);
        if(value != i) {
            fprintf(stderr, "error: key/val mismatch for %s - got %d\n",key, value);
            failures++;
        }
    }
    free_table(table);
    if (failures == 0) // ignoring the possibility of overflow
        printf("Sucessfully stored %d key/value pairs without"
               " loss of information.\n", trials);
    else
        printf("Failed on %u of %d trials (%.2f).\n", failures, trials,
               failures/(float)trials);
}

// Generate random key/value pairs and time the operation.
// @param max_num -- the largest number to for a key
// @param trials -- the number of trials
void timeit(int max_num, int trials) {
    char key[max_key];    
    hash_t table = create_table(10);
    uint64_t tics = 0;
    for (int i = 0; i < trials; i++) {
        int sample = rand() % max_num;
        sprintf(key, "%d", sample);
        uint64_t t1 = clock();
        set(table,key, 1 + get(table, key, 0));
        tics += clock() - t1;
    }
    free_table(table);
    printf("Test took %.2f seconds.\n", (float)tics/(float)CLOCKS_PER_SEC);

}

int main(int argc, char **argv) {
    // Default values
    int max_num = 100;
    int trials = 1000;

    if (argc == 3) {
        max_num = strtol(argv[1], NULL, 10);
        trials = strtol(argv[2], NULL, 10);
    } else if (argc != 1) {
        fprintf(stderr, "unexpected arguments.\n");
        fprintf(stderr, "usage: %s [max_num trials]\n", argv[0]);
        exit(1);
    }
    checkit(max_num, trials);
    timeit(max_num, trials);
    return 0;
}

最佳答案

我不会为你做家庭作业——那是作弊。

但是 - 一些观察结果:

  • 计算负载因子:哈希表的要点在于它具有分摊 O(1) 插入功能。每次在 set() 中添加一个项目时,通过迭代整个表,它会变成O(n)。相反,保留插入到 table_s 中的项目的计数。

  • Rehasing:这有效地创建了一个新的哈希表 - 非最佳解决方案是使用新的 struct node_s **table 来保存它,进行迭代在现有表上(提到的用于计算负载因子的代码正是这样做的),将每个项目插入到新表中。您可以通过不复制(从而重新分配)做得更好。 node_s 对象 - 相反,只需将其重新链接到新表中。

  • 我建议将 set()get() 分解为多个函数。为了实现上述建议,您需要一个 set() 的插入函数,该函数作用于 struct node_s **t

  • 清理:我不太明白释放内存时遇到的问题 - free_table() 乍一看似乎很合理。

关于c - 使用动态内存分配和重新散列哈希表在 C 编程中实现哈希表,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/55081483/

相关文章:

C++:动态内存分配和类构造函数

c - 在字符串数组中搜索字符串的函数

c - C语言中如何比较两个指针的值

c++ - C/C++ 中的内存对齐

java - 如何计算对象的内存使用情况?

c - C中包含结构的结构中的字节分配

iphone - 仪器内存泄漏与 ARC 混淆

AngularJS - $destroy 是否删除事件监听器?

c - 劫持系统("/bin/sh")运行任意命令

c - fseek() 和 fork() 无法正常工作