MDGSF Software Engineer

[C/C++] hashtable

2016-07-01

冲突解决技术可以分为两类:开散列方法( open hashing,也称为拉链法,separate chaining )和闭散列方法( closed hashing,也称为开地址方法,open addressing )。这两种方法的不同之处在于:开散列法把发生冲突的关键码存储在散列表主表之外,而闭散列法把发生冲突的关键码存储在表中另一个槽内。

这里使用的是开散列。

A simple hashtabl.

#ifndef HASHTABLE_HJ
#define HASHTABLE_HJ

#include <stdio.h>
#include <string.h>

#define DEFAULT_BUCKET_SIZE 12000

namespace JCL
{
    namespace pub
    {

        enum EHashType {
            EDJBHASH,
            EAPHASH
        };

        typedef struct _SNode {
            struct _SNode * m_pNext;
            char m_acKey[BUFSIZ];
            void * m_pvVal;
            _SNode(): m_pNext(NULL), m_pvVal(NULL)
            {
                memset(m_acKey, 0, sizeof(m_acKey));
            }
            ~_SNode()
            {
            }
        }SNode;

        class CHashTable
        {
            public:
                CHashTable(
                        EHashType iHashType = EDJBHASH,
                        int iMaxBucketSize = DEFAULT_BUCKET_SIZE);

                virtual ~CHashTable();

                /*
                 * @param pcKey[IN]: the string key
                 * @return the value, failed: NULL.
                 */
                void * m_pvFind(const char * pcKey);

                /*
                 * @param pcKey[IN]: use string as key
                 * @param pvVal[IN]: use to store the value.
                 * @return 0:success, -1:failed.
                 */
                int m_iInsert(const char * pcKey, void * pvVal);


                /*
                 * @param pcKey[IN]: the string key
                 * @return 0:success, -1:failed.
                 */
                int m_iDelete(const char * pcKey);

                /*
                 * get the current size of hashtable.
                 */
                int m_iGetSize();

                void m_vShowStatus();

            private:
                CHashTable(const CHashTable&);
                CHashTable& operator = (const CHashTable&);

                SNode * m_pstFindNode(const char * pcKey);

                unsigned int m_iHashFunction(const char * pcKey);
                unsigned int DJBHash(const char * str);
                unsigned int APHash(const char* str);
                EHashType m_iHashType;

                int m_iSize;
                int m_iMaxBucketSize;
                SNode ** m_bucket;
        };
    }
}

#endif
#include "hashtable.h"
#include <stdlib.h>

#define MAX_BUCKET_SIZE 120000

using namespace JCL::pub;

CHashTable::CHashTable(
    EHashType iHashType,
    int iMaxBucketSize) :
    m_iHashType(iHashType),
    m_iSize(0),
    m_iMaxBucketSize(iMaxBucketSize)
{
    m_iMaxBucketSize = m_iMaxBucketSize > MAX_BUCKET_SIZE ? MAX_BUCKET_SIZE : m_iMaxBucketSize;
    m_bucket = new SNode*[m_iMaxBucketSize];
    for (int i = 0; i < m_iMaxBucketSize; i++) {
        m_bucket[i] = NULL;
    }
}

CHashTable::~CHashTable()
{
    SNode * pNode = NULL;
    for (int i = 0; i < m_iMaxBucketSize; i++)
    {
        pNode = m_bucket[ i ];
        if(NULL != pNode) {
            delete pNode;
        }
    }
    delete m_bucket;
}

void
CHashTable::m_vShowStatus()
{
    printf("\n******************************\n");
    printf("m_iMaxBucketSize: %d\n", m_iMaxBucketSize);
    printf("m_iSize: %d\n", m_iSize);
    int iEmpty = 0;
    int iOne = 0;
    int iTwo = 0;
    int iOthers = 0;
    for (int i = 0; i < m_iMaxBucketSize; i++)
    {
        SNode * pNode = m_bucket[i];
        if(NULL == pNode)
        {
            iEmpty++;
        }
        else
        {
            int iCurNum = 0;
            SNode * pCurNode = pNode;
            while (pCurNode != NULL) {
                iCurNum++;
                pCurNode = pCurNode->m_pNext;
            }

            switch(iCurNum)
            {
            case 1:
                iOne++;
                break;
            case 2:
                iTwo++;
                break;
            default:
                iOthers++;
                break;
            }
        }
    }
    printf("iOne: %d\n", iOne);
    printf("iTwo: %d\n", iTwo);
    printf("iOthers: %d\n", iOthers);
    printf("******************************\n");
}

int
CHashTable::m_iGetSize()
{
    return m_iSize;
}

int
CHashTable::m_iInsert(const char * pcKey, void * pvVal)
{
    if(NULL == pcKey || strlen(pcKey) == 0) {
        printf("m_iInsert() invalid parameters\n");
        return -1;
    }

    unsigned int uiKey = m_iHashFunction(pcKey);
    SNode * pNode = m_bucket[ uiKey % m_iMaxBucketSize ];
    if(NULL == pNode) {
        pNode = new SNode();
        pNode->m_pNext = NULL;
        strcpy(pNode->m_acKey, pcKey);
        pNode->m_pvVal = pvVal;

        m_bucket[ uiKey % m_iMaxBucketSize ] = pNode;
        m_iSize++;
        return 0;
    }

    SNode * pCurNode = pNode;
    while (pCurNode != NULL)
    {
        if(strcmp(pCurNode->m_acKey, pcKey) == 0)
        {
            printf("already exists pcKey=%s\n", pcKey);
            return -1;
        }
        pCurNode = pCurNode->m_pNext;
    }

    SNode * pNewNode = new SNode();
    strcpy(pNewNode->m_acKey, pcKey);
    pNewNode->m_pvVal = pvVal;

    pNewNode->m_pNext = pNode->m_pNext;
    pNode->m_pNext = pNewNode;
    m_iSize++;
    return 0;
}

int
CHashTable::m_iDelete(const char * pcKey)
{
    if(NULL == pcKey || strlen(pcKey) == 0) {
        printf("m_iDelete invalid parameters\n");
        return -1;
    }

    unsigned int uiKey = m_iHashFunction(pcKey);

    SNode * pNode = m_bucket[ uiKey % m_iMaxBucketSize ];
    if(NULL == pNode){
        printf("Not exist in hashtable\n");
        return -1;
    }

    if(strcmp(pNode->m_acKey, pcKey) == 0) {
        m_bucket[ uiKey % m_iMaxBucketSize ] = pNode->m_pNext;
        delete pNode;
        m_iSize--;
        return 0;
    }

    SNode * p1 = pNode->m_pNext;
    SNode * p2 = pNode;
    while (p1 != NULL)
    {
        if(strcmp(p1->m_acKey, pcKey) == 0)
        {
            p2->m_pNext = p1->m_pNext;
            delete p1;
            m_iSize--;
            return 0;
        }

        p1 = p1->m_pNext;
        p2 = p2->m_pNext;
    }

    return -1;
}

void *
CHashTable::m_pvFind(const char * pcKey)
{
    if(NULL == pcKey) {
        return NULL;
    }

    SNode * pNode = m_pstFindNode(pcKey);
    if(NULL == pNode) {
        return NULL;
    }

    return pNode->m_pvVal;
}

SNode *
CHashTable::m_pstFindNode(const char * pcKey)
{
    unsigned int uiKey = m_iHashFunction(pcKey);
    SNode * pNode = m_bucket[ uiKey % m_iMaxBucketSize ];
    if(NULL == pNode) {
        return NULL;
    }

    while (pNode != NULL)
    {
        if(strcmp(pNode->m_acKey, pcKey) == 0) {
            return pNode;
        }
        pNode = pNode->m_pNext;
    }

    return NULL;
}

unsigned int
CHashTable::m_iHashFunction(const char * pcKey)
{
    switch(m_iHashType)
    {
    case EDJBHASH:
        return DJBHash(pcKey);
        break;
    case EAPHASH:
        return APHash(pcKey);
        break;
    default:
        printf("m_iHashFunction() unknown hash type\n");
        break;
    }
    return 0;
}

unsigned int
CHashTable::DJBHash(const char * str)
{
    char * pcKey = const_cast<char*>(str);
    unsigned int hash = 5381;
    while (*pcKey)
    {
        hash = ((hash << 5) + hash) + (*pcKey);
        pcKey++;
    }
    return hash;
}

unsigned int
CHashTable::APHash(const char* str)
{
    char * pcKey = const_cast<char*>(str);

    unsigned int hash = 0xAAAAAAAA;
    unsigned int i    = 0;

    while (*pcKey)
    {
        hash ^= ((i & 1) == 0) ? (  (hash <<  7) ^ (*pcKey) * (hash >> 3)) :
            (~((hash << 11) + ((*pcKey) ^ (hash >> 5))));
        pcKey++;
        i++;
    }

    return hash;
}

hash 算法是从下面这个链接复制来的。

http://www.partow.net/programming/hashfunctions/


weixingongzhonghao

Comments

Content