C에서 사전을 구현하는 빠른 방법

IT story

C에서 사전을 구현하는 빠른 방법

hot-time 2020. 7. 19. 09:12

C에서 사전을 구현하는 빠른 방법

C로 프로그램을 작성하는 동안 내가 놓친 것 중 하나는 사전 데이터 구조입니다. C로 구현하는 가장 편리한 방법은 무엇입니까? 성능을 찾고 있지 않지만 처음부터 쉽게 코딩 할 수 있습니다. string-> int와 같은 일반적인 것이기를 원하지 않습니다. 그러나 임의의 수의 항목을 저장할 수 있기를 바랍니다.

이것은 운동으로 의도 된 것입니다. 사용할 수있는 타사 라이브러리가 있음을 알고 있습니다. 그러나 존재하지 않는 것을 잠시 생각해보십시오. 이러한 상황에서 위의 요구 사항을 충족하는 사전을 구현할 수있는 가장 빠른 방법은 무엇입니까?

C 프로그래밍 언어의 6.6 절 은 간단한 사전 (해시 테이블) 데이터 구조를 제시합니다. 유용한 사전 구현이 이것보다 더 간단해질 수 있다고 생각하지 않습니다. 귀하의 편의를 위해 코드를 여기에 재현합니다.

struct nlist { /* table entry: */
    struct nlist *next; /* next entry in chain */
    char *name; /* defined name */
    char *defn; /* replacement text */
};

#define HASHSIZE 101
static struct nlist *hashtab[HASHSIZE]; /* pointer table */

/* hash: form hash value for string s */
unsigned hash(char *s)
{
    unsigned hashval;
    for (hashval = 0; *s != '\0'; s++)
      hashval = *s + 31 * hashval;
    return hashval % HASHSIZE;
}

/* lookup: look for s in hashtab */
struct nlist *lookup(char *s)
{
    struct nlist *np;
    for (np = hashtab[hash(s)]; np != NULL; np = np->next)
        if (strcmp(s, np->name) == 0)
          return np; /* found */
    return NULL; /* not found */
}

char *strdup(char *);
/* install: put (name, defn) in hashtab */
struct nlist *install(char *name, char *defn)
{
    struct nlist *np;
    unsigned hashval;
    if ((np = lookup(name)) == NULL) { /* not found */
        np = (struct nlist *) malloc(sizeof(*np));
        if (np == NULL || (np->name = strdup(name)) == NULL)
          return NULL;
        hashval = hash(name);
        np->next = hashtab[hashval];
        hashtab[hashval] = np;
    } else /* already there */
        free((void *) np->defn); /*free previous defn */
    if ((np->defn = strdup(defn)) == NULL)
       return NULL;
    return np;
}

char *strdup(char *s) /* make a duplicate of s */
{
    char *p;
    p = (char *) malloc(strlen(s)+1); /* +1 for ’\0’ */
    if (p != NULL)
       strcpy(p, s);
    return p;
}

두 문자열의 해시가 충돌하면 O(n)조회 시간이 발생할 수 있습니다 . 값을 증가시켜 충돌 가능성을 줄일 수 있습니다 HASHSIZE. 데이터 구조에 대한 자세한 내용은이 책을 참조하십시오.

가장 빠른 방법은 같은, 이미 기존의 구현을 사용하는 것입니다 uthash .

그리고, 당신이 경우 정말 코드에 원하는 그것을 너 자신에서 알고리즘 uthash을 조사하고 다시 사용할 수 있습니다. 그것은 BSD 라이센스이므로 저작권 통지를 전달하는 것 외에는 할 수있는 일이 무제한입니다.

쉬운 구현을 위해 배열을 순진하게 검색하는 것은 어렵습니다. 일부 오류 검사 외에, 이것은 완전한 구현입니다 (예상되지 않음).

typedef struct dict_entry_s {
    const char *key;
    int value;
} dict_entry_s;

typedef struct dict_s {
    int len;
    int cap;
    dict_entry_s *entry;
} dict_s, *dict_t;

int dict_find_index(dict_t dict, const char *key) {
    for (int i = 0; i < dict->len; i++) {
        if (!strcmp(dict->entry[i], key)) {
            return i;
        }
    }
    return -1;
}

int dict_find(dict_t dict, const char *key, int def) {
    int idx = dict_find_index(dict, key);
    return idx == -1 ? def : dict->entry[idx].value;
}

void dict_add(dict_t dict, const char *key, int value) {
   int idx = dict_find_index(dict, key);
   if (idx != -1) {
       dict->entry[idx].value = value;
       return;
   }
   if (dict->len == dict->cap) {
       dict->cap *= 2;
       dict->entry = realloc(dict->entry, dict->cap * sizeof(dict_entry_s));
   }
   dict->entry[dict->len].key = strdup(key);
   dict->entry[dict->len].value = value;
   dict->len++;
}

dict_t dict_new(void) {
    dict_s proto = {0, 10, malloc(10 * sizeof(dict_entry_s))};
    dict_t d = malloc(sizeof(dict_s));
    *d = proto;
    return d;
}

void dict_free(dict_t dict) {
    for (int i = 0; i < dict->len; i++) {
        free(dict->entry[i].key);
    }
    free(dict->entry);
    free(dict);
}

해시에 따라 간단한 해시 함수와 일부 링크 된 구조 목록을 작성하여 값을 삽입 할 링크 된 목록을 지정하십시오. 해시를 사용하여 검색하십시오.

나는 얼마 전에 간단한 구현을했다.

...
#define K 16 // 연쇄 계수

구조체 dict
{
    char * 이름; / * 키 이름 * /
    int val; / * 값 * /
    구조체 dict * next; / * 링크 필드 * /
};

typedef 구조체 dict dict;
dict *table[K];
int initialized = 0;


void  putval ( char *,int);

void init_dict()
{   
    initialized = 1;
    int i;  
    for(i=0;iname = (char *) malloc (strlen(key_name)+1);
    ptr->val = sval;
    strcpy (ptr->name,key_name);


    ptr->next = (struct dict *)table[hsh];
    table[hsh] = ptr;

}


int getval ( char *key_name )
{   
    int hsh = hash(key_name);   
    dict *ptr;
    for (ptr = table[hsh]; ptr != (dict *) 0;
        ptr = (dict *)ptr->next)
    if (strcmp (ptr->name,key_name) == 0)
        return ptr->val;
    return -1;
}

here is a quick implement, i used it to get a 'Matrix'(sruct) from a string. you can have a bigger array and change its values on the run also:

typedef struct  { int** lines; int isDefined; }mat;
mat matA, matB, matC, matD, matE, matF;

/* an auxilary struct to be used in a dictionary */
typedef struct  { char* str; mat *matrix; }stringToMat;

/* creating a 'dictionary' for a mat name to its mat. lower case only! */
stringToMat matCases [] =
{
    { "mat_a", &matA },
    { "mat_b", &matB },
    { "mat_c", &matC },
    { "mat_d", &matD },
    { "mat_e", &matE },
    { "mat_f", &matF },
};

mat* getMat(char * str)
{
    stringToMat* pCase;
    mat * selected = NULL;
    if (str != NULL)
    {
        /* runing on the dictionary to get the mat selected */
        for(pCase = matCases; pCase != matCases + sizeof(matCases) / sizeof(matCases[0]); pCase++ )
        {
            if(!strcmp( pCase->str, str))
                selected = (pCase->matrix);
        }
        if (selected == NULL)
            printf("%s is not a valid matrix name\n", str);
    }
    else
        printf("expected matrix name, got NULL\n");
    return selected;
}

I am surprised no one mentioned hsearch/hcreate set of libraries which although is not available on windows, but is mandated by POSIX, and therefore available in Linux / GNU systems.

The link has a simple and complete basic example that very well explains its usage.

It even has thread safe variant, is easy to use and very performant.

A hashtable is the traditional implementation of a simple "Dictionary". If you don't care about speed or size, just google for it. There are many freely available implementations.

here's the first one I saw -- at a glance, it looks ok to me. (it's pretty basic. If you really want it to hold an unlimited amount of data, then you'll need to add some logic to "realloc" the table memory as it grows.)

good luck!

GLib and gnulib

These are your likely best bets if you don't have more specific requirements, since they are widely available, portable and likely efficient.

GLib: https://developer.gnome.org/glib/ by GNOME project. Several containers documented at: https://developer.gnome.org/glib/stable/glib-data-types.html including "Hash Tables" and "Balanced Binary Trees". License: LGPL
gnulib: https://www.gnu.org/software/gnulib/ by the GNU project. You are meant to copy paste the source into your code. Several containers documented at: https://www.gnu.org/software/gnulib/MODULES.html#ansic_ext_container including "rbtree-list", "linkedhash-list" and "rbtreehash-list". GPL license.

Hashing is the key. I think use lookup table and hashing key for this. You can find many hashing function online.

The quickest method would be using binary tree. Its worst case is also only O(logn).

참고URL : https://stackoverflow.com/questions/4384359/quick-way-to-implement-dictionary-in-c

'IT story' 카테고리의 다른 글

Oauth-로컬 URL로 테스트하는 방법은 무엇입니까? (0)	2020.07.19
유창한 검증 대 데이터 주석 (0)	2020.07.19
Azure Blob Storage 및 파일 서비스 (0)	2020.07.19
Xml 직렬화-null 값 숨기기 (0)	2020.07.18
자체 중괄호 내부의 루프 (0)	2020.07.18

현재글C에서 사전을 구현하는 빠른 방법

hot-time

C에서 사전을 구현하는 빠른 방법

C에서 사전을 구현하는 빠른 방법

'IT story' 카테고리의 다른 글

'IT story'의 다른글

티스토리툴바

C에서 사전을 구현하는 빠른 방법

C에서 사전을 구현하는 빠른 방법

'IT story' 카테고리의 다른 글

'IT story'의 다른글

관련글

티스토리툴바