単語数算出を二分木でやる

K&R本 6.5

単語数の算出処理に二分木の話が出てきたので復習がてら実装してみる。

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

#define WORDMAX 100

struct NODE {
    char *word;
    int count;
    struct NODE *left;
    struct NODE *right;
};
typedef struct NODE NODE;

NODE *create_node (char *str);
NODE *add_node (NODE **head,char *str);
void preorder(NODE *n);
int getword (char *word,int max);

NODE *create_node (char *str) {
    NODE *node;
    size_t len = strlen(str);
    node = (NODE *)malloc(sizeof(NODE));
    if ( node == NULL ) {
        return NULL;
    }
    node->count = 1;
    node->right = node->left = NULL;
    node->word = (char *)malloc(len+1);
    if ( node->word == NULL ) {
        return NULL;
    }
    strcpy_s(node->word,len+1,str);
    return node;
}

NODE *add_node (NODE **head,char *str) {
    NODE **p;
    int strtype;
    if ( *head == NULL ) {
        *head = create_node(str);
        return *head;
    }
    
    p = head;
    while( *p!=NULL ) {
        strtype = strcmp(str,(*p)->word);
        if ( strtype == 0 ) {
            (*p)->count++;
            return *p;
        }
        if ( strtype > 0 ) {
            p = &(*p)->right;
        }
        else {
            p = &(*p)->left;
        }
    }
    
    *p = create_node(str);
    return *p;
}

void preorder(NODE *n){
    if( n == NULL ) return;
    printf("%s = %d\n", n->word,n->count );
    preorder( n->left );
    preorder( n->right );
}

// とりあえず英数字_のみ
int getword (char *word,int max) {
    int c;
    while( (c=getchar()) != EOF ) {
        if ( (isalnum(c)||c=='_') && --max ) {
            *word++ = (char)c;
        }
        else {
            break;
        }
    }
    *word = '\0';
    return c;
}

int main (void) {
    NODE *head = NULL;
    char str[WORDMAX];
    
    while ( getword(str,WORDMAX) != EOF ) {
        if ( str[0] != '\0' ) add_node(&head,str);
    }
    
    preorder(head);
    return 0;
}
$ cat main.c | main
include = 4
h = 4
ctype = 1
WORDMAX = 3
100 = 1
1 = 3
0 = 6
NODE = 18
EOF = 2
NULL = 9
char = 10
add_node = 3
_ = 2
c = 6
break = 1
count = 4
create_node = 4
define = 1
d = 1
getword = 3
else = 2
getchar = 1
head = 9
if = 8
stdio = 1
int = 8
left = 4
isalnum = 1
right = 4
preorder = 5
n = 8
max = 3
len = 3
malloc = 2
main = 1
node = 10
p = 13
return = 9
printf = 1
size_t = 1
s = 1
sizeof = 1
string = 1
stdlib = 1
str = 13
strcpy_s = 1
strcmp = 1
struct = 4
strlen = 1
strtype = 4
word = 10
typedef = 1
void = 3
while = 3

思ったよりもスムーズに実装できた。二分木は何度か実装してるお蔭だろうか。

getword関数はとりあえず英数字_を対象に処理してるだけのとてもシンプルなものにした。そのせいで整数定数までもカウントされてしまっているが今回は良しとする。