プログラミング言語 C

(tree node の領域を確保する) 関数 talloc は次のように書くことができる。

#include <stdlib.h> /* required for malloc */

/*
 * talloc: allocate storage for one tree node.
 *
 * Returns whatever malloc returns for a request of sizeof(struct Tnode)
 * bytes: a pointer to the new storage, or NULL when the allocation fails.
 * The members of the node are not initialized here.
 */
struct Tnode *talloc(void)
{
    return (struct Tnode *)malloc(sizeof(struct Tnode));
}

malloc は void 型へのポインタ (void *) を返すので、そのポインタをキャストによって望む型へと明示的に変換しておくようにする。
関数 str_dup は引数によって与えられた文字列を、malloc を呼び出すことによって確保した安全な場所に copy する。

/*
 * str_dup: copy the string s into storage obtained from malloc.
 *
 * Returns the copy, or NULL when malloc fails (nothing is copied then).
 */
char *str_dup(char *s)
{
    /* one extra byte for the terminating '\0' */
    char *copy = (char *)malloc(str_len(s) + 1);

    if (copy == NULL)
        return NULL;
    str_cpy(copy, s);
    return copy;
}

関数 tree_print は tree を辞書順に print していく。
それぞれのノードでは、まず左の subtree (より小さい側) が print され、次に (そのノードの) word が、最後に右の subtree (より大きい側) が print される。
もし再帰 (how recursion works) の理解が不十分だと思うなら、上の例で示された tree を関数 tree_print を使ってシミュレートしてみるとよい。

/*
 * tree_print: print the tree rooted at p.
 *
 * Visits the left subtree first, then prints this node's count and word,
 * then visits the right subtree.  A NULL p prints nothing.
 */
void tree_print(struct Tnode *p)
{
    if (p == NULL)
        return;

    tree_print(p->left);
    printf("%4d %s\n", p->cnt, p->word);
    tree_print(p->right);
}

(p171-173)

(完成したプログラム)

/* cnt_wd.c */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
/* MAX: size of the word buffer in main, also passed to get_word as its limit */
#define MAX 100
/* BUFSIZE: number of elements in buf, the push-back buffer of get_ch/unget_ch */
#define BUFSIZE 100
/* One node of the binary tree of words that add_tree builds. */
struct Tnode { /* the tree node */
char *word; /* points to the text (a copy made by str_dup) */
int cnt; /* number of occurrences */
struct Tnode *left; /* left child: words that compare less than this one */
struct Tnode *right; /* right child: words that compare greater than this one */
};
/* Prototypes for the functions defined further down in this file. */
struct Tnode *add_tree(struct Tnode *, char *); /* insert a word or bump its count */
struct Tnode *talloc(void); /* allocate one tree node */
char *str_dup(char *); /* duplicate a string into malloc'ed storage */
int str_cmp(char *, char *); /* compare two strings */
int str_len(char *); /* length of a string */
void str_cpy(char *, char *); /* copy second string into first */
void tree_print(struct Tnode *); /* print the tree, left subtree first */
int get_word(char *, int); /* read the next word or character from input */
int get_ch(void); /* read a character, honoring push-back */
void unget_ch(int); /* push a character back onto the input */
/* Push-back state shared by get_ch and unget_ch. */
char buf[BUFSIZE]; /* buffer for unget_ch */
int buf_p = 0; /* next free position in buf; 0 means nothing is pushed back */
/*
 * main: word frequency count.
 *
 * Reads tokens with get_word until it reports EOF.  Every token whose
 * first character is a letter is added to the tree via add_tree; other
 * tokens are ignored.  Finally the tree is printed with tree_print.
 * Always returns 0.
 */
int main(void)
{
    struct Tnode *root = NULL; /* the tree starts out empty */
    char word[MAX];

    while (get_word(word, MAX) != EOF) {
        /* cast: handing a negative char value to isalpha is undefined */
        if (isalpha((unsigned char)word[0]))
            root = add_tree(root, word);
    }
    tree_print(root);
    return 0;
}
/*
 * add_tree: add a node with w, at or below p.
 *
 * p is the root of a (possibly empty) subtree and w the word to record.
 * If w is already present (str_cmp returns 0) its count is incremented;
 * otherwise the function recurses into the left subtree when w compares
 * less than the node's word and into the right subtree when it compares
 * greater, creating a new node where the subtree is empty.
 *
 * Returns the root of the updated subtree.  When memory for a new node or
 * for the copy of w cannot be obtained, a message is written to stderr and
 * NULL is returned for that empty subtree, so the word is simply not
 * recorded and the rest of the tree is left intact.
 */
struct Tnode *add_tree(struct Tnode *p, char *w)
{
    int cond;

    if (p == NULL) { /* a new word has arrived */
        p = talloc(); /* make a new node */
        if (p != NULL) {
            p->word = str_dup(w);
            if (p->word == NULL) { /* no room for the text: drop the node */
                free(p);
                p = NULL;
            }
        }
        if (p == NULL) {
            fprintf(stderr, "add_tree : out of memory\n");
            return NULL;
        }
        p->cnt = 1;
        p->left = p->right = NULL;
    } else if ((cond = str_cmp(w, p->word)) == 0) {
        p->cnt++; /* repeated word */
    } else if (cond < 0) { /* less than: into left subtree */
        p->left = add_tree(p->left, w);
    } else { /* greater than: into right subtree */
        p->right = add_tree(p->right, w);
    }
    return p;
}
/*
 * talloc: make a tree node.
 *
 * Requests storage for one struct Tnode from malloc and returns the
 * result unchanged (NULL when malloc fails).  The node's members are
 * not initialized here.
 */
struct Tnode *talloc(void)
{
    struct Tnode *node;

    node = (struct Tnode *)malloc(sizeof *node);
    return node;
}
/*
 * str_dup: make a duplicate of s.
 *
 * The copy lives in storage obtained from malloc.  Returns the copy, or
 * NULL when malloc fails (in which case nothing is copied).
 */
char *str_dup(char *s)
{
    /* +1 leaves room for the terminating '\0' */
    char *copy = (char *)malloc(str_len(s) + 1);

    if (copy == NULL)
        return NULL;
    str_cpy(copy, s);
    return copy;
}
/*
 * str_cmp: compare the strings s and t.
 *
 * Returns 0 when both strings are equal, otherwise the difference
 * *s - *t at the first position where they differ (negative when s
 * sorts before t, positive when it sorts after).
 */
int str_cmp(char *s, char *t)
{
    while (*s == *t) {
        if (*s == '\0')
            return 0; /* reached the end of both strings together */
        s++;
        t++;
    }
    return *s - *t;
}
/*
 * str_len: return the length of string s.
 *
 * Counts the characters that precede the terminating '\0'.
 */
int str_len(char *s)
{
    int n;

    for (n = 0; s[n] != '\0'; n++)
        ;
    return n;
}
/*
 * str_cpy: copy t to s.
 *
 * Copies every character of t, including the terminating '\0', into the
 * storage s points to.  The caller must provide room for all of them.
 */
void str_cpy(char *s, char *t)
{
    /* the assignment copies one character; the loop ends once '\0' was copied */
    while ((*s = *t) != '\0') {
        s++;
        t++;
    }
}
/*
 * tree_print: in-order print of tree p.
 *
 * Prints the left subtree, then this node's count and word on one line,
 * then the right subtree.  A NULL p prints nothing.
 */
void tree_print(struct Tnode *p)
{
    if (p == NULL)
        return;

    tree_print(p->left);
    printf("%4d %s\n", p->cnt, p->word);
    tree_print(p->right);
}
/*
 * get_word: get next word or character from input.
 *
 * Skips leading white space, then stores the next token in word as a
 * '\0'-terminated string: either a run of letters and digits that starts
 * with a letter, or a single non-letter character.  At end of input word
 * becomes the empty string.
 *
 * word  - destination buffer
 * limit - size of that buffer in bytes; must be at least 2.  At most
 *         limit - 1 characters are stored, followed by '\0'.  A longer
 *         run is split; its remainder is returned by the next call.
 *
 * Returns the first character of the token, or EOF at end of input.
 */
int get_word(char *word, int limit)
{
    int c;
    char *pw = word;

    while (isspace(c = get_ch()))
        ;
    if (c != EOF)
        *pw++ = c;
    if (!isalpha(c)) {
        *pw = '\0';
        return c;
    }
    /*
     * word[0] is already in use, so count it against limit first;
     * otherwise the terminator could be written to word[limit].
     */
    for (--limit; --limit > 0; pw++) {
        /* keep the character in an int so the ctype test sees the value get_ch returned */
        c = get_ch();
        if (!isalnum(c)) {
            /* EOF does not fit in the char push-back buffer; the next read reports it again */
            if (c != EOF)
                unget_ch(c);
            break;
        }
        *pw = c;
    }
    *pw = '\0';
    return word[0];
}
/*
 * get_ch: get a (possibly pushed-back) character.
 *
 * Returns the most recently pushed-back character from buf when there is
 * one, otherwise the next character from getchar().
 */
int get_ch(void)
{
    if (buf_p > 0)
        return buf[--buf_p];
    return getchar();
}
/*
 * unget_ch: push character back on input.
 *
 * Stores c in buf so that the next get_ch returns it.  When buf already
 * holds BUFSIZE characters, c is discarded and a message is printed.
 */
void unget_ch(int c)
{
    /* >= rather than >: buf_p == BUFSIZE would already write past the end of buf */
    if (buf_p >= BUFSIZE)
        printf("unget_ch : too many characters\n");
    else
        buf[buf_p++] = c;
}

コンパイルしてみる。
$cc -o cnt_wd cnt_wd.c
次に文字列テキストを用意。(例えばこんなの -> song.txt)

Where have all the flowers gone?
Long time passing
Where have all the flowers gone?
Long time ago
Where have all the flowers gone?
Girls have pick them every one
When will they ever learn?
When will they ever learn?

$./cnt_wd < song.txt として確認。

(注) ヘッダファイル string.h には標準関数として strcmp, strlen, strcpy がそれぞれ入ってるので、それらを使えば上のコードをもっと短くすることができます。
(追記) カッコとセミコロンが一部抜けていたので訂正。
(さらに追記)ヘッダファイル名がまちがっていたので訂正 (<- 以前のままでもなぜか正常に実行できますが ... )