| summaryrefslogtreecommitdiff |
diff options
| author | Nathanael Sensfelder <SpamShield0@MultiAgentSystems.org> | 2016-07-24 22:42:09 +0200 |
|---|---|---|
| committer | Nathanael Sensfelder <SpamShield0@MultiAgentSystems.org> | 2016-07-24 22:42:09 +0200 |
| commit | 8994c7b5cf56f540c71c763173a8927569ba94b3 (patch) | |
| tree | 5b7822bd827593f612c1e103df92001781ac72c6 | |
| parent | f74d57fee040218af039f8157cf645d0902ad300 (diff) | |
Experimenting with K Order Markovian chains.
I do not recommend using this branch ATM, it has not been tested.
| -rw-r--r-- | README.md | 44 | ||||
| -rw-r--r-- | src/core/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/core/assimilate.c | 263 | ||||
| -rw-r--r-- | src/core/create_sentences.c | 359 | ||||
| -rw-r--r-- | src/core/knowledge.c | 149 | ||||
| -rw-r--r-- | src/core/knowledge.h | 18 | ||||
| -rw-r--r-- | src/core/knowledge_types.h | 24 | ||||
| -rw-r--r-- | src/core/sequence.c | 129 | ||||
| -rw-r--r-- | src/io/error.h | 2 | ||||
| -rw-r--r-- | src/pervasive.h | 4 | ||||
| -rw-r--r-- | src/tool/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/tool/sorted_list.c | 79 | ||||
| -rw-r--r-- | src/tool/sorted_list.h | 19 |
13 files changed, 787 insertions, 305 deletions
@@ -15,10 +15,54 @@ $ cd build $ cmake .. $ make $ ./zero_of_one -h +Usage: ./zero_of_one [option_1 option_2 ...] NICKNAME [ALIAS_1 ALIAS_2 ...] +NICKNAME is used as the IRC nickname value. +If NICKNAME or any ALIAS is found in an event, the program will reply. + +Available options: + [--data-filename | -df] FILENAME + Learn content from FILENAME before connecting. + Default: ./memory.txt. + [--new-data-filename | -ndf] FILENAME + Store new data learned in FILENAME. + Default: value of the --data-filename param. + [--irc-server-addr | -isa] IRC_SERVER_ADDR + Connect to this server address. + Default: irc.foonetic.net. + [--irc-server-port | -isp] IRC_SERVER_PORT + Connect to this server port. + Default: 6667. + [--irc-server-channel | -isc] IRC_SERVER_CHANNEL + Connect to this server's channel. + Default: #theborghivemind. + [--irc-username | -iu] USERNAME + Connect using this as 'username' (shown in WHOIS). + Default: zeroofone. + [--irc-realname | -ir] REALNAME + Connect using this as 'realname' (shown in WHOIS). + Default: Zero of One (bot). + [--reply-rate | -rr] REPLY_RATE + Chance to reply to an event (integer, range [0, 100]). + Default: 8. ``` +Note that if the NICKNAME has a uppercase character, it will never be matched by +any inputs, as all inputs are converted to lowercase. A simple solution is to +have a lowercase version of NICKNAME in the ALIAS list. ## Main Objectives - POSIX compliance. - Paranoia levels of error checking. - Low RAM usage. - Giggles. + +## Behavior +- Replies when it reads messages containing a word matching its NICKNAME or one + of its ALIASes. +- Replies to messages with a REPLY_RATE percent chance. The word used to + construct the reply is the less known one. +- Reacts to user joining in. If the username is in enough samples, it is used + to construct the greeting, otherwise a random word is selected as starting + point. +- Repetition is taken into account and augments the strength of the links. +- Some punctuation symbols are identified (e.g. "example." will be understood + as "example" followed by "."). diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 2722355..a8ec17e 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -4,6 +4,7 @@ set( ${CMAKE_CURRENT_SOURCE_DIR}/knowledge.c ${CMAKE_CURRENT_SOURCE_DIR}/assimilate.c ${CMAKE_CURRENT_SOURCE_DIR}/create_sentences.c + ${CMAKE_CURRENT_SOURCE_DIR}/sequence.c ) set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) diff --git a/src/core/assimilate.c b/src/core/assimilate.c index 28f4dd4..49efe23 100644 --- a/src/core/assimilate.c +++ b/src/core/assimilate.c @@ -7,121 +7,109 @@ /** Functions to assimilate sentences using a ZoO_knowledge structure *********/ -static int link_to + +static int add_sequence ( - ZoO_index links_count [const restrict static 1], - ZoO_index * links_occurrences [const restrict static 1], - ZoO_index * links [const restrict static 1], - ZoO_index const target + ZoO_index links_count [const], + struct ZoO_knowledge_link * links [const], + ZoO_index const sequence [const restrict static ZoO_MARKOV_ORDER], + ZoO_index const target_i, + ZoO_index const offset ) { - ZoO_index i, * new_p; + ZoO_index link_index, i; + struct ZoO_knowledge_link * link; + ZoO_index * new_p; - for (i = 0; i < *links_count; ++i) + if (ZoO_knowledge_get_link(links_count, links, (sequence + offset), &link_index) < 0) { - if ((*links)[i] == target) - { - if ((*links_occurrences)[i] == ZoO_INDEX_MAX) - { - ZoO_S_WARNING - ( - "Maximum link occurrences count has been reached." - ); + return -1; + } - return -1; - } + link = (*links + link_index); + link->occurrences += 1; - (*links_occurrences)[i] += 1; + for (i = 0; i < link->targets_count; ++i) + { + if (link->targets[i] == sequence[target_i]) + { + link->targets_occurrences[i] += 1; return 0; } } - if (*links_count == ZoO_INDEX_MAX) - { - ZoO_S_WARNING("Maximum links count has been reached."); - - return -1; - } + link->targets_count += 1; new_p = (ZoO_index *) realloc ( - *links_occurrences, - ( - ( - /* Safe: *links_count < ZoO_INDEX_MAX */ - (size_t) (*links_count + 1) - ) - * sizeof(ZoO_index) - ) + (void *) link->targets, + (sizeof(ZoO_index) * link->targets_count) ); if (new_p == (ZoO_index *) NULL) { - ZoO_S_ERROR("Could not reallocate a link occurrences list."); + link->targets_count -= 1; + /* TODO: err. */ return -1; } - new_p[*links_count] = 1; - - *links_occurrences = new_p; + link->targets = new_p; + link->targets[link->targets_count - 1] = sequence[target_i]; new_p = (ZoO_index *) realloc ( - *links, - ( - ( - /* Safe: *links_count < ZoO_INDEX_MAX */ - (size_t) (*links_count + 1) - ) * sizeof(ZoO_index) - ) + (void *) link->targets_occurrences, + (sizeof(ZoO_index) * link->targets_count) ); if (new_p == (ZoO_index *) NULL) { - ZoO_S_ERROR("Could not reallocate a link list."); + link->targets_count -= 1; + /* TODO: err. */ return -1; } - new_p[*links_count] = target; - - *links = new_p; - - *links_count += 1; + link->targets_occurrences = new_p; + link->targets_occurrences[link->targets_count - 1] = 1; return 0; } -static int link_words +static int add_word_occurrence ( struct ZoO_knowledge k [const restrict static 1], - ZoO_index const a, - ZoO_index const b + ZoO_index const sequence [const static ((ZoO_MARKOV_ORDER * 2) + 1)] ) { + ZoO_index w; int error; + w = sequence[ZoO_MARKOV_ORDER]; + error = - link_to + add_sequence ( - &(k->words[a].forward_links_count), - &(k->words[a].forward_links_occurrences), - &(k->words[a].forward_links), - b + &(k->words[w].forward_links_count), + &(k->words[w].forward_links), + sequence + (ZoO_MARKOV_ORDER + 1), + (ZoO_MARKOV_ORDER - 1), + 0 ); error = ( - link_to + add_sequence ( - &(k->words[b].backward_links_count), - &(k->words[b].backward_links_occurrences), - &(k->words[b].backward_links), - a + &(k->words[w].backward_links_count), + &(k->words[w].backward_links), + sequence, + 0, + 1 ) | error ); @@ -129,83 +117,144 @@ static int link_words return error; } -int ZoO_knowledge_assimilate +static int should_assimilate ( - struct ZoO_knowledge k [const static 1], struct ZoO_strings string [const restrict static 1], ZoO_index const aliases_count, const char * restrict aliases [const restrict static aliases_count] ) { - int error; - ZoO_index curr_word, next_word; - ZoO_index curr_word_id, next_word_id; - - curr_word = 0; + ZoO_index i; + /* Don't assimilate empty strings. */ if (string->words_count == 0) { return 0; } - for (curr_word = 0; curr_word < aliases_count; ++curr_word) + /* Don't assimilate things that start with our name. */ + for (i = 0; i < aliases_count; ++i) { - if (ZoO_IS_PREFIX(aliases[curr_word], string->words[0])) + if (ZoO_IS_PREFIX(aliases[i], string->words[0])) { return 0; } } - curr_word = 0; + return 1; +} + +static int init_sequence +( + struct ZoO_knowledge k [const static 1], + struct ZoO_strings string [const restrict static 1], + ZoO_index sequence [const restrict static ((ZoO_MARKOV_ORDER * 2) + 1)] +) +{ + ZoO_index i; + + sequence[0] = ZoO_WORD_START_OF_LINE; - if (ZoO_knowledge_learn(k, string->words[curr_word], &curr_word_id) < 0) + for (i = 0; i < ZoO_MARKOV_ORDER; ++i) + { + sequence[ZoO_MARKOV_ORDER - i] = ZoO_WORD_START_OF_LINE; + + if (i < string->words_count) + { + if + ( + ZoO_knowledge_learn + ( + k, + string->words[i], + (sequence + (ZoO_MARKOV_ORDER + i + 1)) + ) < 0 + ) + { + return -1; + } + } + else + { + sequence[ZoO_MARKOV_ORDER + i + 1] = ZoO_WORD_END_OF_LINE; + } + } + + return 0; +} + +int ZoO_knowledge_assimilate +( + struct ZoO_knowledge k [const static 1], + struct ZoO_strings string [const restrict static 1], + ZoO_index const aliases_count, + const char * restrict aliases [const restrict static aliases_count] +) +{ + int error; + ZoO_index sequence[(ZoO_MARKOV_ORDER * 2) + 1]; + ZoO_index next_word, new_word, new_word_id; + + if (!should_assimilate(string, aliases_count, aliases)) + { + return 0; + } + + if (init_sequence(k, string, sequence) < 0) { return -1; } - if (link_words(k, ZoO_WORD_START_OF_LINE, curr_word_id) < 0) + if (add_word_occurrence(k, sequence) < 0) { error = -1; - ZoO_WARNING - ( - "Could not indicate that '" - ZoO_CHAR_STRING_SYMBOL - "' was the first word of the sentence.", - string->words[0] - ); - } + /* There's a pun... */ + ZoO_S_WARNING("Could not add a link between words."); - next_word = 1; + return -1; + } error = 0; - while (next_word < string->words_count) + next_word = 0; + new_word = ZoO_MARKOV_ORDER; + + while (next_word <= string->words_count) { - /* prevents words [restrict], k [restrict] */ - if (ZoO_knowledge_learn(k, string->words[next_word], &next_word_id) < 0) + if (new_word < string->words_count) { - return -1; + /* prevents words [restrict], k [restrict] */ + if (ZoO_knowledge_learn(k, string->words[new_word], &new_word_id) < 0) + { + return -1; + } + } + else + { + new_word_id = ZoO_WORD_END_OF_LINE; } - if (link_words(k, curr_word_id, next_word_id) < 0) + memmove + ( + (void *) sequence, + (const void *) (sequence + 1), + /* Accepts 0. */ + (sizeof(ZoO_index) * (ZoO_MARKOV_ORDER * 2)) + ); + + sequence[ZoO_MARKOV_ORDER * 2] = new_word_id; + + if (add_word_occurrence(k, sequence) < 0) { error = -1; - ZoO_WARNING - ( - "Could not add a link between words '" - ZoO_CHAR_STRING_SYMBOL - "' and '" - ZoO_CHAR_STRING_SYMBOL - "'.", - string->words[curr_word], - string->words[next_word] - ); + /* There's a pun... */ + ZoO_S_WARNING("Could not add a link between words."); + + return -1; } - curr_word = next_word; - curr_word_id = next_word_id; /* * Safe: * - next_word < words_count @@ -214,19 +263,7 @@ int ZoO_knowledge_assimilate * next_word < ZoO_INDEX_MAX */ next_word += 1; - } - - if (link_words(k, curr_word_id, ZoO_WORD_END_OF_LINE) < 0) - { - error = -1; - - ZoO_WARNING - ( - "Could not indicate that '" - ZoO_CHAR_STRING_SYMBOL - "' was the last word of the sentence.", - string->words[curr_word_id] - ); + new_word += 1; } return error; diff --git a/src/core/create_sentences.c b/src/core/create_sentences.c index 6b69111..a3640dc 100644 --- a/src/core/create_sentences.c +++ b/src/core/create_sentences.c @@ -18,11 +18,10 @@ * (== (sum links_occurrences) occurrences). * (can_store ZoO_index (length links)). */ -static ZoO_index pick_an_index +static ZoO_index pick_index ( ZoO_index const occurrences, - const ZoO_index links_occurrences [const restrict static 1], - const ZoO_index links [const restrict static 1] + const ZoO_index links_occurrences [const restrict static 1] ) { ZoO_index result, accumulator, random_number; @@ -62,13 +61,13 @@ static ZoO_index pick_an_index } /* Safe: (< result (length links)) */ - return links[result]; + return result; } static unsigned char * extend_left ( struct ZoO_knowledge k [const restrict static 1], - ZoO_index word_id, + ZoO_index sequence [const restrict static ZoO_MARKOV_ORDER], ZoO_char current_sentence [static 1], size_t sentence_size [const restrict static 1], ZoO_index credits [const static 1] @@ -77,17 +76,7 @@ static unsigned char * extend_left size_t addition_size; struct ZoO_knowledge_word * w; ZoO_char * next_sentence; - - w = (k->words + word_id); - - if - ( - (w->special == ZoO_WORD_STARTS_SENTENCE) - || (w->occurrences == 0) - ) - { - return current_sentence; - } + ZoO_index j; /* prevents current_sentence [restrict] */ next_sentence = current_sentence; @@ -100,15 +89,8 @@ static unsigned char * extend_left } *credits -= 1; - word_id = - pick_an_index - ( - w->occurrences, - w->backward_links_occurrences, - w->backward_links - ); - w = (k->words + word_id); + w = (k->words + sequence[ZoO_MARKOV_ORDER - 1]); switch (w->special) { @@ -205,13 +187,51 @@ static unsigned char * extend_left /* prevents current_sentence [const] */ current_sentence = next_sentence; + + memmove + ( + (void *) (sequence + 1), + (const void *) sequence, + /* Accepts 0. */ + (sizeof(ZoO_index) * (ZoO_MARKOV_ORDER - 1)) + ); + + if + ( + ZoO_knowledge_find_link + ( + w->backward_links_count, + w->backward_links, + (sequence + 1), + &j + ) + < 0 + ) + { + ZoO_S_ERROR("Unexpectedly, no backtracking link was found."); + + break; + } + + sequence[0] = + w->backward_links[j].targets + [ + pick_index + ( + w->backward_links[j].occurrences, + w->backward_links[j].targets_occurrences + ) + ]; + + /* prevents current_sentence [const] */ + current_sentence = next_sentence; } } static unsigned char * extend_right ( struct ZoO_knowledge k [const restrict static 1], - ZoO_index word_id, + ZoO_index sequence [const restrict static ZoO_MARKOV_ORDER], ZoO_char current_sentence [static 1], size_t sentence_size [const restrict static 1], ZoO_index credits [const static 1] @@ -220,17 +240,7 @@ static unsigned char * extend_right size_t addition_size; struct ZoO_knowledge_word * w; ZoO_char * next_sentence; - - w = (k->words + word_id); - - if - ( - (w->special == ZoO_WORD_ENDS_SENTENCE) - || (w->occurrences == 0) - ) - { - return current_sentence; - } + ZoO_index j; /* prevents current_sentence [restrict] */ next_sentence = current_sentence; @@ -244,15 +254,7 @@ static unsigned char * extend_right *credits -= 1; - word_id = - pick_an_index - ( - w->occurrences, - w->forward_links_occurrences, - w->forward_links - ); - - w = (k->words + word_id); + w = (k->words + sequence[0]); switch (w->special) { @@ -340,95 +342,233 @@ static unsigned char * extend_right /* prevents current_sentence [const] */ current_sentence = next_sentence; + + memmove + ( + (void *) sequence, + (const void *) (sequence + 1), + /* Accepts 0. */ + (sizeof(ZoO_index) * (ZoO_MARKOV_ORDER - 1)) + ); + + if + ( + ZoO_knowledge_find_link + ( + w->forward_links_count, + w->forward_links, + sequence, + &j + ) + < 0 + ) + { + ZoO_S_ERROR("Unexpectedly, no forward link was found."); + + break; + } + + sequence[ZoO_MARKOV_ORDER - 1] = + w->forward_links[j].targets + [ + pick_index + ( + w->forward_links[j].occurrences, + w->forward_links[j].targets_occurrences + ) + ]; } } -int ZoO_knowledge_extend +static ZoO_index select_first_word ( struct ZoO_knowledge k [const static 1], const struct ZoO_strings string [const], - int const ignore_first_word, - ZoO_char * result [const static 1] + int const ignore_first_word ) { - int word_found; - size_t sentence_size; - ZoO_index i, word_id, word_min_score, word_min_id, credits; + ZoO_index i, word_id, word_min_score, word_min_id; + ZoO_index word_found; - credits = ZoO_MAX_REPLY_WORDS; + if (string == (struct ZoO_strings *) NULL) + { + return word_min_id = (rand() % k->words_count); + } - if (string != (struct ZoO_strings *) NULL) + if (ignore_first_word) + { + i = 1; + } + else { - word_found = 0; + i = 0; + } - if (ignore_first_word) - { - i = 1; - } - else - { - i = 0; - } + word_found = 0; - for (; i < string->words_count; ++i) + for (; i < string->words_count; ++i) + { + /* prevents k [restrict] */ + if (ZoO_knowledge_find(k, string->words[i], &word_min_id) == 0) { - /* prevents k [restrict] */ - if (ZoO_knowledge_find(k, string->words[i], &word_min_id) == 0) - { - word_found = 1; - word_min_score = k->words[word_min_id].occurrences; + word_found = 1; + word_min_score = k->words[word_min_id].occurrences; - break; - } + break; } + } + + if (word_found == 0) + { + return word_min_id = (rand() % k->words_count); + } - if (word_found == 0) + for (; i < string->words_count; ++i) + { + if + ( + (ZoO_knowledge_find(k, string->words[i], &word_id) == 0) + && (k->words[word_id].occurrences < word_min_score) + ) { - word_min_id = (rand() % k->words_count); - word_min_score = k->words[word_min_id].occurrences; + word_min_score = k->words[word_id].occurrences; + word_min_id = word_id; } + } - for (; i < string->words_count; ++i) - { - if + return word_min_id; +} + +static void init_sequence +( + struct ZoO_knowledge k [const static 1], + const struct ZoO_strings string [const], + int const ignore_first_word, + ZoO_index sequence [const static (ZoO_MARKOV_ORDER * 2) + 1] +) +{ + ZoO_index i, j, accumulator, random_number; + struct ZoO_knowledge_word * fiw; + + sequence[ZoO_MARKOV_ORDER] = select_first_word(k, string, ignore_first_word); + + fiw = (k->words + sequence[ZoO_MARKOV_ORDER]); + + for (i = 0; i < ZoO_MARKOV_ORDER; ++i) + { + sequence[ZoO_MARKOV_ORDER - i - 1] = ZoO_WORD_START_OF_LINE; + sequence[ZoO_MARKOV_ORDER + i + 1] = ZoO_WORD_END_OF_LINE; + } +/* + i = 0; + accumulator = 0; + + random_number = (((ZoO_index) rand()) % fiw->occurrences); + + while (accumulator < random_number) + { + i += 1; + accumulator += fiw->forward_links[i].occurrences; + } +*/ + if (fiw->forward_links_count == 0) + { + ZoO_S_FATAL("First word has no forward links."); + } + + i = (((ZoO_index) rand()) % fiw->forward_links_count); + + memcpy + ( + (void *) (sequence + ZoO_MARKOV_ORDER + 1), + fiw->forward_links[i].sequence, + sizeof(ZoO_index) * (ZoO_MARKOV_ORDER - 1) + ); + + sequence[ZoO_MARKOV_ORDER * 2] = + fiw->forward_links[i].targets + [ + pick_index ( - (ZoO_knowledge_find(k, string->words[i], &word_id) == 0) - && (k->words[word_id].occurrences < word_min_score) + fiw->forward_links[i].occurrences, + fiw->forward_links[i].targets_occurrences ) - { - word_min_score = k->words[word_id].occurrences; - word_min_id = word_id; - } - } - } - else + ]; + + for (i = 1; i <= ZoO_MARKOV_ORDER; ++i) { - word_min_id = (rand() % k->words_count); + fiw = (k->words + sequence[(ZoO_MARKOV_ORDER * 2) - i]); + + if + ( + ZoO_knowledge_find_link + ( + fiw->backward_links_count, + fiw->backward_links, + sequence + (ZoO_MARKOV_ORDER - i + 1), + &j + ) + < 0 + ) + { + ZoO_S_ERROR("Unexpectedly, no back link was found."); + + break; + } + + sequence[ZoO_MARKOV_ORDER - i] = + fiw->backward_links[j].targets + [ + pick_index + ( + fiw->backward_links[j].occurrences, + fiw->backward_links[j].targets_occurrences + ) + ]; } +} + +int ZoO_knowledge_extend +( + struct ZoO_knowledge k [const static 1], + const struct ZoO_strings string [const], + int const ignore_first_word, + ZoO_char * result [const static 1] +) +{ + int word_found; + size_t sentence_size; + ZoO_index sequence[(ZoO_MARKOV_ORDER * 2) + 1]; + ZoO_index first_word, credits; + + credits = ZoO_MAX_REPLY_WORDS; + + init_sequence(k, string, ignore_first_word, sequence); + first_word = sequence[ZoO_MARKOV_ORDER]; /* 3: 2 spaces + '\0' */ /* FIXME: not overflow-safe */ - switch (k->words[word_min_id].special) + switch (k->words[first_word].special) { case ZoO_WORD_REMOVES_LEFT_SPACE: case ZoO_WORD_REMOVES_RIGHT_SPACE: /* word + ' ' + '\0' */ - sentence_size = (strlen(k->words[word_min_id].word) + 2); + sentence_size = (strlen(k->words[first_word].word) + 2); break; case ZoO_WORD_HAS_NO_EFFECT: /* word + ' ' * 2 + '\0' */ - sentence_size = (strlen(k->words[word_min_id].word) + 3); + sentence_size = (strlen(k->words[first_word].word) + 3); break; default: ZoO_WARNING ( "'%s' was unexpectedly selected as pillar.", - k->words[word_min_id].word + k->words[first_word].word ); /* word + '[' + ']' + ' ' * 2 + '\0' */ - sentence_size = (strlen(k->words[word_min_id].word) + 5); + sentence_size = (strlen(k->words[first_word].word) + 5); break; } @@ -441,7 +581,7 @@ int ZoO_knowledge_extend return -2; } - switch (k->words[word_min_id].special) + switch (k->words[first_word].special) { case ZoO_WORD_REMOVES_LEFT_SPACE: snprintf @@ -449,7 +589,7 @@ int ZoO_knowledge_extend *result, sentence_size, ZoO_CHAR_STRING_SYMBOL " ", - k->words[word_min_id].word + k->words[first_word].word ); break; @@ -459,7 +599,7 @@ int ZoO_knowledge_extend *result, sentence_size, " " ZoO_CHAR_STRING_SYMBOL, - k->words[word_min_id].word + k->words[first_word].word ); break; @@ -469,7 +609,7 @@ int ZoO_knowledge_extend *result, sentence_size, " " ZoO_CHAR_STRING_SYMBOL " ", - k->words[word_min_id].word + k->words[first_word].word ); break; @@ -479,27 +619,36 @@ int ZoO_knowledge_extend *result, sentence_size, " [" ZoO_CHAR_STRING_SYMBOL "] ", - k->words[word_min_id].word + k->words[first_word].word ); break; } - if ((word_min_score == 0) || (credits == 0)) - { - return 0; - } - - --credits; - /* prevents result [restrict] */ - *result = extend_left(k, word_min_id, *result, &sentence_size, &credits); + *result = + extend_right + ( + k, + (sequence + ZoO_MARKOV_ORDER + 1), + *result, + &sentence_size, + &credits + ); if (*result == (ZoO_char *) NULL) { return -2; } - *result = extend_right(k, word_min_id, *result, &sentence_size, &credits); + *result = + extend_left + ( + k, + sequence, + *result, + &sentence_size, + &credits + ); if (*result == (ZoO_char *) NULL) { diff --git a/src/core/knowledge.c b/src/core/knowledge.c index 9b4e250..a2bf708 100644 --- a/src/core/knowledge.c +++ b/src/core/knowledge.c @@ -3,6 +3,7 @@ #include <stdint.h> /* defines SIZE_MAX */ #include "../io/error.h" +#include "../tool/sorted_list.h" #include "knowledge.h" @@ -35,6 +36,28 @@ const ZoO_char const ZoO_knowledge_forbidden_chars[8]= '>' }; +static int cmp_word +( + const void * const a, + const void * const b, + const void * const other +) +{ + ZoO_char const * word; + ZoO_index const * sorted_index; + struct ZoO_knowledge const * k; + + word = (ZoO_char const *) a; + sorted_index = (ZoO_index const *) b; + k = (struct ZoO_knowledge *) other; + + return strcmp + ( + (const char *) word, + (const char *) k->words[*sorted_index].word + ); +} + /* See "knowledge.h". */ int ZoO_knowledge_find ( @@ -43,74 +66,31 @@ int ZoO_knowledge_find ZoO_index result [const restrict static 1] ) { - int cmp; - ZoO_index i, current_min, current_max; + ZoO_index r; - /* This is a binary search. */ - - if (k->words_count < 1) + if + ( + ZoO_sorted_list_index_of + ( + k->words_count, + (void const *) k->sorted_indices, + (void const *) word, + sizeof(ZoO_index), + cmp_word, + (void const *) k, + &r + ) + == 0 + ) { - *result = 0; + *result = k->sorted_indices[r]; - return -1; + return 0; } - current_min = 0; - - /* Safe: (>= k->words_count 1) */ - current_max = (k->words_count - 1); - - for (;;) - { - /* FIXME: overflow-safe? */ - i = ((current_min + current_max) / 2); - - if (i == k->words_count) - { - *result = k->words_count; - - return -1; - } - - cmp = - /* XXX: Assumed to be compatible with ZoO_char */ - strcmp - ( - (char *) word, - (const char *) k->words[k->sorted_indices[i]].word - ); - - if (cmp > 0) - { - if ((current_min > current_max)) - { - *result = (i + 1); - - return -1; - } + *result = r; - /* FIXME: overflow-safe? */ - current_min = (i + 1); - } - else if (cmp < 0) - { - if ((current_min > current_max) || (i == 0)) - { - *result = i; - - return -1; - } - - /* overflow-safe */ - current_max = (i - 1); - } - else - { - *result = k->sorted_indices[i]; - - return 0; - } - } + return -1; } static void word_init (struct ZoO_knowledge_word w [const restrict static 1]) @@ -121,10 +101,8 @@ static void word_init (struct ZoO_knowledge_word w [const restrict static 1]) w->occurrences = 1; w->forward_links_count = 0; w->backward_links_count = 0; - w->forward_links_occurrences = (ZoO_index *) NULL; - w->backward_links_occurrences = (ZoO_index *) NULL; - w->forward_links = (ZoO_index *) NULL; - w->backward_links = (ZoO_index *) NULL; + w->forward_links = (struct ZoO_knowledge_link *) NULL; + w->backward_links = (struct ZoO_knowledge_link *) NULL; } /* @@ -209,6 +187,21 @@ int ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1]) return 0; } +static void finalize_links +( + ZoO_index const count, + struct ZoO_knowledge_link links [const restrict static count] +) +{ + ZoO_index i; + + for (i = 0; i < count; ++i) + { + free((void *) links[i].targets_occurrences); + free((void *) links[i].targets); + } +} + /* * Frees all the memory used by {w}, but not {w} itself. * The values of {w}'s members are set to reflect the changes. @@ -225,32 +218,22 @@ static void finalize_word w->word = (ZoO_char *) NULL; } - if (w->forward_links_occurrences != (ZoO_index *) NULL) + if (w->forward_links != (struct ZoO_knowledge_link *) NULL) { - free((void *) w->forward_links_occurrences); - - w->forward_links_occurrences = (ZoO_index *) NULL; - } + finalize_links(w->forward_links_count, w->forward_links); - if (w->backward_links_occurrences != (ZoO_index *) NULL) - { - free((void *) w->backward_links_occurrences); - - w->backward_links_occurrences = (ZoO_index *) NULL; - } - - if (w->forward_links != (ZoO_index *) NULL) - { free((void *) w->forward_links); - w->forward_links = (ZoO_index *) NULL; + w->forward_links = (struct ZoO_knowledge_link *) NULL; } - if (w->backward_links != (ZoO_index *) NULL) + if (w->backward_links != (struct ZoO_knowledge_link *) NULL) { + finalize_links(w->backward_links_count, w->backward_links); + free((void *) w->backward_links); - w->backward_links = (ZoO_index *) NULL; + w->backward_links = (struct ZoO_knowledge_link *) NULL; } w->forward_links_count = 0; diff --git a/src/core/knowledge.h b/src/core/knowledge.h index c9ea342..93f5f49 100644 --- a/src/core/knowledge.h +++ b/src/core/knowledge.h @@ -75,4 +75,22 @@ int ZoO_knowledge_extend ZoO_char * result [const static 1] ); +int ZoO_knowledge_find_link +( + ZoO_index const links_count, + struct ZoO_knowledge_link links [const], + ZoO_index const sequence [const restrict static ZoO_SEQUENCE_SIZE], + ZoO_index result [const restrict static 1] +); + +/* Create it if it's not found. */ +int ZoO_knowledge_get_link +( + ZoO_index links_count [const], + struct ZoO_knowledge_link * links [const], + ZoO_index const sequence [const restrict static ZoO_S_LINK_SIZE], + ZoO_index result [const restrict static 1] +); + + #endif diff --git a/src/core/knowledge_types.h b/src/core/knowledge_types.h index f2e8161..8f541e7 100644 --- a/src/core/knowledge_types.h +++ b/src/core/knowledge_types.h @@ -6,6 +6,14 @@ #define ZoO_WORD_START_OF_LINE 0 #define ZoO_WORD_END_OF_LINE 1 +#if ZoO_MARKOV_ORDER == 1 + #define ZoO_SEQUENCE_SIZE 1 +#else + #define ZoO_SEQUENCE_SIZE ZoO_MARKOV_ORDER - 1 +#endif + +#define ZoO_S_LINK_SIZE (ZoO_SEQUENCE_SIZE + 1) + /* XXX: are we as close to immutable as we want to be? */ extern unsigned int const ZoO_knowledge_punctuation_chars_count; extern const ZoO_char const ZoO_knowledge_punctuation_chars[7]; @@ -22,6 +30,15 @@ enum ZoO_knowledge_special_effect ZoO_WORD_REMOVES_RIGHT_SPACE }; +struct ZoO_knowledge_link +{ + ZoO_index sequence[ZoO_SEQUENCE_SIZE]; + ZoO_index occurrences; + ZoO_index targets_count; + ZoO_index * targets_occurrences; + ZoO_index * targets; +}; + struct ZoO_knowledge_word { size_t word_size; @@ -30,12 +47,11 @@ struct ZoO_knowledge_word ZoO_index occurrences; ZoO_index forward_links_count; ZoO_index backward_links_count; - ZoO_index * forward_links_occurrences; - ZoO_index * backward_links_occurrences; - ZoO_index * forward_links; - ZoO_index * backward_links; + struct ZoO_knowledge_link * forward_links; + struct ZoO_knowledge_link * backward_links; }; + struct ZoO_knowledge { ZoO_index words_count; diff --git a/src/core/sequence.c b/src/core/sequence.c new file mode 100644 index 0000000..0f4043e --- /dev/null +++ b/src/core/sequence.c @@ -0,0 +1,129 @@ +#include <stdlib.h> +#include <string.h> + +#include "../io/error.h" +#include "../tool/sorted_list.h" + +#include "knowledge.h" + +static int cmp_seq_link +( + const void * const a, + const void * const b, + const void * const other +) +{ + ZoO_index j; + ZoO_index * sequence; + struct ZoO_knowledge_link * link; + + sequence = (ZoO_index *) a; + link = (struct ZoO_knowledge_link *) b; + + for (j = 0; j < ZoO_SEQUENCE_SIZE; ++j) + { + if (sequence[j] < link->sequence[j]) + { + return -1; + } + else if (sequence[j] > link->sequence[j]) + { + return 1; + } + } + + return 0; +} + +int ZoO_knowledge_find_link +( + ZoO_index const links_count, + struct ZoO_knowledge_link links [const], + ZoO_index const sequence [const restrict static ZoO_SEQUENCE_SIZE], + ZoO_index result [const restrict static 1] +) +{ + return + ZoO_sorted_list_index_of + ( + links_count, + (void const *) links, + (void const *) sequence, + sizeof(struct ZoO_knowledge_link), + cmp_seq_link, + (void const *) NULL, + result + ); +} + +int ZoO_knowledge_get_link +( + ZoO_index links_count [const], + struct ZoO_knowledge_link * links [const], + ZoO_index const sequence [const restrict static ZoO_SEQUENCE_SIZE], + ZoO_index result [const restrict static 1] +) +{ + struct ZoO_knowledge_link * new_p; + + if + ( + ZoO_sorted_list_index_of + ( + *links_count, + (void const *) *links, + (void const *) sequence, + sizeof(struct ZoO_knowledge_link), + cmp_seq_link, + (void const *) NULL, + result + ) == 0 + ) + { + return 0; + } + + *links_count += 1; + + new_p = + (struct ZoO_knowledge_link *) realloc + ( + (void *) *links, + (sizeof(struct ZoO_knowledge_link) * (*links_count)) + ); + + if (new_p == (struct ZoO_knowledge_link *) NULL) + { + *links_count -= 1; + + return -1; + } + + if (*result < (*links_count - 1)) + { + memmove( + (void *) (new_p + *result + 1), + (const void *) (new_p + *result), + (sizeof(struct ZoO_knowledge_link) * (*links_count - 1 - *result)) + ); + } + + *links = new_p; + + new_p += *result; + + memcpy + ( + (void *) new_p->sequence, + (void const *) sequence, + /* can be zero */ + (sizeof(ZoO_index) * ZoO_SEQUENCE_SIZE) + ); + + new_p->occurrences = 0; + new_p->targets_count = 0; + new_p->targets_occurrences = (ZoO_index *) NULL; + new_p->targets = (ZoO_index *) NULL; + + return 0; +} diff --git a/src/io/error.h b/src/io/error.h index e4267a0..be7359f 100644 --- a/src/io/error.h +++ b/src/io/error.h @@ -23,6 +23,8 @@ #define ZoO_DEBUG_LEARNING (0 || ZoO_DEBUG_ALL) #endif +#define ZoO_DEBUG_NETWORK 1 + #ifndef ZoO_DEBUG_NETWORK #define ZoO_DEBUG_NETWORK (0 || ZoO_DEBUG_ALL) #endif diff --git a/src/pervasive.h b/src/pervasive.h index d2b0344..4cc43fe 100644 --- a/src/pervasive.h +++ b/src/pervasive.h @@ -39,6 +39,10 @@ #define ZoO_DEFAULT_REPLY_RATE 8 #endif +#ifndef ZoO_MARKOV_ORDER + #define ZoO_MARKOV_ORDER 2 +#endif + typedef unsigned int ZoO_index; #define ZoO_INDEX_MAX UINT_MAX diff --git a/src/tool/CMakeLists.txt b/src/tool/CMakeLists.txt index 3a1d947..3ad122b 100644 --- a/src/tool/CMakeLists.txt +++ b/src/tool/CMakeLists.txt @@ -1,6 +1,7 @@ set( SRC_FILES ${SRC_FILES} ${CMAKE_CURRENT_SOURCE_DIR}/strings.c + ${CMAKE_CURRENT_SOURCE_DIR}/sorted_list.c ) set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) diff --git a/src/tool/sorted_list.c b/src/tool/sorted_list.c new file mode 100644 index 0000000..880a4b5 --- /dev/null +++ b/src/tool/sorted_list.c @@ -0,0 +1,79 @@ +#include "./sorted_list.h" + +int ZoO_sorted_list_index_of +( + ZoO_index const list_length, + const void * const sorted_list, + const void * const elem, + size_t const type_size, + int (*compare) (const void *, const void *, const void *), + const void * const other, + ZoO_index result [const restrict static 1] +) +{ + int cmp; + ZoO_index i, current_min, current_max; + const char * sorted_list_access; + + sorted_list_access = (char *) sorted_list; + + /* This is a binary search. */ + + if (list_length == 0) + { + *result = 0; + + return -1; + } + + current_min = 0; + + current_max = (list_length - 1); + + for (;;) + { + /* FIXME: overflow-safe? */ + i = ((current_min + current_max) / 2); + + if (i == list_length) + { + /* FIXME: I don't see how this one can be true */ + *result = list_length; + + return -1; + } + + cmp = compare(elem, (sorted_list_access + (i * type_size)), other); + + if (cmp > 0) + { + if ((current_min > current_max)) + { + *result = (i + 1); + + return -1; + } + + /* FIXME: overflow-safe? */ + current_min = (i + 1); + } + else if (cmp < 0) + { + if ((current_min > current_max) || (i == 0)) + { + *result = i; + + return -1; + } + + /* overflow-safe */ + current_max = (i - 1); + } + else + { + *result = i; + + return 0; + } + } +} diff --git a/src/tool/sorted_list.h b/src/tool/sorted_list.h new file mode 100644 index 0000000..72bd893 --- /dev/null +++ b/src/tool/sorted_list.h @@ -0,0 +1,19 @@ +#ifndef _ZoO_TOOL_SORTED_LIST_H_ +#define _ZoO_TOOL_SORTED_LIST_H_ + +#include <stdlib.h> + +#include "../pervasive.h" + +int ZoO_sorted_list_index_of +( + ZoO_index const list_length, + const void * const sorted_list, + const void * const elem, + size_t const type_size, + int (*compare) (const void *, const void *, const void *), + const void * const other, + ZoO_index result [const restrict static 1] +); + +#endif |


