| summary · refs · log · tree · commit · diff |
diff options
| author | Nathanael Sensfelder <SpamShield0@MultiAgentSystems.org> | 2016-05-05 14:59:28 +0200 |
|---|---|---|
| committer | Nathanael Sensfelder <SpamShield0@MultiAgentSystems.org> | 2016-05-05 14:59:28 +0200 |
| commit | 3405b0c1635843cbb81f042364bfcf238d7dc930 (patch) | |
| tree | 39501fec9ec72863c929a45dbc297412bbf90688 /src/core | |
| parent | c28bb6d31a122ec983e1e0a0dd1a8bd198098c58 (diff) | |
Adds the current code.
It's been running for close to a month on one of the IRC channels I
frequent and seems to be working fine.
One should be aware that, among other missing features, this version
does not permanently store what the bot learns. Since I am currently
using a file with 431848 lines as its initial knowledge bank, this
particular feature is not a high priority.
Also consider the fact that Zero of One converts text to lowercase
before reading it but will not change its own aliases. This could
potentially cause surprises when the aliases contain uppercase letters,
as they may then fail to match the converted text.
Diffstat (limited to 'src/core')
| -rw-r--r-- | src/core/CMakeLists.txt | 10 | ||||
| -rw-r--r-- | src/core/assimilate.c | 232 | ||||
| -rw-r--r-- | src/core/create_sentences.c | 486 | ||||
| -rw-r--r-- | src/core/knowledge.c | 447 | ||||
| -rw-r--r-- | src/core/knowledge.h | 42 | ||||
| -rw-r--r-- | src/core/knowledge_types.h | 46 | ||||
| -rw-r--r-- | src/core/main.c | 296 | ||||
| -rw-r--r-- | src/core/state_types.h | 16 |
8 files changed, 1575 insertions, 0 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt new file mode 100644 index 0000000..2722355 --- /dev/null +++ b/src/core/CMakeLists.txt @@ -0,0 +1,10 @@ +set( + SRC_FILES ${SRC_FILES} + ${CMAKE_CURRENT_SOURCE_DIR}/main.c + ${CMAKE_CURRENT_SOURCE_DIR}/knowledge.c + ${CMAKE_CURRENT_SOURCE_DIR}/assimilate.c + ${CMAKE_CURRENT_SOURCE_DIR}/create_sentences.c +) + +set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) + diff --git a/src/core/assimilate.c b/src/core/assimilate.c new file mode 100644 index 0000000..eb6aa17 --- /dev/null +++ b/src/core/assimilate.c @@ -0,0 +1,232 @@ +#include <stdlib.h> +#include <string.h> + +#include "../io/error.h" + +#include "knowledge.h" + +static int link_to +( + ZoO_index links_count [const restrict static 1], + ZoO_index * links_occurrences [const restrict static 1], + ZoO_index * links [const restrict static 1], + ZoO_index const target +) +{ + ZoO_index i, * new_p; + + for (i = 0; i < *links_count; ++i) + { + if ((*links)[i] == target) + { + if ((*links_occurrences)[i] == ZoO_INDEX_MAX) + { + ZoO_S_WARNING + ( + "Maximum link occurrences count has been reached." 
+ ); + + return -1; + } + + (*links_occurrences)[i] += 1; + + return 0; + } + } + + if (*links_count == ZoO_INDEX_MAX) + { + ZoO_S_WARNING("Maximum links count has been reached."); + + return -1; + } + + new_p = + (ZoO_index *) realloc + ( + *links_occurrences, + ( + ( + /* Safe: *links_count < ZoO_INDEX_MAX */ + (size_t) (*links_count + 1) + ) + * sizeof(ZoO_index) + ) + ); + + if (new_p == (ZoO_index *) NULL) + { + ZoO_S_ERROR("Could not reallocate a link occurrences list."); + + return -1; + } + + new_p[*links_count] = 1; + + *links_occurrences = new_p; + + new_p = + (ZoO_index *) realloc + ( + *links, + ( + ( + /* Safe: *links_count < ZoO_INDEX_MAX */ + (size_t) (*links_count + 1) + ) * sizeof(ZoO_index) + ) + ); + + if (new_p == (ZoO_index *) NULL) + { + ZoO_S_ERROR("Could not reallocate a link list."); + + return -1; + } + + new_p[*links_count] = target; + + *links = new_p; + + *links_count += 1; + + return 0; +} + +static int link_words +( + struct ZoO_knowledge k [const restrict static 1], + ZoO_index const a, + ZoO_index const b +) +{ + int error; + + error = + link_to + ( + &(k->words[a].forward_links_count), + &(k->words[a].forward_links_occurrences), + &(k->words[a].forward_links), + b + ); + + error = + ( + link_to + ( + &(k->words[b].backward_links_count), + &(k->words[b].backward_links_occurrences), + &(k->words[b].backward_links), + a + ) + | error + ); + + return error; +} + +int ZoO_knowledge_assimilate +( + struct ZoO_knowledge k [const static 1], + struct ZoO_strings string [const restrict static 1], + ZoO_index const aliases_count, + const char * restrict aliases [const restrict static aliases_count] +) +{ + int error; + ZoO_index curr_word, next_word; + ZoO_index curr_word_id, next_word_id; + + curr_word = 0; + + if (string->words_count == 0) + { + return 0; + } + + for (curr_word = 0; curr_word < aliases_count; ++curr_word) + { + if (ZoO_IS_PREFIX(aliases[curr_word], string->words[0])) + { + return 0; + } + } + + curr_word = 0; + + if 
(ZoO_knowledge_learn(k, string->words[curr_word], &curr_word_id) < 0) + { + return -1; + } + + if (link_words(k, ZoO_WORD_START_OF_LINE, curr_word_id) < 0) + { + error = -1; + + ZoO_WARNING + ( + "Could not indicate that '" + ZoO_CHAR_STRING_SYMBOL + "' was the first word of the sentence.", + string->words[0] + ); + } + + next_word = 1; + + error = 0; + + while (next_word < string->words_count) + { + /* prevents words [restrict], k [restrict] */ + if (ZoO_knowledge_learn(k, string->words[next_word], &next_word_id) < 0) + { + return -1; + } + + if (link_words(k, curr_word_id, next_word_id) < 0) + { + error = -1; + + ZoO_WARNING + ( + "Could not add a link between words '" + ZoO_CHAR_STRING_SYMBOL + "' and '" + ZoO_CHAR_STRING_SYMBOL + "'.", + string->words[curr_word], + string->words[next_word] + ); + } + + curr_word = next_word; + curr_word_id = next_word_id; + /* + * Safe: + * - next_word < words_count + * - words_count =< ZoO_INDEX_MAX + * ---- + * next_word < ZoO_INDEX_MAX + */ + next_word += 1; + } + + if (link_words(k, curr_word_id, ZoO_WORD_END_OF_LINE) < 0) + { + error = -1; + + ZoO_WARNING + ( + "Could not indicate that '" + ZoO_CHAR_STRING_SYMBOL + "' was the last word of the sentence.", + string->words[curr_word_id] + ); + } + + return error; +} + diff --git a/src/core/create_sentences.c b/src/core/create_sentences.c new file mode 100644 index 0000000..bc410e5 --- /dev/null +++ b/src/core/create_sentences.c @@ -0,0 +1,486 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../io/error.h" + +#include "knowledge.h" + +static ZoO_index pick_an_index +( + ZoO_index const occurrences, + const ZoO_index links_occurrences [const restrict static 1], + const ZoO_index links [const restrict static 1] +) +{ + ZoO_index result, accumulator, random_number; + + result = 0; + accumulator = links_occurrences[0]; + random_number = (((ZoO_index) rand()) % occurrences); + + while (accumulator < 
random_number) + { + + /* + * Should be safe: + * result overflowing <-> sum('links_occurrences') > 'occurrences' + * and sum('links_occurrences') == 'occurrences' + */ + result += 1; + + /* + * Should be safe: + * - sum('links_occurrences') == 'occurrences'. + * - 'occurrences' is safe. + * ---- + * 'accumulator' is safe. + */ + accumulator += links_occurrences[result]; + } + + return links[result]; +} + +static unsigned char * extend_left +( + struct ZoO_knowledge k [const restrict static 1], + ZoO_index word_id, + ZoO_char current_sentence [static 1], + size_t sentence_size [const restrict static 1], + ZoO_index credits [const static 1] +) +{ + size_t addition_size; + struct ZoO_knowledge_word * w; + ZoO_char * next_sentence; + + w = (k->words + word_id); + + if + ( + (w->special == ZoO_WORD_STARTS_SENTENCE) + || (w->occurrences == 0) + ) + { + return current_sentence; + } + + /* prevents current_sentence [restrict] */ + next_sentence = current_sentence; + + for (;;) + { + if (*credits == 0) + { + return current_sentence; + } + + *credits -= 1; + word_id = + pick_an_index + ( + w->occurrences, + w->backward_links_occurrences, + w->backward_links + ); + + w = (k->words + word_id); + + switch (w->special) + { + case ZoO_WORD_HAS_NO_EFFECT: + /* FIXME: not overflow-safe. */ + /* word also contains an '\0', which we will replace by a ' ' */ + addition_size = w->word_size; + break; + + case ZoO_WORD_ENDS_SENTENCE: + ZoO_S_WARNING("END OF LINE should not be prefixable."); + return current_sentence; + + case ZoO_WORD_STARTS_SENTENCE: + return current_sentence; + + case ZoO_WORD_REMOVES_LEFT_SPACE: + case ZoO_WORD_REMOVES_RIGHT_SPACE: + /* word also contains an '\0', which we will remove. */ + addition_size = w->word_size - 1; + break; + } + + if (*sentence_size > (SIZE_MAX - addition_size)) + { + ZoO_S_WARNING + ( + "Sentence construction aborted to avoid size_t overflow." 
+ ); + + return current_sentence; + } + + next_sentence = + (ZoO_char *) calloc + ( + /* overflow-safe */ + (*sentence_size + addition_size), + sizeof(ZoO_char) + ); + + if (next_sentence == (ZoO_char *) NULL) + { + ZoO_S_ERROR("Could not allocate memory to store new sentence."); + + return current_sentence; + } + + /* overflow-safe */ + *sentence_size = (*sentence_size + addition_size); + + switch (w->special) + { + case ZoO_WORD_HAS_NO_EFFECT: + snprintf + ( + next_sentence, + *sentence_size, + " " ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL, + w->word, + current_sentence + ); + break; + + case ZoO_WORD_REMOVES_LEFT_SPACE: + snprintf + ( + next_sentence, + *sentence_size, + ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL, + w->word, + current_sentence + ); + break; + + case ZoO_WORD_REMOVES_RIGHT_SPACE: + snprintf + ( + next_sentence, + *sentence_size, + ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL, + w->word, + /* Safe: strlen(current_sentence) >= 2 */ + (current_sentence + 1) + ); + break; + + default: + /* TODO: PROGRAM LOGIC ERROR */ + break; + } + + free((void *) current_sentence); + + /* prevents current_sentence [const] */ + current_sentence = next_sentence; + } +} + +static unsigned char * extend_right +( + struct ZoO_knowledge k [const restrict static 1], + ZoO_index word_id, + ZoO_char current_sentence [static 1], + size_t sentence_size [const restrict static 1], + ZoO_index credits [const static 1] +) +{ + size_t addition_size; + struct ZoO_knowledge_word * w; + ZoO_char * next_sentence; + + w = (k->words + word_id); + + if + ( + (w->special == ZoO_WORD_ENDS_SENTENCE) + || (w->occurrences == 0) + ) + { + return current_sentence; + } + + /* prevents current_sentence [restrict] */ + next_sentence = current_sentence; + + for (;;) + { + if (*credits == 0) + { + return current_sentence; + } + + *credits -= 1; + + word_id = + pick_an_index + ( + w->occurrences, + w->forward_links_occurrences, + w->forward_links + ); + + w = (k->words + word_id); + + switch 
(w->special) + { + case ZoO_WORD_HAS_NO_EFFECT: + /* FIXME: Assumed to be overflow-safe. */ + /* word also contains an '\0', which we will replace by a ' '. */ + addition_size = w->word_size; + break; + + case ZoO_WORD_ENDS_SENTENCE: + return current_sentence; + + case ZoO_WORD_STARTS_SENTENCE: + ZoO_S_WARNING("START OF LINE should not be suffixable."); + return current_sentence; + + case ZoO_WORD_REMOVES_LEFT_SPACE: + case ZoO_WORD_REMOVES_RIGHT_SPACE: + /* word also contains an '\0', which we will remove. */ + addition_size = w->word_size - 1; + break; + } + + if (*sentence_size > (SIZE_MAX - addition_size)) + { + ZoO_S_WARNING + ( + "Sentence construction aborted to avoid size_t overflow." + ); + + return current_sentence; + } + + next_sentence = + (ZoO_char *) calloc + ( + /* overflow-safe */ + (*sentence_size + addition_size), + sizeof(ZoO_char) + ); + + if (next_sentence == (ZoO_char *) NULL) + { + ZoO_S_ERROR("Could not allocate memory to store new sentence."); + + return current_sentence; + } + + /* overflow-safe */ + *sentence_size = (*sentence_size + addition_size); + + switch (w->special) + { + case ZoO_WORD_REMOVES_LEFT_SPACE: + printf + ( + "current sentence:'%s', pointing at '%c'.\n", + current_sentence, + current_sentence[*sentence_size - addition_size - 2] + ); + current_sentence[*sentence_size - addition_size - 2] = '\0'; + + case ZoO_WORD_HAS_NO_EFFECT: + snprintf + ( + next_sentence, + *sentence_size, + ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL " ", + current_sentence, + w->word + ); + break; + + case ZoO_WORD_REMOVES_RIGHT_SPACE: + snprintf + ( + next_sentence, + *sentence_size, + ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL, + current_sentence, + w->word + ); + break; + + default: + /* TODO: PROGRAM LOGIC ERROR */ + break; + } + + free((void *) current_sentence); + + /* prevents current_sentence [const] */ + current_sentence = next_sentence; + } +} + +int ZoO_knowledge_extend +( + struct ZoO_knowledge k [const static 1], + const struct 
ZoO_strings string [const static 1], + int const ignore_first_word, + ZoO_char * result [const static 1] +) +{ + int word_found; + size_t sentence_size; + ZoO_index i, word_id, word_min_score, word_min_id, credits; + + word_found = 0; + credits = ZoO_MAX_REPLY_WORDS; + + if (ignore_first_word) + { + i = 1; + } + else + { + i = 0; + } + + for (; i < string->words_count; ++i) + { + /* prevents k [restrict] */ + if (ZoO_knowledge_find(k, string->words[i], &word_min_id) == 0) + { + word_found = 1; + word_min_score = k->words[word_min_id].occurrences; + + break; + } + } + + if (word_found == 0) + { + word_min_id = (rand() % k->words_count); + word_min_score = k->words[word_min_id].occurrences; + } + + for (; i < string->words_count; ++i) + { + if + ( + (ZoO_knowledge_find(k, string->words[i], &word_id) == 0) + && (k->words[word_id].occurrences < word_min_score) + ) + { + word_min_score = k->words[word_id].occurrences; + word_min_id = word_id; + } + } + + /* 3: 2 spaces + '\0' */ + /* FIXME: not overflow-safe */ + switch (k->words[word_min_id].special) + { + case ZoO_WORD_REMOVES_LEFT_SPACE: + case ZoO_WORD_REMOVES_RIGHT_SPACE: + /* word + ' ' + '\0' */ + sentence_size = (strlen(k->words[word_min_id].word) + 2); + break; + + case ZoO_WORD_HAS_NO_EFFECT: + /* word + ' ' * 2 + '\0' */ + sentence_size = (strlen(k->words[word_min_id].word) + 3); + break; + + default: + ZoO_WARNING + ( + "'%s' was unexpectedly selected as pillar.", + k->words[word_min_id].word + ); + /* word + '[' + ']' + ' ' * 2 + '\0' */ + sentence_size = (strlen(k->words[word_min_id].word) + 5); + break; + } + + *result = (ZoO_char *) calloc(sentence_size, sizeof(ZoO_char)); + + if (*result == (ZoO_char *) NULL) + { + ZoO_S_ERROR("Could not allocate memory to start sentence."); + + return -2; + } + + switch (k->words[word_min_id].special) + { + case ZoO_WORD_REMOVES_LEFT_SPACE: + snprintf + ( + *result, + sentence_size, + ZoO_CHAR_STRING_SYMBOL " ", + k->words[word_min_id].word + ); + break; + + case 
ZoO_WORD_REMOVES_RIGHT_SPACE: + snprintf + ( + *result, + sentence_size, + " " ZoO_CHAR_STRING_SYMBOL, + k->words[word_min_id].word + ); + break; + + case ZoO_WORD_HAS_NO_EFFECT: + snprintf + ( + *result, + sentence_size, + " " ZoO_CHAR_STRING_SYMBOL " ", + k->words[word_min_id].word + ); + break; + + default: + snprintf + ( + *result, + sentence_size, + " [" ZoO_CHAR_STRING_SYMBOL "] ", + k->words[word_min_id].word + ); + break; + } + + if ((word_min_score == 0) || (credits == 0)) + { + return 0; + } + + --credits; + + /* prevents result [restrict] */ + *result = extend_left(k, word_min_id, *result, &sentence_size, &credits); + + if (*result == (ZoO_char *) NULL) + { + return -2; + } + + *result = extend_right(k, word_min_id, *result, &sentence_size, &credits); + + if (*result == (ZoO_char *) NULL) + { + return -2; + } + + return 0; +} diff --git a/src/core/knowledge.c b/src/core/knowledge.c new file mode 100644 index 0000000..31ccb97 --- /dev/null +++ b/src/core/knowledge.c @@ -0,0 +1,447 @@ +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../io/error.h" + +#include "knowledge.h" + +/* XXX: are we as close to immutable as we want to be? */ +unsigned int const ZoO_knowledge_punctuation_chars_count = 7; +const ZoO_char const ZoO_knowledge_punctuation_chars[7] = + { + '!', + ',', + '.', + ':', + ';', + '?', + '~' + }; + +/* XXX: are we as close to immutable as we want to be? */ +unsigned int const ZoO_knowledge_forbidden_chars_count = 8; +const ZoO_char const ZoO_knowledge_forbidden_chars[8]= + { + '(', + ')', + '[', + ']', + '{', + '}', + '<', + '>' + }; + +int ZoO_knowledge_find +( + const struct ZoO_knowledge k [const restrict static 1], + const ZoO_char word [const restrict static 1], + ZoO_index result [const restrict static 1] +) +{ + int cmp; + ZoO_index i, current_min, current_max; + + /* This is a binary search. 
*/ + + if (k->words_count < 1) + { + *result = 0; + + return -1; + } + + current_min = 0; + + /* overflow-safe: k->words_count >= 1 */ + current_max = (k->words_count - 1); + + for (;;) + { + /* FIXME: overflow-safe? */ + i = ((current_min + current_max) / 2); + + if (i == k->words_count) + { + *result = k->words_count; + + return -1; + } + + cmp = + /* XXX: Assumed to be compatible with ZoO_char */ + strcmp + ( + (char *) word, + (const char *) k->words[k->sorted_indices[i]].word + ); + + if (cmp > 0) + { + if ((current_min > current_max)) + { + *result = (i + 1); + + return -1; + } + + /* FIXME: overflow-safe? */ + current_min = (i + 1); + } + else if (cmp < 0) + { + if ((current_min > current_max) || (i == 0)) + { + *result = i; + + return -1; + } + + /* overflow-safe */ + current_max = (i - 1); + } + else + { + *result = k->sorted_indices[i]; + + return 0; + } + } +} + +static void word_init (struct ZoO_knowledge_word w [const restrict static 1]) +{ + w->word_size = 0; + w->word = (ZoO_char *) NULL; + w->special = ZoO_WORD_HAS_NO_EFFECT; + w->occurrences = 1; + w->forward_links_count = 0; + w->backward_links_count = 0; + w->forward_links_occurrences = (ZoO_index *) NULL; + w->backward_links_occurrences = (ZoO_index *) NULL; + w->forward_links = (ZoO_index *) NULL; + w->backward_links = (ZoO_index *) NULL; +} + +static int add_punctuation_nodes +( + struct ZoO_knowledge k [const static 1] +) +{ + int error; + char w[2]; + ZoO_index i, id; + + if (ZoO_knowledge_learn(k, "START OF LINE", &id) < 0) + { + ZoO_S_FATAL("Could not add 'START OF LINE' to knowledge."); + + return -2; + } + + k->words[id].special = ZoO_WORD_STARTS_SENTENCE; + k->words[id].occurrences = 0; + + if (ZoO_knowledge_learn(k, "END OF LINE", &id) < 0) + { + ZoO_S_FATAL("Could not add 'END OF LINE' to knowledge."); + + return -2; + } + + k->words[id].special = ZoO_WORD_ENDS_SENTENCE; + k->words[id].occurrences = 0; + + w[1] = '\0'; + + error = 0; + + for (i = 0; i < 
ZoO_knowledge_punctuation_chars_count; ++i) + { + w[0] = ZoO_knowledge_punctuation_chars[i]; + + if (ZoO_knowledge_learn(k, w, &id) < 0) + { + ZoO_WARNING("Could not add '%s' to knowledge.", w); + + error = -1; + } + else + { + k->words[id].special = ZoO_WORD_REMOVES_LEFT_SPACE; + k->words[id].occurrences = 0; + } + } + + return error; +} + +int ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1]) +{ + k->words_count = 0; + k->words = (struct ZoO_knowledge_word *) NULL; + k->sorted_indices = (ZoO_index *) NULL; + + if (add_punctuation_nodes(k) < -1) + { + ZoO_knowledge_finalize(k); + + return -1; + } + + return 0; +} + +static void finalize_word +( + struct ZoO_knowledge_word w [const restrict static 1] +) +{ + if (w->word != (ZoO_char *) NULL) + { + free((void *) w->word); + + w->word = (ZoO_char *) NULL; + } + + if (w->forward_links_occurrences != (ZoO_index *) NULL) + { + free((void *) w->forward_links_occurrences); + + w->forward_links_occurrences = (ZoO_index *) NULL; + } + + if (w->backward_links_occurrences != (ZoO_index *) NULL) + { + free((void *) w->backward_links_occurrences); + + w->backward_links_occurrences = (ZoO_index *) NULL; + } + + if (w->forward_links != (ZoO_index *) NULL) + { + free((void *) w->forward_links); + + w->forward_links = (ZoO_index *) NULL; + } + + if (w->backward_links != (ZoO_index *) NULL) + { + free((void *) w->backward_links); + + w->backward_links = (ZoO_index *) NULL; + } + + w->forward_links_count = 0; + w->backward_links_count = 0; +} + +void ZoO_knowledge_finalize (struct ZoO_knowledge k [const restrict static 1]) +{ + ZoO_index i; + + for (i = 0; i < k->words_count; ++i) + { + /* prevents k [restrict] */ + finalize_word(k->words + i); + } + + k->words_count = 0; + + if (k->words != (struct ZoO_knowledge_word *) NULL) + { + free((void *) k->words); + + k->words = (struct ZoO_knowledge_word *) NULL; + } + + if (k->sorted_indices != (ZoO_index *) NULL) + { + free((void *) k->sorted_indices); + + 
k->sorted_indices = (ZoO_index *) NULL; + } +} + +int ZoO_knowledge_learn +( + struct ZoO_knowledge k [const static 1], + const ZoO_char word [const restrict static 1], + ZoO_index result [const restrict static 1] +) +{ + struct ZoO_knowledge_word * new_wordlist; + ZoO_index * new_sorted_indices; + ZoO_index temp; + + /* prevents k [restrict] */ + if (ZoO_knowledge_find(k, word, result) == 0) + { + if (k->words[*result].occurrences == ZoO_INDEX_MAX) + { + ZoO_WARNING + ( + "Maximum number of occurrences has been reached for word '" + ZoO_CHAR_STRING_SYMBOL + "'.", + word + ); + + return -1; + } + + /* overflow-safe */ + k->words[*result].occurrences += 1; + + return 0; + } + + if (k->words_count == ZoO_INDEX_MAX) + { + ZoO_S_WARNING("Maximum number of words has been reached."); + + return -1; + } + + new_wordlist = + (struct ZoO_knowledge_word *) realloc + ( + (void *) k->words, + ( + ( + /* overflow-safe: k->words_count < ZoO_INDEX_MAX */ + (size_t) (k->words_count + 1) + ) + * sizeof(struct ZoO_knowledge_word) + ) + ); + + if (new_wordlist == (struct ZoO_knowledge_word *) NULL) + { + ZoO_ERROR + ( + "Could not learn the word '%s': unable to realloc the word list.", + word + ); + + return -1; + } + + k->words = new_wordlist; + + new_sorted_indices = + (ZoO_index *) realloc + ( + (void *) k->sorted_indices, + ( + ( + /* overflow-safe: k->words_count < ZoO_INDEX_MAX */ + (size_t) (k->words_count + 1) + ) + * sizeof(ZoO_index) + ) + ); + + if (new_sorted_indices == (ZoO_index *) NULL) + { + ZoO_ERROR + ( + "Could not learn the word '" + ZoO_CHAR_STRING_SYMBOL + "': unable to realloc the index list.", + word + ); + + return -1; + } + + k->sorted_indices = new_sorted_indices; + + /* We can only move indices right of *result if they exist. */ + if (*result != k->words_count) + { + /* TODO: check if correct. 
*/ + memmove + ( + /* + * overflow-safe: + * - k->words_count < ZoO_INDEX_MAX + * - (k->sorted_indices + *result + 1) =< k->words_count + */ + (void *) (k->sorted_indices + *result + 1), + /* overflow-safe: see above */ + (const void *) (k->sorted_indices + *result), + ( + ( + /* overflow-safe: *result < k->words_count */ + (size_t) (k->words_count - *result) + ) + * sizeof(ZoO_index) + ) + ); + } + + temp = *result; + + k->sorted_indices[*result] = k->words_count; + + *result = k->words_count; + + word_init(k->words + *result); + + /* XXX: strlen assumed to work with ZoO_char. */ + k->words[*result].word_size = strlen(word); + + if (k->words[*result].word_size == SIZE_MAX) + { + ZoO_S_WARNING + ( + "Could not learn word that had a size too big to store in a '\\0' " + "terminated string. Chances are, this is but a symptom of the real " + "problem." + ); + + return -1; + } + + /* We also need '\0' */ + k->words[*result].word_size += 1; + + k->words[*result].word = + (ZoO_char *) calloc + ( + k->words[*result].word_size, + sizeof(ZoO_char) + ); + + if (k->words[*result].word == (ZoO_char *) NULL) + { + ZoO_S_ERROR + ( + "Could not learn word due to being unable to allocate the memory to " + "store it." 
+ ); + + k->words[*result].word_size = 0; + + return -1; + } + + memcpy(k->words[*result].word, word, k->words[*result].word_size); + + /* Safe: k->words_count < ZoO_INDEX_MAX */ + k->words_count += 1; + + ZoO_DEBUG + ( + ZoO_DEBUG_LEARNING, + "Learned word {'%s', id: %u, rank: %u}", + word, + *result, + temp + ); + + return 0; +} + diff --git a/src/core/knowledge.h b/src/core/knowledge.h new file mode 100644 index 0000000..f20cb16 --- /dev/null +++ b/src/core/knowledge.h @@ -0,0 +1,42 @@ +#ifndef _ZoO_CORE_KNOWLEDGE_H_ +#define _ZoO_CORE_KNOWLEDGE_H_ + +#include "../tool/strings_types.h" + +#include "knowledge_types.h" + +int ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1]); + +void ZoO_knowledge_finalize (struct ZoO_knowledge k [const static 1]); + +int ZoO_knowledge_find +( + const struct ZoO_knowledge k [const restrict static 1], + const ZoO_char word [const restrict static 1], + ZoO_index result [const restrict static 1] +); + +int ZoO_knowledge_learn +( + struct ZoO_knowledge k [const static 1], + const ZoO_char word [const restrict static 1], + ZoO_index result [const restrict static 1] +); + +int ZoO_knowledge_assimilate +( + struct ZoO_knowledge k [const static 1], + struct ZoO_strings string [const restrict static 1], + ZoO_index const aliases_count, + const char * restrict aliases [const restrict static aliases_count] +); + +int ZoO_knowledge_extend +( + struct ZoO_knowledge k [const static 1], + const struct ZoO_strings string [const static 1], + int const ignore_first_word, + ZoO_char * result [const static 1] +); + +#endif diff --git a/src/core/knowledge_types.h b/src/core/knowledge_types.h new file mode 100644 index 0000000..f2e8161 --- /dev/null +++ b/src/core/knowledge_types.h @@ -0,0 +1,46 @@ +#ifndef _ZoO_CORE_KNOWLEDGE_TYPES_H_ +#define _ZoO_CORE_KNOWLEDGE_TYPES_H_ + +#include "../pervasive.h" + +#define ZoO_WORD_START_OF_LINE 0 +#define ZoO_WORD_END_OF_LINE 1 + +/* XXX: are we as close to immutable as we want to be? 
*/ +extern unsigned int const ZoO_knowledge_punctuation_chars_count; +extern const ZoO_char const ZoO_knowledge_punctuation_chars[7]; +extern unsigned int const ZoO_knowledge_forbidden_chars_count; +extern const ZoO_char const ZoO_knowledge_forbidden_chars[8]; + + +enum ZoO_knowledge_special_effect +{ + ZoO_WORD_HAS_NO_EFFECT, + ZoO_WORD_ENDS_SENTENCE, + ZoO_WORD_STARTS_SENTENCE, + ZoO_WORD_REMOVES_LEFT_SPACE, + ZoO_WORD_REMOVES_RIGHT_SPACE +}; + +struct ZoO_knowledge_word +{ + size_t word_size; + ZoO_char * word; + enum ZoO_knowledge_special_effect special; + ZoO_index occurrences; + ZoO_index forward_links_count; + ZoO_index backward_links_count; + ZoO_index * forward_links_occurrences; + ZoO_index * backward_links_occurrences; + ZoO_index * forward_links; + ZoO_index * backward_links; +}; + +struct ZoO_knowledge +{ + ZoO_index words_count; + ZoO_index * sorted_indices; + struct ZoO_knowledge_word * words; +}; + +#endif diff --git a/src/core/main.c b/src/core/main.c new file mode 100644 index 0000000..34233d6 --- /dev/null +++ b/src/core/main.c @@ -0,0 +1,296 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <time.h> +#include <signal.h> + +#include "../tool/strings.h" + +#include "../io/error.h" +#include "../io/parameters.h" +#include "../io/data_input.h" +#include "../io/network.h" + + +#include "knowledge.h" + +#include "state_types.h" + +static int run = 1; + +static void request_termination (int const signo) +{ + if ((signo == SIGINT) || (signo == SIGTERM)) + { + run = 0; + } +} + +static int initialize +( + struct ZoO_state s [const static 1], + int const argc, + const char * argv [const static argc] +) +{ + ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Zero of One is initializing..."); + + srand(time(NULL)); + + /* prevents s [restrict] */ + if (ZoO_knowledge_initialize(&(s->knowledge)) < 0) + { + return -1; + } + + if (ZoO_parameters_initialize(&(s->param), argc, argv) < 0) + { + ZoO_knowledge_finalize(&(s->knowledge)); + + return -1; + } 
+ + return 0; +} + +static int load_data_file (struct ZoO_state s [const static 1]) +{ + struct ZoO_data_input input; + char * result; + + if (ZoO_data_input_open(&input, s->param.data_filename) < 0) + { + return -1; + } + + while + ( + ZoO_data_input_read_line + ( + &input, + ZoO_knowledge_punctuation_chars_count, + ZoO_knowledge_punctuation_chars + ) == 0 + ) + { + (void) ZoO_knowledge_assimilate + ( + &(s->knowledge), + &(input.string), + s->param.aliases_count, + s->param.aliases + ); + } + + ZoO_data_input_close(&input); + + return 0; +} + +static int finalize (struct ZoO_state s [const static 1]) +{ + int error; + + ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Zero of One is finalizing..."); + + error = 0; + + /* prevents s [restrict] */ + ZoO_knowledge_finalize(&(s->knowledge)); + + return error; +} + +static int network_connect (struct ZoO_state s [const static 1]) +{ + return + ZoO_network_connect + ( + &(s->network), + s->param.irc_server_addr, + s->param.irc_server_port, + s->param.irc_server_channel, + s->param.irc_username, + s->param.irc_realname, + s->param.aliases[0] + ); +} + +static int should_reply +( + struct ZoO_parameters param [const restrict static 1], + struct ZoO_strings string [const restrict static 1], + int should_learn [const restrict static 1] +) +{ + ZoO_index i, j; + + for (i = 0; i < param->aliases_count; ++i) + { + if (ZoO_IS_PREFIX(param->aliases[i], string->words[0])) + { + *should_learn = 0; + + return 1; + } + + for (j = 1; j < string->words_count; ++j) + { + if (ZoO_IS_PREFIX(param->aliases[i], string->words[j])) + { + *should_learn = 1; + + return 1; + } + } + } + + *should_learn = 1; + + return (param->reply_rate >= (rand() % 100)); +} + +static void handle_message +( + struct ZoO_state s [const static 1], + struct ZoO_strings string [const restrict static 1], + ssize_t const msg_offset, + ssize_t const msg_size +) +{ + ZoO_char * line; + int reply, learn; + + if + ( + ZoO_strings_parse + ( + string, + (size_t) msg_size, + 
(s->network.msg + msg_offset), + ZoO_knowledge_punctuation_chars_count, + ZoO_knowledge_punctuation_chars + ) < 0 + ) + { + ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Could not dissect msg."); + + return; + } + + if (string->words_count == 0) + { + return; + } + + reply = should_reply(&(s->param), string, &learn); + + if + ( + reply + && + ( + ZoO_knowledge_extend + ( + &(s->knowledge), + string, + !learn, + &line + ) == 0 + ) + ) + { + if (line[0] == ' ') + { + strcpy((s->network.msg), (line + 1)); + } + else + { + strcpy((s->network.msg), line); + } + + free((void *) line); + + ZoO_network_send(&(s->network)); + } + + if (learn) + { + (void) ZoO_knowledge_assimilate + ( + &(s->knowledge), + string, + s->param.aliases_count, + s->param.aliases + ); + } +} + +static int main_loop (struct ZoO_state s [const static 1]) +{ + struct ZoO_strings string; + ssize_t msg_offset, msg_size; + + msg_offset = 0; + msg_size = 0; + + ZoO_strings_initialize(&string); + + while (run) + { + if (ZoO_network_receive(&(s->network), &msg_offset, &msg_size) == 0) + { + handle_message(s, &string, msg_offset, msg_size); + } + } + + ZoO_strings_finalize(&string); + + ZoO_network_disconnect(&(s->network)); + + return 0; +} + +int main (int const argc, const char * argv [const static argc]) +{ + struct ZoO_state s; + + if (initialize(&s, argc, argv) < 0) + { + return -1; + } + + if (load_data_file(&s) < 0) + { + goto CRASH; + } + + if (network_connect(&s) < 0) + { + goto CRASH; + } + + if (main_loop(&s) < 0) + { + goto CRASH; + } + + (void) finalize(&s); + + ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Zero of One terminated normally."); + + return 0; + + CRASH: + { + (void) finalize(&s); + + ZoO_S_DEBUG + ( + ZoO_DEBUG_PROGRAM_FLOW, + "Zero of One terminated by crashing." 
+ ); + + return -1; + } +} diff --git a/src/core/state_types.h b/src/core/state_types.h new file mode 100644 index 0000000..89c814e --- /dev/null +++ b/src/core/state_types.h @@ -0,0 +1,16 @@ +#ifndef _ZoO_CORE_STATE_TYPES_H_ +#define _ZoO_CORE_STATE_TYPES_H_ + +#include "../io/parameters_types.h" +#include "../io/network_types.h" + +#include "knowledge_types.h" + +struct ZoO_state +{ + struct ZoO_parameters param; + struct ZoO_knowledge knowledge; + struct ZoO_network network; +}; + +#endif |


