summaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
authorNathanael Sensfelder <SpamShield0@MultiAgentSystems.org>2016-05-05 14:59:28 +0200
committerNathanael Sensfelder <SpamShield0@MultiAgentSystems.org>2016-05-05 14:59:28 +0200
commit3405b0c1635843cbb81f042364bfcf238d7dc930 (patch)
tree39501fec9ec72863c929a45dbc297412bbf90688 /src/core
parentc28bb6d31a122ec983e1e0a0dd1a8bd198098c58 (diff)
Adds the current code.
It's been running for close to a month on one of the IRC channels I frequent and seems to be working fine. One should be aware that, among other missing features, this version does not permanently store what the bot learns. Indeed, I am currently using a file with 431848 lines as its initial knowledge bank, which makes this particular feature a low priority. Also note that Zero of One converts text to lowercase before reading it but will not change its own aliases. This could cause surprises if the aliases contain uppercase letters.
Diffstat (limited to 'src/core')
-rw-r--r--src/core/CMakeLists.txt10
-rw-r--r--src/core/assimilate.c232
-rw-r--r--src/core/create_sentences.c486
-rw-r--r--src/core/knowledge.c447
-rw-r--r--src/core/knowledge.h42
-rw-r--r--src/core/knowledge_types.h46
-rw-r--r--src/core/main.c296
-rw-r--r--src/core/state_types.h16
8 files changed, 1575 insertions, 0 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
new file mode 100644
index 0000000..2722355
--- /dev/null
+++ b/src/core/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Append this directory's C sources to the SRC_FILES list.
+set(
+ SRC_FILES ${SRC_FILES}
+ ${CMAKE_CURRENT_SOURCE_DIR}/main.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/knowledge.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/assimilate.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/create_sentences.c
+)
+
+# Make the extended list visible in the parent directory's scope.
+set(SRC_FILES ${SRC_FILES} PARENT_SCOPE)
+
diff --git a/src/core/assimilate.c b/src/core/assimilate.c
new file mode 100644
index 0000000..eb6aa17
--- /dev/null
+++ b/src/core/assimilate.c
@@ -0,0 +1,232 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "../io/error.h"
+
+#include "knowledge.h"
+
+/*
+ * Counts one more occurrence of a link toward 'target'.
+ *
+ * '*links' and '*links_occurrences' are parallel arrays of '*links_count'
+ * entries: (*links)[i] is a linked word and (*links_occurrences)[i] is the
+ * number of times that link has been recorded. If 'target' is already listed,
+ * its counter is incremented. Otherwise both arrays are grown by one entry
+ * (the counters first, then the targets) and '*links_count' is incremented.
+ *
+ * Returns 0 on success, -1 if a counter or the list size has already reached
+ * ZoO_INDEX_MAX, or if a reallocation failed.
+ */
+static int link_to
+(
+   ZoO_index links_count [const restrict static 1],
+   ZoO_index * links_occurrences [const restrict static 1],
+   ZoO_index * links [const restrict static 1],
+   ZoO_index const target
+)
+{
+   ZoO_index const count = *links_count;
+   ZoO_index pos;
+   ZoO_index * grown;
+
+   /* Linear scan for an existing link toward 'target'. */
+   pos = 0;
+
+   while ((pos < count) && ((*links)[pos] != target))
+   {
+      pos += 1;
+   }
+
+   if (pos < count)
+   {
+      /* Known link: only its counter has to change. */
+      if ((*links_occurrences)[pos] == ZoO_INDEX_MAX)
+      {
+         ZoO_S_WARNING
+         (
+            "Maximum link occurrences count has been reached."
+         );
+
+         return -1;
+      }
+
+      (*links_occurrences)[pos] += 1;
+
+      return 0;
+   }
+
+   if (count == ZoO_INDEX_MAX)
+   {
+      ZoO_S_WARNING("Maximum links count has been reached.");
+
+      return -1;
+   }
+
+   /* Safe: count < ZoO_INDEX_MAX, so (count + 1) cannot wrap. */
+   grown =
+      (ZoO_index *) realloc
+      (
+         *links_occurrences,
+         (((size_t) (count + 1)) * sizeof(ZoO_index))
+      );
+
+   if (grown == (ZoO_index *) NULL)
+   {
+      ZoO_S_ERROR("Could not reallocate a link occurrences list.");
+
+      return -1;
+   }
+
+   grown[count] = 1;
+   *links_occurrences = grown;
+
+   grown =
+      (ZoO_index *) realloc
+      (
+         *links,
+         (((size_t) (count + 1)) * sizeof(ZoO_index))
+      );
+
+   if (grown == (ZoO_index *) NULL)
+   {
+      /*
+       * The counters array keeps its extra slot, but since '*links_count' is
+       * not incremented that slot is simply unused.
+       */
+      ZoO_S_ERROR("Could not reallocate a link list.");
+
+      return -1;
+   }
+
+   grown[count] = target;
+   *links = grown;
+
+   *links_count += 1;
+
+   return 0;
+}
+
+/*
+ * Records that word 'b' followed word 'a': adds (or counts) a forward link
+ * a -> b on 'a' and a backward link b -> a on 'b'. Both updates are always
+ * attempted, the forward one first.
+ *
+ * Returns 0 if both updates succeeded, a negative value otherwise.
+ */
+static int link_words
+(
+   struct ZoO_knowledge k [const restrict static 1],
+   ZoO_index const a,
+   ZoO_index const b
+)
+{
+   struct ZoO_knowledge_word * const from = (k->words + a);
+   struct ZoO_knowledge_word * const to = (k->words + b);
+
+   int const forward_status =
+      link_to
+      (
+         &(from->forward_links_count),
+         &(from->forward_links_occurrences),
+         &(from->forward_links),
+         b
+      );
+
+   int const backward_status =
+      link_to
+      (
+         &(to->backward_links_count),
+         &(to->backward_links_occurrences),
+         &(to->backward_links),
+         a
+      );
+
+   /* Each status is 0 or -1, so the bitwise OR is -1 if either failed. */
+   return (backward_status | forward_status);
+}
+
+/*
+ * Learns every word of 'string' and records the links between consecutive
+ * words, as well as the START OF LINE -> first word and
+ * last word -> END OF LINE links.
+ *
+ * Nothing is learned (and 0 is returned) when 'string' has no words or when
+ * one of the 'aliases_count' entries of 'aliases' matches the first word
+ * according to ZoO_IS_PREFIX.
+ *
+ * Returns 0 on success. Returns -1 as soon as a word cannot be learned (the
+ * rest of the line is then skipped), or at the end if any link could not be
+ * recorded (the remaining words are still processed in that case).
+ */
+int ZoO_knowledge_assimilate
+(
+   struct ZoO_knowledge k [const static 1],
+   struct ZoO_strings string [const restrict static 1],
+   ZoO_index const aliases_count,
+   const char * restrict aliases [const restrict static aliases_count]
+)
+{
+   int error;
+   ZoO_index alias;
+   ZoO_index curr_word, next_word;
+   ZoO_index curr_word_id, next_word_id;
+
+   if (string->words_count == 0)
+   {
+      return 0;
+   }
+
+   for (alias = 0; alias < aliases_count; ++alias)
+   {
+      if (ZoO_IS_PREFIX(aliases[alias], string->words[0]))
+      {
+         return 0;
+      }
+   }
+
+   /*
+    * Has to be cleared before the first link attempt: clearing it afterwards
+    * would discard a failure to link the first word to START OF LINE.
+    */
+   error = 0;
+
+   curr_word = 0;
+
+   if (ZoO_knowledge_learn(k, string->words[curr_word], &curr_word_id) < 0)
+   {
+      return -1;
+   }
+
+   if (link_words(k, ZoO_WORD_START_OF_LINE, curr_word_id) < 0)
+   {
+      error = -1;
+
+      ZoO_WARNING
+      (
+         "Could not indicate that '"
+         ZoO_CHAR_STRING_SYMBOL
+         "' was the first word of the sentence.",
+         string->words[0]
+      );
+   }
+
+   next_word = 1;
+
+   while (next_word < string->words_count)
+   {
+      /* prevents words [restrict], k [restrict] */
+      if (ZoO_knowledge_learn(k, string->words[next_word], &next_word_id) < 0)
+      {
+         return -1;
+      }
+
+      if (link_words(k, curr_word_id, next_word_id) < 0)
+      {
+         error = -1;
+
+         ZoO_WARNING
+         (
+            "Could not add a link between words '"
+            ZoO_CHAR_STRING_SYMBOL
+            "' and '"
+            ZoO_CHAR_STRING_SYMBOL
+            "'.",
+            string->words[curr_word],
+            string->words[next_word]
+         );
+      }
+
+      curr_word = next_word;
+      curr_word_id = next_word_id;
+      /*
+       * Safe:
+       * - next_word < words_count
+       * - words_count =< ZoO_INDEX_MAX
+       * ----
+       * next_word < ZoO_INDEX_MAX
+       */
+      next_word += 1;
+   }
+
+   if (link_words(k, curr_word_id, ZoO_WORD_END_OF_LINE) < 0)
+   {
+      error = -1;
+
+      ZoO_WARNING
+      (
+         "Could not indicate that '"
+         ZoO_CHAR_STRING_SYMBOL
+         "' was the last word of the sentence.",
+         /*
+          * 'curr_word' is the position in 'string'; 'curr_word_id' is an
+          * index into the knowledge and must not be used here.
+          */
+         string->words[curr_word]
+      );
+   }
+
+   return error;
+}
+
diff --git a/src/core/create_sentences.c b/src/core/create_sentences.c
new file mode 100644
index 0000000..bc410e5
--- /dev/null
+++ b/src/core/create_sentences.c
@@ -0,0 +1,486 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h> /* defines SIZE_MAX */
+
+#include "../io/error.h"
+
+#include "knowledge.h"
+
+/*
+ * Picks one entry of 'links' at random, each entry being weighted by the
+ * matching entry of 'links_occurrences'.
+ *
+ * 'occurrences' must be non-zero and is expected to be equal to the sum of
+ * the 'links_occurrences' entries. The array length is not passed in, so this
+ * invariant is what keeps the walk inside the arrays: if the sum is smaller
+ * than 'occurrences', the loop can read past their end.
+ *
+ * Returns the selected element of 'links'.
+ */
+static ZoO_index pick_an_index
+(
+   ZoO_index const occurrences,
+   const ZoO_index links_occurrences [const restrict static 1],
+   const ZoO_index links [const restrict static 1]
+)
+{
+   ZoO_index result, accumulator, random_number;
+
+   result = 0;
+   accumulator = links_occurrences[0];
+
+   /* random_number is in [0, occurrences - 1]. */
+   random_number = (((ZoO_index) rand()) % occurrences);
+
+   /*
+    * Entry i owns the random values in
+    * [sum(links_occurrences[0..i-1]), sum(links_occurrences[0..i]) - 1],
+    * so we must keep walking while the running sum is less than *or equal
+    * to* the random value. A strict '<' would give the first entry one value
+    * too many and the last entry one too few (an entry seen once in last
+    * position would never be selected).
+    */
+   while (accumulator <= random_number)
+   {
+      /*
+       * Should be safe:
+       * result overflowing <-> sum('links_occurrences') > 'occurrences'
+       * and sum('links_occurrences') == 'occurrences'
+       */
+      result += 1;
+
+      /*
+       * Should be safe:
+       * - sum('links_occurrences') == 'occurrences'.
+       * - 'occurrences' is safe.
+       * ----
+       * 'accumulator' is safe.
+       */
+      accumulator += links_occurrences[result];
+   }
+
+   return links[result];
+}
+
+/*
+ * Grows a sentence to the left by repeatedly picking, at random, a word that
+ * was seen before the current leftmost word (weighted by how often).
+ *
+ * k:                knowledge holding the words and their links.
+ * word_id:          id of the word currently at the left of the sentence.
+ * current_sentence: heap-allocated, '\0'-terminated sentence. Ownership is
+ *                   taken: it is freed each time a longer sentence replaces
+ *                   it.
+ * sentence_size:    in/out, allocated size of the sentence ('\0' included).
+ * credits:          in/out, number of words that may still be added.
+ *
+ * The extension stops when START OF LINE is picked, when the credits run
+ * out, when the current word has nothing before it, or on error.
+ *
+ * Returns the (possibly reallocated) sentence; never NULL. On error the
+ * sentence built so far is returned.
+ */
+static ZoO_char * extend_left
+(
+   struct ZoO_knowledge k [const restrict static 1],
+   ZoO_index word_id,
+   ZoO_char current_sentence [static 1],
+   size_t sentence_size [const restrict static 1],
+   ZoO_index credits [const static 1]
+)
+{
+   size_t addition_size;
+   struct ZoO_knowledge_word * w;
+   ZoO_char * next_sentence;
+
+   w = (k->words + word_id);
+
+   if
+   (
+      (w->special == ZoO_WORD_STARTS_SENTENCE)
+      || (w->occurrences == 0)
+   )
+   {
+      return current_sentence;
+   }
+
+   for (;;)
+   {
+      /*
+       * A word without backward links cannot be extended. This should not
+       * happen for a word with occurrences, but it protects pick_an_index
+       * from reading an empty list if a previous assimilation failed midway.
+       */
+      if ((*credits == 0) || (w->backward_links_count == 0))
+      {
+         return current_sentence;
+      }
+
+      *credits -= 1;
+      word_id =
+         pick_an_index
+         (
+            w->occurrences,
+            w->backward_links_occurrences,
+            w->backward_links
+         );
+
+      w = (k->words + word_id);
+
+      switch (w->special)
+      {
+         case ZoO_WORD_HAS_NO_EFFECT:
+            /* FIXME: not overflow-safe. */
+            /* word also contains an '\0', which we will replace by a ' ' */
+            addition_size = w->word_size;
+            break;
+
+         case ZoO_WORD_ENDS_SENTENCE:
+            ZoO_S_WARNING("END OF LINE should not be prefixable.");
+            return current_sentence;
+
+         case ZoO_WORD_STARTS_SENTENCE:
+            return current_sentence;
+
+         case ZoO_WORD_REMOVES_LEFT_SPACE:
+         case ZoO_WORD_REMOVES_RIGHT_SPACE:
+            /* word also contains an '\0', which we will remove. */
+            addition_size = w->word_size - 1;
+            break;
+
+         default:
+            /* Unknown effect: leave the sentence as it is. */
+            return current_sentence;
+      }
+
+      if (*sentence_size > (SIZE_MAX - addition_size))
+      {
+         ZoO_S_WARNING
+         (
+            "Sentence construction aborted to avoid size_t overflow."
+         );
+
+         return current_sentence;
+      }
+
+      next_sentence =
+         (ZoO_char *) calloc
+         (
+            /* overflow-safe */
+            (*sentence_size + addition_size),
+            sizeof(ZoO_char)
+         );
+
+      if (next_sentence == (ZoO_char *) NULL)
+      {
+         ZoO_S_ERROR("Could not allocate memory to store new sentence.");
+
+         return current_sentence;
+      }
+
+      /* overflow-safe */
+      *sentence_size = (*sentence_size + addition_size);
+
+      switch (w->special)
+      {
+         case ZoO_WORD_HAS_NO_EFFECT:
+            snprintf
+            (
+               next_sentence,
+               *sentence_size,
+               " " ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL,
+               w->word,
+               current_sentence
+            );
+            break;
+
+         case ZoO_WORD_REMOVES_LEFT_SPACE:
+            /* No leading space: the word sticks to whatever comes before. */
+            snprintf
+            (
+               next_sentence,
+               *sentence_size,
+               ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL,
+               w->word,
+               current_sentence
+            );
+            break;
+
+         case ZoO_WORD_REMOVES_RIGHT_SPACE:
+            /* Skips the first character of the current sentence. */
+            snprintf
+            (
+               next_sentence,
+               *sentence_size,
+               ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL,
+               w->word,
+               /* Safe: strlen(current_sentence) >= 2 */
+               (current_sentence + 1)
+            );
+            break;
+
+         default:
+            /* TODO: PROGRAM LOGIC ERROR */
+            break;
+      }
+
+      free((void *) current_sentence);
+
+      /* prevents current_sentence [const] */
+      current_sentence = next_sentence;
+   }
+}
+
+/*
+ * Grows a sentence to the right by repeatedly picking, at random, a word
+ * that was seen after the current rightmost word (weighted by how often).
+ *
+ * k:                knowledge holding the words and their links.
+ * word_id:          id of the word currently at the right of the sentence.
+ * current_sentence: heap-allocated, '\0'-terminated sentence. Ownership is
+ *                   taken: it is freed each time a longer sentence replaces
+ *                   it.
+ * sentence_size:    in/out, allocated size of the sentence ('\0' included).
+ * credits:          in/out, number of words that may still be added.
+ *
+ * The extension stops when END OF LINE is picked, when the credits run out,
+ * when the current word has nothing after it, or on error.
+ *
+ * Returns the (possibly reallocated) sentence; never NULL. On error the
+ * sentence built so far is returned.
+ */
+static ZoO_char * extend_right
+(
+   struct ZoO_knowledge k [const restrict static 1],
+   ZoO_index word_id,
+   ZoO_char current_sentence [static 1],
+   size_t sentence_size [const restrict static 1],
+   ZoO_index credits [const static 1]
+)
+{
+   size_t addition_size;
+   struct ZoO_knowledge_word * w;
+   ZoO_char * next_sentence;
+
+   w = (k->words + word_id);
+
+   if
+   (
+      (w->special == ZoO_WORD_ENDS_SENTENCE)
+      || (w->occurrences == 0)
+   )
+   {
+      return current_sentence;
+   }
+
+   for (;;)
+   {
+      /*
+       * A word without forward links cannot be extended. This should not
+       * happen for a word with occurrences, but it protects pick_an_index
+       * from reading an empty list if a previous assimilation failed midway.
+       */
+      if ((*credits == 0) || (w->forward_links_count == 0))
+      {
+         return current_sentence;
+      }
+
+      *credits -= 1;
+
+      word_id =
+         pick_an_index
+         (
+            w->occurrences,
+            w->forward_links_occurrences,
+            w->forward_links
+         );
+
+      w = (k->words + word_id);
+
+      switch (w->special)
+      {
+         case ZoO_WORD_HAS_NO_EFFECT:
+            /* FIXME: Assumed to be overflow-safe. */
+            /* word also contains an '\0', which we will replace by a ' '. */
+            addition_size = w->word_size;
+            break;
+
+         case ZoO_WORD_ENDS_SENTENCE:
+            return current_sentence;
+
+         case ZoO_WORD_STARTS_SENTENCE:
+            ZoO_S_WARNING("START OF LINE should not be suffixable.");
+            return current_sentence;
+
+         case ZoO_WORD_REMOVES_LEFT_SPACE:
+         case ZoO_WORD_REMOVES_RIGHT_SPACE:
+            /* word also contains an '\0', which we will remove. */
+            addition_size = w->word_size - 1;
+            break;
+
+         default:
+            /* Unknown effect: leave the sentence as it is. */
+            return current_sentence;
+      }
+
+      if (*sentence_size > (SIZE_MAX - addition_size))
+      {
+         ZoO_S_WARNING
+         (
+            "Sentence construction aborted to avoid size_t overflow."
+         );
+
+         return current_sentence;
+      }
+
+      next_sentence =
+         (ZoO_char *) calloc
+         (
+            /* overflow-safe */
+            (*sentence_size + addition_size),
+            sizeof(ZoO_char)
+         );
+
+      if (next_sentence == (ZoO_char *) NULL)
+      {
+         ZoO_S_ERROR("Could not allocate memory to store new sentence.");
+
+         return current_sentence;
+      }
+
+      /* overflow-safe */
+      *sentence_size = (*sentence_size + addition_size);
+
+      switch (w->special)
+      {
+         case ZoO_WORD_REMOVES_LEFT_SPACE:
+            /*
+             * (*sentence_size - addition_size) is the previous size, so this
+             * index is the last character of the current sentence (its
+             * trailing space), which gets cut off before appending.
+             */
+            current_sentence[*sentence_size - addition_size - 2] = '\0';
+            /* fallthrough */
+
+         case ZoO_WORD_HAS_NO_EFFECT:
+            snprintf
+            (
+               next_sentence,
+               *sentence_size,
+               ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL " ",
+               current_sentence,
+               w->word
+            );
+            break;
+
+         case ZoO_WORD_REMOVES_RIGHT_SPACE:
+            /* No trailing space: the next word sticks to this one. */
+            snprintf
+            (
+               next_sentence,
+               *sentence_size,
+               ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL,
+               current_sentence,
+               w->word
+            );
+            break;
+
+         default:
+            /* TODO: PROGRAM LOGIC ERROR */
+            break;
+      }
+
+      free((void *) current_sentence);
+
+      /* prevents current_sentence [const] */
+      current_sentence = next_sentence;
+   }
+}
+
+/*
+ * Builds a sentence around a "pillar" word.
+ *
+ * The pillar is the word of 'string' that is known to 'k' and has the fewest
+ * occurrences (the first word is skipped if 'ignore_first_word' is non-zero).
+ * If no word of 'string' is known, a random known word is used instead. The
+ * sentence is then extended to the left and to the right of the pillar, using
+ * at most ZoO_MAX_REPLY_WORDS words in total.
+ *
+ * On success, '*result' is a heap-allocated '\0'-terminated string that the
+ * caller must free, and 0 is returned.
+ * On failure, '*result' is NULL and a negative value is returned: -1 if the
+ * knowledge holds no word at all, -2 if memory could not be allocated.
+ */
+int ZoO_knowledge_extend
+(
+   struct ZoO_knowledge k [const static 1],
+   const struct ZoO_strings string [const static 1],
+   int const ignore_first_word,
+   ZoO_char * result [const static 1]
+)
+{
+   int word_found;
+   size_t sentence_size;
+   ZoO_index i, word_id, word_min_score, word_min_id, credits;
+
+   /* Without this, the random fallback below would compute 'x % 0'. */
+   if (k->words_count == 0)
+   {
+      ZoO_S_WARNING("Cannot create a sentence from an empty knowledge.");
+
+      *result = (ZoO_char *) NULL;
+
+      return -1;
+   }
+
+   word_found = 0;
+   credits = ZoO_MAX_REPLY_WORDS;
+
+   if (ignore_first_word)
+   {
+      i = 1;
+   }
+   else
+   {
+      i = 0;
+   }
+
+   /* First known word of the input: initial pillar candidate. */
+   for (; i < string->words_count; ++i)
+   {
+      /* prevents k [restrict] */
+      if (ZoO_knowledge_find(k, string->words[i], &word_min_id) == 0)
+      {
+         word_found = 1;
+         word_min_score = k->words[word_min_id].occurrences;
+
+         break;
+      }
+   }
+
+   if (word_found == 0)
+   {
+      word_min_id = (rand() % k->words_count);
+      word_min_score = k->words[word_min_id].occurrences;
+   }
+
+   /* Remaining words: keep the one with the fewest occurrences. */
+   for (; i < string->words_count; ++i)
+   {
+      if
+      (
+         (ZoO_knowledge_find(k, string->words[i], &word_id) == 0)
+         && (k->words[word_id].occurrences < word_min_score)
+      )
+      {
+         word_min_score = k->words[word_id].occurrences;
+         word_min_id = word_id;
+      }
+   }
+
+   /* 3: 2 spaces + '\0' */
+   /* FIXME: not overflow-safe */
+   switch (k->words[word_min_id].special)
+   {
+      case ZoO_WORD_REMOVES_LEFT_SPACE:
+      case ZoO_WORD_REMOVES_RIGHT_SPACE:
+         /* word + ' ' + '\0' */
+         sentence_size = (strlen(k->words[word_min_id].word) + 2);
+         break;
+
+      case ZoO_WORD_HAS_NO_EFFECT:
+         /* word + ' ' * 2 + '\0' */
+         sentence_size = (strlen(k->words[word_min_id].word) + 3);
+         break;
+
+      default:
+         ZoO_WARNING
+         (
+            "'%s' was unexpectedly selected as pillar.",
+            k->words[word_min_id].word
+         );
+         /* word + '[' + ']' + ' ' * 2 + '\0' */
+         sentence_size = (strlen(k->words[word_min_id].word) + 5);
+         break;
+   }
+
+   *result = (ZoO_char *) calloc(sentence_size, sizeof(ZoO_char));
+
+   if (*result == (ZoO_char *) NULL)
+   {
+      ZoO_S_ERROR("Could not allocate memory to start sentence.");
+
+      return -2;
+   }
+
+   switch (k->words[word_min_id].special)
+   {
+      case ZoO_WORD_REMOVES_LEFT_SPACE:
+         snprintf
+         (
+            *result,
+            sentence_size,
+            ZoO_CHAR_STRING_SYMBOL " ",
+            k->words[word_min_id].word
+         );
+         break;
+
+      case ZoO_WORD_REMOVES_RIGHT_SPACE:
+         snprintf
+         (
+            *result,
+            sentence_size,
+            " " ZoO_CHAR_STRING_SYMBOL,
+            k->words[word_min_id].word
+         );
+         break;
+
+      case ZoO_WORD_HAS_NO_EFFECT:
+         snprintf
+         (
+            *result,
+            sentence_size,
+            " " ZoO_CHAR_STRING_SYMBOL " ",
+            k->words[word_min_id].word
+         );
+         break;
+
+      default:
+         snprintf
+         (
+            *result,
+            sentence_size,
+            " [" ZoO_CHAR_STRING_SYMBOL "] ",
+            k->words[word_min_id].word
+         );
+         break;
+   }
+
+   /* A pillar that was never seen in a sentence has no links to follow. */
+   if ((word_min_score == 0) || (credits == 0))
+   {
+      return 0;
+   }
+
+   /* The pillar itself uses one credit. */
+   --credits;
+
+   /* prevents result [restrict] */
+   *result = extend_left(k, word_min_id, *result, &sentence_size, &credits);
+
+   if (*result == (ZoO_char *) NULL)
+   {
+      return -2;
+   }
+
+   *result = extend_right(k, word_min_id, *result, &sentence_size, &credits);
+
+   if (*result == (ZoO_char *) NULL)
+   {
+      return -2;
+   }
+
+   return 0;
+}
diff --git a/src/core/knowledge.c b/src/core/knowledge.c
new file mode 100644
index 0000000..31ccb97
--- /dev/null
+++ b/src/core/knowledge.c
@@ -0,0 +1,447 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h> /* defines SIZE_MAX */
+
+#include "../io/error.h"
+
+#include "knowledge.h"
+
+/*
+ * Punctuation characters. Each one is registered as a one-character word
+ * with the ZoO_WORD_REMOVES_LEFT_SPACE effect when the knowledge is
+ * initialized.
+ *
+ * A single 'const' is enough here: the elements are read-only and an array
+ * cannot be reassigned, so repeating the qualifier added nothing.
+ */
+unsigned int const ZoO_knowledge_punctuation_chars_count = 7;
+const ZoO_char ZoO_knowledge_punctuation_chars[7] =
+   {
+      '!',
+      ',',
+      '.',
+      ':',
+      ';',
+      '?',
+      '~'
+   };
+
+/*
+ * Forbidden characters.
+ * NOTE(review): nothing in src/core reads this table; presumably it is
+ * consumed by the input parsing code - confirm.
+ */
+unsigned int const ZoO_knowledge_forbidden_chars_count = 8;
+const ZoO_char ZoO_knowledge_forbidden_chars[8] =
+   {
+      '(',
+      ')',
+      '[',
+      ']',
+      '{',
+      '}',
+      '<',
+      '>'
+   };
+
+/*
+ * Looks 'word' up in 'k' using a binary search over 'k->sorted_indices'
+ * (word ids ordered according to strcmp on the words).
+ *
+ * If the word is known, its id (index in 'k->words') is stored in '*result'
+ * and 0 is returned.
+ * Otherwise, '*result' receives the rank at which the word would have to be
+ * inserted in 'k->sorted_indices' to keep it ordered (0 for an empty
+ * knowledge, 'k->words_count' if it goes last), and -1 is returned.
+ */
+int ZoO_knowledge_find
+(
+   const struct ZoO_knowledge k [const restrict static 1],
+   const ZoO_char word [const restrict static 1],
+   ZoO_index result [const restrict static 1]
+)
+{
+   int cmp;
+   ZoO_index low, high, middle;
+
+   /* The candidates are the ranks in the half-open interval [low, high). */
+   low = 0;
+   high = k->words_count;
+
+   while (low < high)
+   {
+      /*
+       * overflow-safe: (high - low) does not wrap since low < high, and
+       * the result is strictly lower than high.
+       */
+      middle = (low + ((high - low) / 2));
+
+      cmp =
+         /* XXX: Assumed to be compatible with ZoO_char */
+         strcmp
+         (
+            (const char *) word,
+            (const char *) k->words[k->sorted_indices[middle]].word
+         );
+
+      if (cmp > 0)
+      {
+         /* overflow-safe: middle < high =< ZoO_INDEX_MAX */
+         low = (middle + 1);
+      }
+      else if (cmp < 0)
+      {
+         high = middle;
+      }
+      else
+      {
+         *result = k->sorted_indices[middle];
+
+         return 0;
+      }
+   }
+
+   /*
+    * Not found: every word ranked before 'low' is smaller than 'word' and
+    * every word ranked from 'low' onwards is greater.
+    */
+   *result = low;
+
+   return -1;
+}
+
+/*
+ * Puts 'w' in its initial state: no text, no special effect, no links, and
+ * an occurrence count of one.
+ */
+static void word_init (struct ZoO_knowledge_word w [const restrict static 1])
+{
+   *w =
+      (struct ZoO_knowledge_word)
+      {
+         .word_size = 0,
+         .word = (ZoO_char *) NULL,
+         .special = ZoO_WORD_HAS_NO_EFFECT,
+         .occurrences = 1,
+         .forward_links_count = 0,
+         .backward_links_count = 0,
+         .forward_links_occurrences = (ZoO_index *) NULL,
+         .backward_links_occurrences = (ZoO_index *) NULL,
+         .forward_links = (ZoO_index *) NULL,
+         .backward_links = (ZoO_index *) NULL
+      };
+}
+
+/*
+ * Registers the special words: "START OF LINE", "END OF LINE", then one
+ * one-character word per entry of ZoO_knowledge_punctuation_chars. All of
+ * them get their special effect set and their occurrence count reset to 0.
+ *
+ * Returns 0 on success, -1 if at least one punctuation word could not be
+ * added (the others are still attempted), and -2 if either of the two line
+ * markers could not be added.
+ */
+static int add_punctuation_nodes
+(
+   struct ZoO_knowledge k [const static 1]
+)
+{
+   char symbol[2];
+   ZoO_index c, word_id;
+   int status;
+
+   if (ZoO_knowledge_learn(k, "START OF LINE", &word_id) < 0)
+   {
+      ZoO_S_FATAL("Could not add 'START OF LINE' to knowledge.");
+
+      return -2;
+   }
+
+   k->words[word_id].occurrences = 0;
+   k->words[word_id].special = ZoO_WORD_STARTS_SENTENCE;
+
+   if (ZoO_knowledge_learn(k, "END OF LINE", &word_id) < 0)
+   {
+      ZoO_S_FATAL("Could not add 'END OF LINE' to knowledge.");
+
+      return -2;
+   }
+
+   k->words[word_id].occurrences = 0;
+   k->words[word_id].special = ZoO_WORD_ENDS_SENTENCE;
+
+   /* Each punctuation character becomes a one-character string. */
+   symbol[1] = '\0';
+   status = 0;
+
+   for (c = 0; c < ZoO_knowledge_punctuation_chars_count; ++c)
+   {
+      symbol[0] = ZoO_knowledge_punctuation_chars[c];
+
+      if (ZoO_knowledge_learn(k, symbol, &word_id) < 0)
+      {
+         ZoO_WARNING("Could not add '%s' to knowledge.", symbol);
+
+         status = -1;
+
+         continue;
+      }
+
+      k->words[word_id].occurrences = 0;
+      k->words[word_id].special = ZoO_WORD_REMOVES_LEFT_SPACE;
+   }
+
+   return status;
+}
+
+/*
+ * Sets 'k' up as an empty knowledge, then registers the special words.
+ *
+ * Returns 0 on success, including when only some punctuation words could not
+ * be added. Returns -1, after finalizing 'k', if the line markers could not
+ * be added.
+ */
+int ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1])
+{
+   k->sorted_indices = (ZoO_index *) NULL;
+   k->words = (struct ZoO_knowledge_word *) NULL;
+   k->words_count = 0;
+
+   /* -1 (a punctuation word is missing) is tolerated; -2 is not. */
+   if (add_punctuation_nodes(k) >= -1)
+   {
+      return 0;
+   }
+
+   ZoO_knowledge_finalize(k);
+
+   return -1;
+}
+
+/*
+ * Releases the text and the four link arrays owned by 'w', and leaves it
+ * with NULL pointers and link counts of zero.
+ */
+static void finalize_word
+(
+   struct ZoO_knowledge_word w [const restrict static 1]
+)
+{
+   /* free(NULL) does nothing, so no member needs a NULL test. */
+   free((void *) w->word);
+   free((void *) w->forward_links_occurrences);
+   free((void *) w->backward_links_occurrences);
+   free((void *) w->forward_links);
+   free((void *) w->backward_links);
+
+   w->word = (ZoO_char *) NULL;
+   w->forward_links_occurrences = (ZoO_index *) NULL;
+   w->backward_links_occurrences = (ZoO_index *) NULL;
+   w->forward_links = (ZoO_index *) NULL;
+   w->backward_links = (ZoO_index *) NULL;
+
+   w->backward_links_count = 0;
+   w->forward_links_count = 0;
+}
+
+/*
+ * Releases everything owned by 'k': each word's own allocations, then the
+ * word list and the sorted index. 'k' is left empty (count of zero, NULL
+ * pointers).
+ */
+void ZoO_knowledge_finalize (struct ZoO_knowledge k [const restrict static 1])
+{
+   ZoO_index remaining;
+
+   /* Walks down from the last word to the first one. */
+   for (remaining = k->words_count; remaining > 0; --remaining)
+   {
+      /* prevents k [restrict] */
+      finalize_word(k->words + (remaining - 1));
+   }
+
+   k->words_count = 0;
+
+   /* free(NULL) does nothing, so no NULL test is needed. */
+   free((void *) k->words);
+   k->words = (struct ZoO_knowledge_word *) NULL;
+
+   free((void *) k->sorted_indices);
+   k->sorted_indices = (ZoO_index *) NULL;
+}
+
+/*
+ * Learns 'word'.
+ *
+ * If the word is already known, its occurrence count is incremented.
+ * Otherwise a copy of the word is appended to 'k->words' (with an occurrence
+ * count of one) and its id is inserted in 'k->sorted_indices' at the rank
+ * that keeps the index ordered.
+ *
+ * Returns 0 on success, with the id of the word in '*result'.
+ * Returns -1 on failure (occurrence count or word count at ZoO_INDEX_MAX,
+ * or allocation failure). In that case the value of '*result' is not
+ * meaningful, and the word list and sorted index still describe the same
+ * set of words as before the call.
+ */
+int ZoO_knowledge_learn
+(
+   struct ZoO_knowledge k [const static 1],
+   const ZoO_char word [const restrict static 1],
+   ZoO_index result [const restrict static 1]
+)
+{
+   struct ZoO_knowledge_word * new_wordlist;
+   ZoO_index * new_sorted_indices;
+   ZoO_char * word_copy;
+   size_t word_size;
+   ZoO_index rank;
+
+   /* prevents k [restrict] */
+   if (ZoO_knowledge_find(k, word, result) == 0)
+   {
+      if (k->words[*result].occurrences == ZoO_INDEX_MAX)
+      {
+         ZoO_WARNING
+         (
+            "Maximum number of occurrences has been reached for word '"
+            ZoO_CHAR_STRING_SYMBOL
+            "'.",
+            word
+         );
+
+         return -1;
+      }
+
+      /* overflow-safe */
+      k->words[*result].occurrences += 1;
+
+      return 0;
+   }
+
+   if (k->words_count == ZoO_INDEX_MAX)
+   {
+      ZoO_S_WARNING("Maximum number of words has been reached.");
+
+      return -1;
+   }
+
+   /* The word is unknown: the search left its insertion rank in *result. */
+   rank = *result;
+
+   /*
+    * The copy of the word is made first, so that a failure here leaves the
+    * sorted index untouched. Shifting the index before this point would,
+    * on failure, leave an entry of the index referring to a word that does
+    * not exist.
+    */
+   /* XXX: strlen assumed to work with ZoO_char. */
+   word_size = strlen(word);
+
+   if (word_size == SIZE_MAX)
+   {
+      ZoO_S_WARNING
+      (
+         "Could not learn word that had a size too big to store in a '\\0' "
+         "terminated string. Chances are, this is but a symptom of the real "
+         "problem."
+      );
+
+      return -1;
+   }
+
+   /* We also need '\0' */
+   word_size += 1;
+
+   word_copy = (ZoO_char *) calloc(word_size, sizeof(ZoO_char));
+
+   if (word_copy == (ZoO_char *) NULL)
+   {
+      ZoO_S_ERROR
+      (
+         "Could not learn word due to being unable to allocate the memory to "
+         "store it."
+      );
+
+      return -1;
+   }
+
+   memcpy(word_copy, word, word_size);
+
+   new_wordlist =
+      (struct ZoO_knowledge_word *) realloc
+      (
+         (void *) k->words,
+         (
+            (
+               /* overflow-safe: k->words_count < ZoO_INDEX_MAX */
+               (size_t) (k->words_count + 1)
+            )
+            * sizeof(struct ZoO_knowledge_word)
+         )
+      );
+
+   if (new_wordlist == (struct ZoO_knowledge_word *) NULL)
+   {
+      ZoO_ERROR
+      (
+         "Could not learn the word '%s': unable to realloc the word list.",
+         word
+      );
+
+      free((void *) word_copy);
+
+      return -1;
+   }
+
+   /* The list may have an unused extra slot if the next step fails. */
+   k->words = new_wordlist;
+
+   new_sorted_indices =
+      (ZoO_index *) realloc
+      (
+         (void *) k->sorted_indices,
+         (
+            (
+               /* overflow-safe: k->words_count < ZoO_INDEX_MAX */
+               (size_t) (k->words_count + 1)
+            )
+            * sizeof(ZoO_index)
+         )
+      );
+
+   if (new_sorted_indices == (ZoO_index *) NULL)
+   {
+      ZoO_ERROR
+      (
+         "Could not learn the word '"
+         ZoO_CHAR_STRING_SYMBOL
+         "': unable to realloc the index list.",
+         word
+      );
+
+      free((void *) word_copy);
+
+      return -1;
+   }
+
+   k->sorted_indices = new_sorted_indices;
+
+   /* Nothing can fail from here on. */
+
+   /* We can only move indices right of 'rank' if they exist. */
+   if (rank != k->words_count)
+   {
+      /*
+       * Shifts the (k->words_count - rank) entries starting at 'rank' one
+       * slot to the right; the index now has room for
+       * (k->words_count + 1) entries.
+       */
+      memmove
+      (
+         (void *) (k->sorted_indices + rank + 1),
+         (const void *) (k->sorted_indices + rank),
+         (
+            (
+               /* overflow-safe: rank < k->words_count */
+               (size_t) (k->words_count - rank)
+            )
+            * sizeof(ZoO_index)
+         )
+      );
+   }
+
+   /* The new word is stored last, so its id is the current word count. */
+   k->sorted_indices[rank] = k->words_count;
+
+   *result = k->words_count;
+
+   word_init(k->words + *result);
+
+   k->words[*result].word_size = word_size;
+   k->words[*result].word = word_copy;
+
+   /* Safe: k->words_count < ZoO_INDEX_MAX */
+   k->words_count += 1;
+
+   ZoO_DEBUG
+   (
+      ZoO_DEBUG_LEARNING,
+      "Learned word {'%s', id: %u, rank: %u}",
+      word,
+      *result,
+      rank
+   );
+
+   return 0;
+}
+
diff --git a/src/core/knowledge.h b/src/core/knowledge.h
new file mode 100644
index 0000000..f20cb16
--- /dev/null
+++ b/src/core/knowledge.h
@@ -0,0 +1,42 @@
+#ifndef _ZoO_CORE_KNOWLEDGE_H_
+#define _ZoO_CORE_KNOWLEDGE_H_
+
+#include "../tool/strings_types.h"
+
+#include "knowledge_types.h"
+
+/*
+ * Sets 'k' up as an empty knowledge and registers the special words (line
+ * markers and punctuation).
+ * Returns 0 on success, -1 on failure.
+ */
+int ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1]);
+
+/*
+ * Frees all the memory owned by 'k' and leaves it empty.
+ */
+void ZoO_knowledge_finalize (struct ZoO_knowledge k [const static 1]);
+
+/*
+ * Searches for 'word' in 'k'.
+ * Returns 0 if the word is known, its id being stored in '*result'.
+ * Returns -1 otherwise; '*result' is then the rank at which the word would be
+ * inserted in the sorted index of 'k'.
+ */
+int ZoO_knowledge_find
+(
+ const struct ZoO_knowledge k [const restrict static 1],
+ const ZoO_char word [const restrict static 1],
+ ZoO_index result [const restrict static 1]
+);
+
+/*
+ * Adds 'word' to 'k', or counts one more occurrence of it if already known.
+ * Returns 0 on success, the id of the word being stored in '*result'.
+ * Returns -1 on failure.
+ */
+int ZoO_knowledge_learn
+(
+ struct ZoO_knowledge k [const static 1],
+ const ZoO_char word [const restrict static 1],
+ ZoO_index result [const restrict static 1]
+);
+
+/*
+ * Learns the words of 'string' and the links between consecutive words.
+ * The line is skipped (and 0 returned) if it is empty or if its first word
+ * matches one of the 'aliases_count' entries of 'aliases'.
+ * A return value of -1 indicates that at least part of the line could not be
+ * assimilated.
+ */
+int ZoO_knowledge_assimilate
+(
+ struct ZoO_knowledge k [const static 1],
+ struct ZoO_strings string [const restrict static 1],
+ ZoO_index const aliases_count,
+ const char * restrict aliases [const restrict static aliases_count]
+);
+
+/*
+ * Creates a sentence built around one of the words of 'string' (or around a
+ * random known word if none of them is known). The first word of 'string' is
+ * not considered if 'ignore_first_word' is non-zero.
+ * Returns 0 on success; '*result' is then a heap-allocated string that the
+ * caller has to free.
+ * Returns a negative value on failure.
+ */
+int ZoO_knowledge_extend
+(
+ struct ZoO_knowledge k [const static 1],
+ const struct ZoO_strings string [const static 1],
+ int const ignore_first_word,
+ ZoO_char * result [const static 1]
+);
+
+#endif
diff --git a/src/core/knowledge_types.h b/src/core/knowledge_types.h
new file mode 100644
index 0000000..f2e8161
--- /dev/null
+++ b/src/core/knowledge_types.h
@@ -0,0 +1,46 @@
+#ifndef _ZoO_CORE_KNOWLEDGE_TYPES_H_
+#define _ZoO_CORE_KNOWLEDGE_TYPES_H_
+
+#include "../pervasive.h"
+
+/*
+ * Ids (indices in ZoO_knowledge.words) of the two line markers. These values
+ * hold because "START OF LINE" and "END OF LINE" are, in that order, the
+ * first two words learned when a knowledge is initialized.
+ */
+#define ZoO_WORD_START_OF_LINE 0
+#define ZoO_WORD_END_OF_LINE 1
+
+/* XXX: are we as close to immutable as we want to be? */
+/* Character tables defined in knowledge.c, along with their sizes. */
+extern unsigned int const ZoO_knowledge_punctuation_chars_count;
+extern const ZoO_char const ZoO_knowledge_punctuation_chars[7];
+extern unsigned int const ZoO_knowledge_forbidden_chars_count;
+extern const ZoO_char const ZoO_knowledge_forbidden_chars[8];
+
+
+/* Effect a word has on the sentence being built when it is selected. */
+enum ZoO_knowledge_special_effect
+{
+ /* Regular word: added with a space separating it from its neighbors. */
+ ZoO_WORD_HAS_NO_EFFECT,
+ /* "END OF LINE" marker: stops the extension to the right. */
+ ZoO_WORD_ENDS_SENTENCE,
+ /* "START OF LINE" marker: stops the extension to the left. */
+ ZoO_WORD_STARTS_SENTENCE,
+ /* Added without a space on its left (used for punctuation words). */
+ ZoO_WORD_REMOVES_LEFT_SPACE,
+ /*
+ * Added without a space on its right.
+ * NOTE(review): no code in src/core assigns this effect to a word.
+ */
+ ZoO_WORD_REMOVES_RIGHT_SPACE
+};
+
+/*
+ * A known word, along with the words seen right after it (forward links) and
+ * right before it (backward links). Links are stored as parallel arrays:
+ * X_links[i] is the id of the linked word and X_links_occurrences[i] is the
+ * number of times that link has been recorded.
+ */
+struct ZoO_knowledge_word
+{
+ /* Size of 'word', terminating '\0' included. */
+ size_t word_size;
+ /* Heap-allocated, '\0'-terminated copy of the word. */
+ ZoO_char * word;
+ /* Effect of the word on sentence construction. */
+ enum ZoO_knowledge_special_effect special;
+ /*
+ * Number of times the word was learned (reset to 0 for the special words
+ * right after their creation). Used as the total weight when picking one
+ * of the links at random.
+ */
+ ZoO_index occurrences;
+ /* Number of entries in the forward link arrays. */
+ ZoO_index forward_links_count;
+ /* Number of entries in the backward link arrays. */
+ ZoO_index backward_links_count;
+ ZoO_index * forward_links_occurrences;
+ ZoO_index * backward_links_occurrences;
+ ZoO_index * forward_links;
+ ZoO_index * backward_links;
+};
+
+/* Everything the bot knows: the words and an index to search them. */
+struct ZoO_knowledge
+{
+ /* Number of entries in both 'sorted_indices' and 'words'. */
+ ZoO_index words_count;
+ /*
+ * Ids of the words, ordered according to strcmp on the words' text. This is
+ * what the binary search of ZoO_knowledge_find goes through.
+ */
+ ZoO_index * sorted_indices;
+ /* The words, in the order they were learned. A word's id is its index. */
+ struct ZoO_knowledge_word * words;
+};
+
+#endif
diff --git a/src/core/main.c b/src/core/main.c
new file mode 100644
index 0000000..34233d6
--- /dev/null
+++ b/src/core/main.c
@@ -0,0 +1,296 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <signal.h>
+
+#include "../tool/strings.h"
+
+#include "../io/error.h"
+#include "../io/parameters.h"
+#include "../io/data_input.h"
+#include "../io/network.h"
+
+
+#include "knowledge.h"
+
+#include "state_types.h"
+
+/*
+ * Main loop flag: the loop keeps going for as long as this is non-zero.
+ * It is written from a signal handler, hence 'volatile sig_atomic_t': this is
+ * the only kind of object a handler can portably write to.
+ */
+static volatile sig_atomic_t run = 1;
+
+/*
+ * Signal handler asking the main loop to stop upon SIGINT or SIGTERM.
+ *
+ * NOTE(review): nothing in this file installs this handler (there is no call
+ * to signal() or sigaction()), and it is static, so as it stands it is never
+ * invoked. Installing it also requires deciding how a blocking network read
+ * gets interrupted.
+ */
+static void request_termination (int const signo)
+{
+   if ((signo == SIGINT) || (signo == SIGTERM))
+   {
+      run = 0;
+   }
+}
+
+/*
+ * Seeds the random number generator, then initializes the knowledge and the
+ * parameters (from the command line arguments) of 's'.
+ *
+ * Returns 0 on success, -1 on failure. On failure, the knowledge has been
+ * finalized if it had been initialized.
+ */
+static int initialize
+(
+   struct ZoO_state s [const static 1],
+   int const argc,
+   const char * argv [const static argc]
+)
+{
+   ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Zero of One is initializing...");
+
+   srand(time(NULL));
+
+   /* prevents s [restrict] */
+   if (ZoO_knowledge_initialize(&(s->knowledge)) < 0)
+   {
+      return -1;
+   }
+
+   if (ZoO_parameters_initialize(&(s->param), argc, argv) >= 0)
+   {
+      return 0;
+   }
+
+   /* The parameters are unusable: undo the knowledge initialization. */
+   ZoO_knowledge_finalize(&(s->knowledge));
+
+   return -1;
+}
+
+/*
+ * Reads the data file named in the parameters line by line and assimilates
+ * each line into the knowledge. Lines that fail to be assimilated are
+ * ignored (best effort).
+ *
+ * Returns 0 once the file has been processed, -1 if it could not be opened.
+ */
+static int load_data_file (struct ZoO_state s [const static 1])
+{
+   struct ZoO_data_input input;
+
+   if (ZoO_data_input_open(&input, s->param.data_filename) < 0)
+   {
+      return -1;
+   }
+
+   while
+   (
+      ZoO_data_input_read_line
+      (
+         &input,
+         ZoO_knowledge_punctuation_chars_count,
+         ZoO_knowledge_punctuation_chars
+      ) == 0
+   )
+   {
+      (void) ZoO_knowledge_assimilate
+      (
+         &(s->knowledge),
+         &(input.string),
+         s->param.aliases_count,
+         s->param.aliases
+      );
+   }
+
+   ZoO_data_input_close(&input);
+
+   return 0;
+}
+
+/*
+ * Releases the knowledge held by 's'. Always returns 0.
+ */
+static int finalize (struct ZoO_state s [const static 1])
+{
+   ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Zero of One is finalizing...");
+
+   /* prevents s [restrict] */
+   ZoO_knowledge_finalize(&(s->knowledge));
+
+   return 0;
+}
+
+/*
+ * Connects to the IRC server described by the parameters of 's', the first
+ * alias being passed as last argument of ZoO_network_connect.
+ *
+ * Returns whatever ZoO_network_connect returns.
+ */
+static int network_connect (struct ZoO_state s [const static 1])
+{
+   struct ZoO_parameters * const param = &(s->param);
+
+   return
+      ZoO_network_connect
+      (
+         &(s->network),
+         param->irc_server_addr,
+         param->irc_server_port,
+         param->irc_server_channel,
+         param->irc_username,
+         param->irc_realname,
+         param->aliases[0]
+      );
+}
+
+/*
+ * Decides whether the message in 'string' should be answered and whether it
+ * should be learned from. 'string' must contain at least one word.
+ *
+ * Aliases are tested in order; for each alias, the first word is tested
+ * before the other ones:
+ * - alias matching the first word: reply, do not learn;
+ * - alias matching another word: reply and learn;
+ * - no match at all: learn, and reply at random according to
+ *   'param->reply_rate' (compared with a random value in [0, 99]).
+ *
+ * Returns non-zero if a reply should be sent; '*should_learn' is always set.
+ */
+static int should_reply
+(
+   struct ZoO_parameters param [const restrict static 1],
+   struct ZoO_strings string [const restrict static 1],
+   int should_learn [const restrict static 1]
+)
+{
+   ZoO_index alias, word;
+
+   for (alias = 0; alias < param->aliases_count; ++alias)
+   {
+      if (ZoO_IS_PREFIX(param->aliases[alias], string->words[0]))
+      {
+         *should_learn = 0;
+
+         return 1;
+      }
+
+      word = 1;
+
+      while (word < string->words_count)
+      {
+         if (ZoO_IS_PREFIX(param->aliases[alias], string->words[word]))
+         {
+            *should_learn = 1;
+
+            return 1;
+         }
+
+         word += 1;
+      }
+   }
+
+   *should_learn = 1;
+
+   return (param->reply_rate >= (rand() % 100));
+}
+
+/*
+ * Handles one received message: splits it into words, replies if
+ * should_reply says so and a sentence could be created, then learns from the
+ * message unless it was addressed to the bot (see should_reply).
+ *
+ * The message is the 'msg_size' characters found at 'msg_offset' in the
+ * network buffer. The reply is copied into that same buffer before being
+ * sent, without its leading space if it has one.
+ *
+ * NOTE(review): the copy of the reply is not bounded; confirm that the
+ * network buffer is large enough for the longest possible sentence.
+ */
+static void handle_message
+(
+   struct ZoO_state s [const static 1],
+   struct ZoO_strings string [const restrict static 1],
+   ssize_t const msg_offset,
+   ssize_t const msg_size
+)
+{
+   ZoO_char * line;
+   int reply, learn;
+
+   int const parse_status =
+      ZoO_strings_parse
+      (
+         string,
+         (size_t) msg_size,
+         (s->network.msg + msg_offset),
+         ZoO_knowledge_punctuation_chars_count,
+         ZoO_knowledge_punctuation_chars
+      );
+
+   if (parse_status < 0)
+   {
+      ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Could not dissect msg.");
+
+      return;
+   }
+
+   if (string->words_count == 0)
+   {
+      return;
+   }
+
+   reply = should_reply(&(s->param), string, &learn);
+
+   /* The sentence is only created if a reply is wanted. */
+   if
+   (
+      reply
+      && (ZoO_knowledge_extend(&(s->knowledge), string, !learn, &line) == 0)
+   )
+   {
+      strcpy((s->network.msg), ((line[0] == ' ') ? (line + 1) : line));
+
+      free((void *) line);
+
+      ZoO_network_send(&(s->network));
+   }
+
+   if (!learn)
+   {
+      return;
+   }
+
+   (void) ZoO_knowledge_assimilate
+   (
+      &(s->knowledge),
+      string,
+      s->param.aliases_count,
+      s->param.aliases
+   );
+}
+
+/*
+ * Receives and handles messages until 'run' becomes zero, then disconnects
+ * from the network. Always returns 0.
+ */
+static int main_loop (struct ZoO_state s [const static 1])
+{
+   struct ZoO_strings string;
+   ssize_t msg_offset = 0;
+   ssize_t msg_size = 0;
+
+   ZoO_strings_initialize(&string);
+
+   while (run)
+   {
+      if (ZoO_network_receive(&(s->network), &msg_offset, &msg_size) != 0)
+      {
+         /* Nothing usable was received. */
+         continue;
+      }
+
+      handle_message(s, &string, msg_offset, msg_size);
+   }
+
+   ZoO_strings_finalize(&string);
+
+   ZoO_network_disconnect(&(s->network));
+
+   return 0;
+}
+
+/*
+ * Entry point: initializes the state, loads the data file, connects to the
+ * network and runs the main loop, each step being attempted only if the
+ * previous one succeeded.
+ *
+ * Returns 0 on normal termination, -1 if any step failed. The state is
+ * finalized in both cases, unless the initialization itself failed.
+ */
+int main (int const argc, const char * argv [const static argc])
+{
+   struct ZoO_state s;
+
+   if (initialize(&s, argc, argv) < 0)
+   {
+      return -1;
+   }
+
+   /* '||' stops at the first failing step. */
+   if
+   (
+      (load_data_file(&s) < 0)
+      || (network_connect(&s) < 0)
+      || (main_loop(&s) < 0)
+   )
+   {
+      (void) finalize(&s);
+
+      ZoO_S_DEBUG
+      (
+         ZoO_DEBUG_PROGRAM_FLOW,
+         "Zero of One terminated by crashing."
+      );
+
+      return -1;
+   }
+
+   (void) finalize(&s);
+
+   ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Zero of One terminated normally.");
+
+   return 0;
+}
diff --git a/src/core/state_types.h b/src/core/state_types.h
new file mode 100644
index 0000000..89c814e
--- /dev/null
+++ b/src/core/state_types.h
@@ -0,0 +1,16 @@
+#ifndef _ZoO_CORE_STATE_TYPES_H_
+#define _ZoO_CORE_STATE_TYPES_H_
+
+#include "../io/parameters_types.h"
+#include "../io/network_types.h"
+
+#include "knowledge_types.h"
+
+/* Whole state of the program, as passed around by the functions of main.c. */
+struct ZoO_state
+{
+ /* Settings read from the command line arguments. */
+ struct ZoO_parameters param;
+ /* Words and links learned so far. */
+ struct ZoO_knowledge knowledge;
+ /* Connection to the IRC server; also holds the message buffer. */
+ struct ZoO_network network;
+};
+
+#endif