summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNathanael Sensfelder <SpamShield0@MultiAgentSystems.org>2017-01-18 19:09:16 +0100
committerNathanael Sensfelder <SpamShield0@MultiAgentSystems.org>2017-01-18 19:09:16 +0100
commit0d49fb74eadcf933f696420cd182077927680d26 (patch)
tree9220d260ce878f369138da12dae0300cf9ade5c9 /src/knowledge
parent24afb3e60bafd98e6a83dcb41ee6a7f7d41e76bc (diff)
Done with 'core', starting to work on 'knowledge'.
Diffstat (limited to 'src/knowledge')
-rw-r--r--src/knowledge/CMakeLists.txt11
-rw-r--r--src/knowledge/knowledge.c21
-rw-r--r--src/knowledge/knowledge.h110
-rw-r--r--src/knowledge/knowledge_finalize.c122
-rw-r--r--src/knowledge/knowledge_learn_sequence.c324
-rw-r--r--src/knowledge/knowledge_learn_word.c276
-rw-r--r--src/knowledge/knowledge_search.c336
-rw-r--r--src/knowledge/knowledge_types.h38
8 files changed, 1238 insertions, 0 deletions
diff --git a/src/knowledge/CMakeLists.txt b/src/knowledge/CMakeLists.txt
new file mode 100644
index 0000000..1245321
--- /dev/null
+++ b/src/knowledge/CMakeLists.txt
@@ -0,0 +1,11 @@
+set(
+ SRC_FILES ${SRC_FILES}
+ ${CMAKE_CURRENT_SOURCE_DIR}/knowledge.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_finalize.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_learn_sequence.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_learn_word.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_search.c
+)
+
+set(SRC_FILES ${SRC_FILES} PARENT_SCOPE)
+
diff --git a/src/knowledge/knowledge.c b/src/knowledge/knowledge.c
new file mode 100644
index 0000000..a72969e
--- /dev/null
+++ b/src/knowledge/knowledge.c
@@ -0,0 +1,21 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h> /* defines SIZE_MAX */
+
+#include "../cli/cli.h"
+
+#include "knowledge.h"
+
+/** Basic functions of the ZoO_knowledge structure ****************************/
+
+/* See: "knowledge.h" */
+void ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1])
+{
+ k->words = (struct ZoO_knowledge_word *) NULL;
+ k->words_length = 0;
+ k->words_sorted = (ZoO_index *) NULL;
+
+ k->sequences = (ZoO_index **) NULL;
+ k->sequences_length = 0;
+ k->sequences_sorted = (ZoO_index *) NULL;
+}
diff --git a/src/knowledge/knowledge.h b/src/knowledge/knowledge.h
new file mode 100644
index 0000000..51d94c4
--- /dev/null
+++ b/src/knowledge/knowledge.h
@@ -0,0 +1,110 @@
+#ifndef _ZoO_KNOWLEDGE_KNOWLEDGE_H_
+#define _ZoO_KNOWLEDGE_KNOWLEDGE_H_
+
+#include "../core/char_types.h"
+#include "../core/index_types.h"
+
+#include "knowledge_types.h"
+
+void ZoO_knowledge_initialize (struct ZoO_knowledge k [const restrict static 1]);
+
+void ZoO_knowledge_finalize (struct ZoO_knowledge k [const restrict static 1]);
+
+/*
+ * When returning 0:
+ * {word} was added to {k}, or was already there.
+ * {*result} indicates where {word} is in {k->words}.
+ *
+ * When returning -1:
+ * Something went wrong when adding the occurrence of {word} to {k}.
+ * {k} remains semantically unchanged.
+ * {*result} may or may not have been altered.
+ */
+int ZoO_knowledge_learn_word
+(
+ struct ZoO_knowledge k [const static 1],
+ const ZoO_char word [const restrict static 1],
+ const ZoO_index word_length,
+ ZoO_index result [const restrict static 1]
+);
+
+int ZoO_knowledge_learn_sequence
+(
+ struct ZoO_knowledge k [const restrict static 1],
+ const ZoO_index sequence [const restrict static 1],
+ const ZoO_index sequence_length,
+ const ZoO_index markov_order
+);
+
+int ZoO_knowledge_learn_markov_sequence
+(
+ struct ZoO_knowledge k [const restrict static 1],
+ const ZoO_index sequence [const restrict static 1],
+ const ZoO_index sequence_length,
+ const ZoO_index markov_order
+);
+
+int ZoO_knowledge_get_following_sequences_ref
+(
+ const struct ZoO_knowledge k [const static 1],
+ const ZoO_index initial_word,
+ const ZoO_index * restrict following_sequences_ref [const restrict static 1],
+ const ZoO_index * restrict following_sequences_weights [const restrict static 1],
+ ZoO_index following_sequences_weights_sum [const static 1]
+);
+
+int ZoO_knowledge_get_sequence
+(
+ const struct ZoO_knowledge k [const static 1],
+ const ZoO_index sequences_ref,
+ const ZoO_index * restrict sequence [const restrict static 1]
+);
+
+int ZoO_knowledge_get_word
+(
+ const struct ZoO_knowledge k [const static 1],
+ const ZoO_index word_ref,
+ const ZoO_char * word [const restrict static 1],
+ size_t word_size [const restrict static 1]
+);
+
+/*
+ * When returning 0:
+ * {word} is in {k}.
+ * {word} is located at {k->words[*result]}.
+ *
+ * When returning -1:
+ * {word} is not in {k}.
+ * {*result} is where {word} was expected to be found in
+ * {k->sorted_indices}.
+ */
+int ZoO_knowledge_find_word_id
+(
+ const struct ZoO_knowledge k [const restrict static 1],
+ const ZoO_char word [const restrict static 1],
+ const size_t word_size,
+ ZoO_index result [const restrict static 1]
+);
+
+int ZoO_knowledge_find_preceding_words
+(
+ const struct ZoO_knowledge k [const static 1],
+ const ZoO_index sequence [const restrict],
+ const ZoO_index markov_order,
+ const ZoO_index * restrict preceding_words [const restrict static 1],
+ const ZoO_index * restrict preceding_words_weights [const restrict static 1],
+ ZoO_index preceding_words_weights_sum [const restrict static 1]
+);
+
+int ZoO_knowledge_find_following_words
+(
+ const struct ZoO_knowledge k [const static 1],
+ const ZoO_index sequence [const restrict],
+ const ZoO_index sequence_length,
+ const ZoO_index markov_order,
+ const ZoO_index * restrict following_words [const restrict static 1],
+ const ZoO_index * restrict following_words_weights [const restrict static 1],
+ ZoO_index following_words_weights_sum [const restrict static 1]
+);
+
+#endif
diff --git a/src/knowledge/knowledge_finalize.c b/src/knowledge/knowledge_finalize.c
new file mode 100644
index 0000000..36a7406
--- /dev/null
+++ b/src/knowledge/knowledge_finalize.c
@@ -0,0 +1,122 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h> /* defines SIZE_MAX */
+
+#include "../cli/cli.h"
+
+#include "knowledge.h"
+
+static void knowledge_sequence_collection_finalize
+(
+ struct ZoO_knowledge_sequence_collection c [const restrict static 1]
+)
+{
+ ZoO_index i;
+
+ if (c->sequences_ref != (ZoO_index *) NULL)
+ {
+ free((void *) c->sequences_ref);
+ c->sequences_ref = (ZoO_index *) NULL;
+ }
+
+ if (c->sequences_ref_sorted != (ZoO_index *) NULL)
+ {
+ free((void *) c->sequences_ref_sorted);
+ c->sequences_ref_sorted = (ZoO_index *) NULL;
+ }
+
+ if (c->occurrences != (ZoO_index *) NULL)
+ {
+ free((void *) c->occurrences);
+ c->occurrences = (ZoO_index *) NULL;
+ }
+
+ for (i = 0; i < c->sequences_ref_length; ++i)
+ {
+ free((void *) c->targets[i]);
+ free((void *) c->targets_occurrences[i]);
+ }
+
+ c->sequences_ref_length = 0;
+
+ if (c->targets != (ZoO_index **) NULL)
+ {
+ free((void *) c->targets);
+ c->targets != (ZoO_index **) NULL;
+ }
+
+ free((void *) c->targets_length);
+
+ if (c->targets_occurrences != (ZoO_index **) NULL)
+ {
+ free((void *) c->targets_occurrences);
+ c->targets_occurrences != (ZoO_index **) NULL;
+ }
+}
+
+static void knowledge_word_finalize
+(
+ struct ZoO_knowledge_word w [const restrict static 1]
+)
+{
+ w->word_size = 0;
+ w->occurrences = 0;
+
+ if (w->word != (ZoO_char *) NULL)
+ {
+ free((void *) w->word);
+
+ w->word = (ZoO_char *) NULL;
+ }
+
+ knowledge_sequence_collection_finalize(&(w->followed));
+ knowledge_sequence_collection_finalize(&(w->preceded));
+}
+
+/* See: "knowledge.h" */
+void ZoO_knowledge_finalize (struct ZoO_knowledge k [const restrict static 1])
+{
+ ZoO_index i;
+
+ for (i = 0; i < k->words_length; ++i)
+ {
+ knowledge_word_finalize(k->words + i);
+ }
+
+ k->words_length = 0;
+
+ if (k->words != (struct ZoO_knowledge_word *) NULL)
+ {
+ free((void *) k->words);
+
+ k->words = (struct ZoO_knowledge_word *) NULL;
+ }
+
+ if (k->words_sorted != (ZoO_index *) NULL)
+ {
+ free((void *) k->words_sorted);
+
+ k->words_sorted = (ZoO_index *) NULL;
+ }
+
+ for (i = 0; i < k->sequences_length; ++i)
+ {
+ free((void *) k->sequences[i]);
+ }
+
+ k->sequences_length = 0;
+
+ if (k->sequences != (ZoO_index **) NULL)
+ {
+ free((void *) k->sequences);
+
+ k->sequences = (ZoO_index **) NULL;
+ }
+
+ if (k->sequences_sorted != (ZoO_index *) NULL)
+ {
+ free((void *) k->sequences_sorted);
+
+ k->sequences_sorted = (ZoO_index *) NULL;
+ }
+}
diff --git a/src/knowledge/knowledge_learn_sequence.c b/src/knowledge/knowledge_learn_sequence.c
new file mode 100644
index 0000000..23a5ca7
--- /dev/null
+++ b/src/knowledge/knowledge_learn_sequence.c
@@ -0,0 +1,324 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h> /* defines SIZE_MAX */
+
+#include "../core/sequence.h"
+
+#include "../cli/cli.h"
+
+#include "knowledge.h"
+
+/******************************************************************************/
+/** INITIALIZE ****************************************************************/
+/******************************************************************************/
+static void set_nth_sequence
+(
+ struct ZoO_knowledge k [const restrict static 1],
+ const ZoO_index sorted_sequence_id,
+ const ZoO_index sequence_id
+)
+{
+ /* Safe: (> k->sequences_length 1) */
+ if (sorted_sequence_id < (k->sequences_length - 1))
+ {
+ memmove
+ (
+ /* Safe: (=< (+ sorted_sequence_id 1) k->sequences_length) */
+ (void *) (k->sequences_sorted + (sorted_sequence_id + 1)),
+ (const void *) (k->sequences_sorted + sorted_sequence_id),
+ ((k->sequences_length - 1) - sorted_sequence_id)
+ );
+ }
+
+ k->sequences_sorted[sorted_sequence_id] = sequence_id;
+}
+
+/******************************************************************************/
+/** ALLOCATING MEMORY *********************************************************/
+/******************************************************************************/
+static int reallocate_sequences_list
+(
+ struct ZoO_knowledge k [const restrict static 1]
+)
+{
+ ZoO_index ** new_sequences;
+
+ if ((SIZE_MAX / sizeof(ZoO_index *)) > (size_t) k->sequences_length)
+ {
+ ZoO_S_ERROR
+ (
+ "Unable to store the size of the sequences list, as it would overflow"
+ "size_t variables."
+ );
+
+ return -1;
+ }
+
+ new_sequences =
+ (ZoO_index **) realloc
+ (
+ (void *) k->sequences,
+ (((size_t) k->sequences_length) * sizeof(ZoO_index *))
+ );
+
+ if (new_sequences == (ZoO_index **) NULL)
+ {
+ ZoO_S_ERROR
+ (
+ "Unable to allocate the memory required for the new sequence list."
+ );
+
+ return -1;
+ }
+
+ k->sequences = new_sequences;
+
+ return 0;
+}
+
+static int reallocate_sequences_sorted_list
+(
+ struct ZoO_knowledge k [const restrict static 1]
+)
+{
+ ZoO_index * new_sequences_sorted;
+
+ if ((SIZE_MAX / sizeof(ZoO_index)) > (size_t) k->sequences_length)
+ {
+ ZoO_S_ERROR
+ (
+ "Unable to store the size of the sorted sequences list, as it would"
+ " overflow size_t variables."
+ );
+
+ return -1;
+ }
+
+ new_sequences_sorted =
+ (ZoO_index *) realloc
+ (
+ (void *) k->sequences_sorted,
+ ((size_t) k->sequences_length) * sizeof(ZoO_index)
+ );
+
+ if (new_sequences_sorted == (ZoO_index *) NULL)
+ {
+ ZoO_S_ERROR
+ (
+ "Unable to allocate the memory required for the new sorted sequences"
+ " list."
+ );
+
+ return -1;
+ }
+
+ k->sequences_sorted = new_sequences_sorted;
+
+ return 0;
+}
+
+/* Pre: (=< ZoO_INDEX_MAX SIZE_MAX) */
+static ZoO_index * copy_sequence
+(
+ const ZoO_index base [const restrict static 1],
+ const ZoO_index base_length,
+ const ZoO_index markov_order
+)
+{
+ ZoO_index * result;
+
+ result = (ZoO_index *) calloc((size_t) base_length, sizeof(ZoO_index));
+
+ if (result == (ZoO_index *) NULL)
+ {
+ ZoO_S_ERROR
+ (
+ "Unable to allocate the memory required to store a new sequence."
+ );
+
+ return (ZoO_index *) NULL;
+ }
+
+ memcpy
+ (
+ (void *) result,
+ (const void *) base,
+ (((size_t) base_length) * sizeof(ZoO_index))
+ );
+
+ return result;
+}
+
+static int add_sequence
+(
+ struct ZoO_knowledge k [const restrict static 1],
+ const ZoO_index sequence [const restrict static 1],
+ const ZoO_index sequence_length,
+ const ZoO_index markov_order, /* Pre (> markov_order 1) */
+ const ZoO_index sequence_id,
+ const ZoO_index sorted_sequence_id
+)
+{
+ ZoO_index * stored_sequence;
+
+ if (k->sequences_length == ZoO_INDEX_MAX)
+ {
+ ZoO_S_ERROR
+ (
+ "Unable to add sequence: the variable that stores the number of known "
+ "sequences would overflow."
+ );
+
+ return -1;
+ }
+
+ stored_sequence = copy_sequence(sequence, sequence_length, markov_order);
+
+ if (stored_sequence == (ZoO_index *) NULL)
+ {
+ return -1;
+ }
+
+ k->sequences_length += 1;
+
+ if (reallocate_sequences_list(k) < 0)
+ {
+ k->sequences_length -= 1;
+
+ return -1;
+ }
+
+ k->sequences[sequence_id] = stored_sequence;
+
+ if (reallocate_sequences_sorted_list(k) < 0)
+ {
+ k->sequences_length -= 1;
+
+ return -1;
+ }
+
+ set_nth_sequence(k, sorted_sequence_id, sequence_id);
+
+ return -1;
+}
+
+/******************************************************************************/
+/** SEARCH ********************************************************************/
+/******************************************************************************/
+
+static int find_sequence
+(
+ const struct ZoO_knowledge k [const static 1],
+ const ZoO_index sequence [const restrict static 1],
+ const ZoO_index sequence_length,
+ const ZoO_index markov_order, /* Pre: (> 1) */
+ ZoO_index sequence_id [const restrict static 1]
+)
+{
+ /* This is a binary search */
+ int cmp;
+ ZoO_index i, current_min, current_max;
+ const ZoO_index markov_sequence_length = (markov_order - 1);
+
+ /* Handles the case where the list is empty ********************************/
+ current_max = k->sequences_length;
+
+ if (current_max == 0)
+ {
+ *sequence_id = 0;
+
+ return -1;
+ }
+ /***************************************************************************/
+
+ current_min = 0;
+ current_max -= 1;
+
+ for (;;)
+ {
+ i = (current_min + ((current_max - current_min) / 2));
+
+ cmp =
+ ZoO_sequence_cmp
+ (
+ k->sequences[k->sequences_sorted[i]],
+ markov_sequence_length,
+ sequence,
+ sequence_length
+ );
+
+ if (cmp > 0)
+ {
+ current_min = (i + 1);
+
+ if (current_min > current_max)
+ {
+ *sequence_id = current_min;
+
+ return -1;
+ }
+ }
+ else if (cmp < 0)
+ {
+ if ((current_min > current_max) || (i == 0))
+ {
+ *sequence_id = i;
+
+ return -1;
+ }
+
+ current_max = (i - 1);
+ }
+ else
+ {
+ *sequence_id = k->sequences_sorted[i];
+
+ return 0;
+ }
+ }
+}
+
+/******************************************************************************/
+/** EXPORTED ******************************************************************/
+/******************************************************************************/
+
+int ZoO_knowledge_learn_markov_sequence
+(
+ struct ZoO_knowledge k [const restrict static 1],
+ const ZoO_index sequence [const restrict static 1],
+ const ZoO_index sequence_length,
+ const ZoO_index markov_order, /* Pre (> markov_order 1) */
+ ZoO_index sequence_id [const restrict static 1]
+)
+{
+ ZoO_index sorted_id;
+
+ if
+ (
+ find_sequence
+ (
+ k,
+ sequence,
+ sequence_length,
+ markov_order,
+ sequence_id
+ ) == 0
+ )
+ {
+ return 0;
+ }
+
+ sorted_id = *sequence_id;
+ *sequence_id = k->sequences_length;
+
+ return
+ add_sequence
+ (
+ k,
+ sequence,
+ sequence_length,
+ markov_order,
+ *sequence_id,
+ sorted_id
+ );
+}
diff --git a/src/knowledge/knowledge_learn_word.c b/src/knowledge/knowledge_learn_word.c
new file mode 100644
index 0000000..f55ac5b
--- /dev/null
+++ b/src/knowledge/knowledge_learn_word.c
@@ -0,0 +1,276 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h> /* defines SIZE_MAX */
+
+#include "../cli/cli.h"
+
+#include "knowledge.h"
+
+/******************************************************************************/
+/** INITIALIZING STRUCTURES ***************************************************/
+/******************************************************************************/
+
+static void initialize_sequence_collection
+(
+ struct ZoO_knowledge_sequence_collection c [const restrict static 1]
+)
+{
+ c->sequences_ref = (ZoO_index *) NULL;
+ c->sequences_ref_length = 0;
+ c->sequences_ref_sorted = (ZoO_index *) NULL;
+ c->occurrences = (ZoO_index *) NULL;
+ c->targets = (ZoO_index **) NULL;
+ c->targets_length = (ZoO_index *) NULL;
+ c->targets_occurrences = (ZoO_index **) NULL;
+}
+
+static void initialize_word
+(
+ struct ZoO_knowledge_word w [const restrict static 1]
+)
+{
+ w->word = (const ZoO_char *) NULL;
+ w->word_size = 0;
+ w->occurrences = 0;
+
+ initialize_sequence_collection(&(w->followed));
+ initialize_sequence_collection(&(w->preceded));
+}
+
+/******************************************************************************/
+/** ALLOCATING MEMORY *********************************************************/
+/******************************************************************************/
+static ZoO_char * copy_word
+(
+ const ZoO_char original [const restrict static 1],
+ const ZoO_index original_length
+)
+{
+ ZoO_char * result;
+
+ result =
+ (ZoO_char *)
+ calloc
+ (
+ (size_t) (original_length + 1),
+ sizeof(ZoO_char)
+ );
+
+ if (result == (ZoO_char *) NULL)
+ {
+ ZoO_S_ERROR("Unable to allocate memory to store new word.");
+
+ return (ZoO_char *) NULL;
+ }
+
+ memcpy
+ (
+ (void *) result,
+ (const void *) original,
+ (((size_t) original_length) * sizeof(ZoO_char))
+ );
+
+ result[original_length] = '\0';
+
+ return 0;
+}
+
+static int reallocate_words_list
+(
+ struct ZoO_knowledge k [const restrict static 1]
+)
+{
+ struct ZoO_knowledge_word * new_words;
+
+ if
+ (
+ (SIZE_MAX / sizeof(struct ZoO_knowledge_word)) > (size_t) k->words_length
+ )
+ {
+ ZoO_S_ERROR
+ (
+ "Unable to store the size of the words list, as it would overflow"
+ "size_t variables."
+ );
+
+ return -1;
+ }
+
+ new_words =
+ (struct ZoO_knowledge_word *) realloc
+ (
+ (void *) k->words,
+ (((size_t) k->words_length) * sizeof(struct ZoO_knowledge_word))
+ );
+
+ if (new_words == (struct ZoO_knowledge_word *) NULL)
+ {
+ ZoO_S_ERROR
+ (
+ "Unable to allocate the memory required for the new words list."
+ );
+
+ return -1;
+ }
+
+ k->words = new_words;
+
+ return 0;
+}
+
+static int reallocate_words_sorted_list
+(
+ struct ZoO_knowledge k [const restrict static 1]
+)
+{
+ ZoO_index * new_words_sorted;
+
+ /*
+ * This has already been tested previously for a struct ZoO_knowledge_word,
+ * whose size is bigger than a ZoO_index.
+ * */
+ /*
+ if ((SIZE_MAX / sizeof(ZoO_index)) > (size_t) k->words_length)
+ {
+ ZoO_S_ERROR
+ (
+ "Unable to store the size of the sorted words list, as it would"
+ " overflow size_t variables."
+ );
+
+ return -1;
+ }
+ */
+
+ new_words_sorted =
+ (ZoO_index *) realloc
+ (
+ (void *) k->words_sorted,
+ (((size_t) k->words_length) * sizeof(ZoO_index))
+ );
+
+ if (new_words_sorted == (ZoO_index *) NULL)
+ {
+ ZoO_S_ERROR
+ (
+ "Unable to allocate the memory required for the new sorted words list."
+ );
+
+ return -1;
+ }
+
+ k->words_sorted = new_words_sorted;
+
+ return 0;
+}
+
+static void set_nth_word
+(
+ struct ZoO_knowledge k [const restrict static 1],
+ const ZoO_index sorted_word_id,
+ const ZoO_index word_id
+)
+{
+ /* Safe: (> k->words_length 1) */
+ if (sorted_word_id < (k->words_length - 1))
+ {
+ memmove
+ (
+ /* Safe: (=< (+ sorted_word_id 1) k->words_length) */
+ (void *) (k->words_sorted + (sorted_word_id + 1)),
+ (const void *) (k->words_sorted + sorted_word_id),
+ ((k->words_length - 1) - sorted_word_id)
+ );
+ }
+
+ k->words_sorted[sorted_word_id] = word_id;
+}
+
+static int add_word
+(
+ struct ZoO_knowledge k [const restrict static 1],
+ const ZoO_char word [const restrict static 1],
+ const ZoO_index word_length,
+ const ZoO_index word_id,
+ const ZoO_index sorted_word_id
+)
+{
+ ZoO_char * stored_word;
+
+ if (k->words_length == ZoO_INDEX_MAX)
+ {
+ ZoO_S_ERROR
+ (
+ "Unable to add word: the variable that stores the number of known "
+ "words would overflow."
+ );
+
+ return -1;
+ }
+
+ stored_word = copy_word(word, word_length);
+
+ if (stored_word == (ZoO_char *) NULL)
+ {
+ return -1;
+ }
+
+ k->words_length += 1;
+
+ if (reallocate_words_list(k) < 0)
+ {
+ k->words_length -= 1;
+
+ return -1;
+ }
+
+ initialize_word(k->words + word_id);
+
+ k->words[word_id].word = stored_word;
+ k->words[word_id].word_size = ((word_length + 1) * sizeof(ZoO_char));
+
+ if (reallocate_words_sorted_list(k) < 0)
+ {
+ k->words_length -= 1;
+
+ return -1;
+ }
+
+ set_nth_word(k, sorted_word_id, word_id);
+
+ return -1;
+}
+
+/******************************************************************************/
+/** EXPORTED ******************************************************************/
+/******************************************************************************/
+
+int ZoO_knowledge_learn_word
+(
+ struct ZoO_knowledge k [const restrict static 1],
+ const ZoO_char word [const restrict static 1],
+ const ZoO_index word_length,
+ ZoO_index word_id [const restrict static 1]
+)
+{
+ ZoO_index sorted_id;
+
+ if
+ (
+ ZoO_knowledge_find_word_id
+ (
+ k,
+ word,
+ (word_length * sizeof(ZoO_char)),
+ word_id
+ ) == 0
+ )
+ {
+ return 0;
+ }
+
+ sorted_id = *word_id;
+ *word_id = k->words_length;
+
+ return add_word(k, word, word_length, *word_id, sorted_id);
+}
diff --git a/src/knowledge/knowledge_search.c b/src/knowledge/knowledge_search.c
new file mode 100644
index 0000000..a48585b
--- /dev/null
+++ b/src/knowledge/knowledge_search.c
@@ -0,0 +1,336 @@
+#include <stdlib.h>
+
+#include "../core/char.h"
+#include "../core/index.h"
+#include "../core/sequence.h"
+
+#include "../cli/cli.h"
+
+#include "knowledge.h"
+
+/* See "knowledge.h". */
+int ZoO_knowledge_find_word_id
+(
+ const struct ZoO_knowledge k [const restrict static 1],
+ const ZoO_char word [const restrict static 1],
+ const size_t word_size,
+ ZoO_index result [const restrict static 1]
+)
+{
+ /* This is a binary search */
+ int cmp;
+ ZoO_index i, current_min, current_max;
+ ZoO_index candidate_id;
+
+ /* Handles the case where the list is empty ********************************/
+ current_max = k->words_length;
+
+ if (current_max == 0)
+ {
+ *result = 0;
+
+ return -1;
+ }
+ /***************************************************************************/
+
+ current_min = 0;
+ current_max -= 1;
+
+ for (;;)
+ {
+ i = (current_min + ((current_max - current_min) / 2));
+
+ cmp = ZoO_word_cmp(word, word_size, k->words[k->words_sorted[i]].word);
+
+ if (cmp > 0)
+ {
+ current_min = (i + 1);
+
+ if (current_min > current_max)
+ {
+ *result = current_min;
+
+ return -1;
+ }
+ }
+ else if (cmp < 0)
+ {
+ if ((current_min > current_max) || (i == 0))
+ {
+ *result = current_min;
+
+ return -1;
+ }
+
+ current_max = (i - 1);
+ }
+ else
+ {
+ *result = k->words_sorted[i];
+
+ return 0;
+ }
+ }
+}
+
+int ZoO_knowledge_find_preceding_words
+(
+ const struct ZoO_knowledge k [const static 1],
+ const ZoO_index sequence [const restrict],
+ const ZoO_index markov_order, /* Pre: (> 0) */
+ const ZoO_index * restrict preceding_words [const restrict static 1],
+ const ZoO_index * restrict preceding_words_weights [const restrict static 1],
+ ZoO_index preceding_words_weights_sum [const restrict static 1]
+)
+{
+ /* This is a binary search */
+ int cmp;
+ ZoO_index i, current_min, current_max, local_sequence;
+ const ZoO_index * restrict candidate;
+ const ZoO_index markov_sequence_length = (markov_order - 1);
+ const ZoO_index word = sequence[markov_sequence_length];
+
+ if (word >= k->words_length)
+ {
+ ZoO_S_ERROR
+ (
+ "Attempting to find the preceding words of an unknown word."
+ );
+
+ *preceding_words = (const ZoO_index *) NULL;
+ *preceding_words_weights = (const ZoO_index *) NULL;
+ *preceding_words_weights_sum = 0;
+
+ return -1;
+ }
+
+
+ if (markov_order == 1)
+ {
+ /* Special case: empty sequences. */
+ *preceding_words = (const ZoO_index *) k->words[word].preceded.targets;
+
+ *preceding_words_weights =
+ (const ZoO_index *) k->words[word].preceded.targets_occurrences;
+
+ *preceding_words_weights_sum = k->words[word].occurrences;
+
+ return 0;
+ }
+
+ /* Handles the case where the list is empty ********************************/
+ current_max = k->words[word].preceded.sequences_ref_length;
+
+ if (current_max == 0)
+ {
+ *preceding_words = (const ZoO_index *) NULL;
+ *preceding_words_weights = (const ZoO_index *) NULL;
+ *preceding_words_weights_sum = 0;
+
+ ZoO_S_ERROR
+ (
+ "Attempting to find the preceding words of a sequence that never had "
+ "any."
+ );
+
+ return -2;
+ }
+ /***************************************************************************/
+
+ current_min = 0;
+ current_max -= 1;
+
+ for (;;)
+ {
+ i = (current_min + ((current_max - current_min) / 2));
+
+ local_sequence = k->words[word].preceded.sequences_ref_sorted[i];
+
+ (void) ZoO_knowledge_get_sequence
+ (
+ k,
+ k->words[word].preceded.sequences_ref[local_sequence],
+ &candidate
+ );
+
+ cmp =
+ ZoO_sequence_cmp
+ (
+ sequence,
+ markov_sequence_length,
+ candidate,
+ markov_sequence_length
+ );
+
+ if (cmp > 0)
+ {
+ current_min = (i + 1);
+
+ if (current_min > current_max)
+ {
+ *preceding_words = (const ZoO_index *) NULL;
+ *preceding_words_weights = (const ZoO_index *) NULL;
+ *preceding_words_weights_sum = 0;
+
+ return -2;
+ }
+ }
+ else if (cmp < 0)
+ {
+ if ((current_min > current_max) || (i == 0))
+ {
+ *preceding_words = (const ZoO_index *) NULL;
+ *preceding_words_weights = (const ZoO_index *) NULL;
+ *preceding_words_weights_sum = 0;
+
+ return -2;
+ }
+
+ current_max = (i - 1);
+ }
+ else
+ {
+ *preceding_words = k->words[word].preceded.targets[local_sequence];
+
+ *preceding_words_weights =
+ k->words[word].preceded.targets_occurrences[local_sequence];
+
+ *preceding_words_weights_sum =
+ k->words[word].preceded.occurrences[local_sequence];
+
+ return 0;
+ }
+ }
+}
+
+int ZoO_knowledge_find_following_words
+(
+ const struct ZoO_knowledge k [const static 1],
+ const ZoO_index sequence [const restrict],
+ const ZoO_index sequence_length,
+ const ZoO_index markov_order,
+ const ZoO_index * restrict following_words [const restrict static 1],
+ const ZoO_index * restrict following_words_weights [const restrict static 1],
+ ZoO_index following_words_weights_sum [const restrict static 1]
+)
+{
+ /* This is a binary search */
+ int cmp;
+ ZoO_index i, current_min, current_max, local_sequence;
+ const ZoO_index * restrict candidate;
+ const ZoO_index markov_sequence_length = (markov_order - 1);
+ const ZoO_index sequence_offset =
+ ((sequence_length - markov_sequence_length) - 1);
+ const ZoO_index word = sequence[sequence_offset];
+
+ if (word >= k->words_length)
+ {
+ ZoO_S_ERROR
+ (
+ "Attempting to find the following words of an unknown word."
+ );
+
+ *following_words = (const ZoO_index *) NULL;
+ *following_words_weights = (const ZoO_index *) NULL;
+ *following_words_weights_sum = 0;
+
+ return -1;
+ }
+
+ if (markov_order == 1)
+ {
+ /* Special case: empty sequences. */
+ *following_words = (const ZoO_index *) k->words[word].preceded.targets;
+
+ *following_words_weights =
+ (const ZoO_index *) k->words[word].preceded.targets_occurrences;
+
+ *following_words_weights_sum = k->words[word].occurrences;
+
+ return 0;
+ }
+
+ /* Handles the case where the list is empty ********************************/
+ current_max = k->words[word].preceded.sequences_ref_length;
+
+ if (current_max == 0)
+ {
+ *following_words = (const ZoO_index *) NULL;
+ *following_words_weights = (const ZoO_index *) NULL;
+ *following_words_weights_sum = 0;
+
+ ZoO_S_WARNING
+ (
+ "Attempting to find the following words of a sequence that never had "
+ "any."
+ );
+
+ return -2;
+ }
+ /***************************************************************************/
+
+ current_min = 0;
+ current_max -= 1;
+
+ for (;;)
+ {
+ i = (current_min + ((current_max - current_min) / 2));
+
+ local_sequence = k->words[word].followed.sequences_ref_sorted[i];
+
+ (void) ZoO_knowledge_get_sequence
+ (
+ k,
+ k->words[word].followed.sequences_ref[local_sequence],
+ &candidate
+ );
+
+ cmp =
+ ZoO_sequence_cmp
+ (
+ (sequence + sequence_offset),
+ markov_sequence_length,
+ candidate,
+ markov_sequence_length
+ );
+
+ if (cmp > 0)
+ {
+ current_min = (i + 1);
+
+ if (current_min > current_max)
+ {
+ *following_words = (const ZoO_index *) NULL;
+ *following_words_weights = (const ZoO_index *) NULL;
+ *following_words_weights_sum = 0;
+
+ return -2;
+ }
+ }
+ else if (cmp < 0)
+ {
+ if ((current_min > current_max) || (i == 0))
+ {
+ *following_words = (const ZoO_index *) NULL;
+ *following_words_weights = (const ZoO_index *) NULL;
+ *following_words_weights_sum = 0;
+
+ return -2;
+ }
+
+ current_max = (i - 1);
+ }
+ else
+ {
+ *following_words = k->words[word].followed.targets[local_sequence];
+
+ *following_words_weights =
+ k->words[word].followed.targets_occurrences[local_sequence];
+
+ *following_words_weights_sum =
+ k->words[word].followed.occurrences[local_sequence];
+
+ return 0;
+ }
+ }
+}
diff --git a/src/knowledge/knowledge_types.h b/src/knowledge/knowledge_types.h
new file mode 100644
index 0000000..7eafc8b
--- /dev/null
+++ b/src/knowledge/knowledge_types.h
@@ -0,0 +1,38 @@
+#ifndef _ZoO_KNOWLEDGE_KNOWLEDGE_TYPES_H_
+#define _ZoO_KNOWLEDGE_KNOWLEDGE_TYPES_H_
+
+#include "../core/index_types.h"
+#include "../core/char_types.h"
+
+struct ZoO_knowledge_sequence_collection
+{
+ ZoO_index * sequences_ref;
+ ZoO_index sequences_ref_length;
+ ZoO_index * sequences_ref_sorted;
+ ZoO_index * occurrences;
+ ZoO_index ** targets;
+ ZoO_index * targets_length;
+ ZoO_index ** targets_occurrences;
+};
+
+struct ZoO_knowledge_word
+{
+ const ZoO_char * word;
+ size_t word_size;
+ ZoO_index occurrences;
+ struct ZoO_knowledge_sequence_collection followed;
+ struct ZoO_knowledge_sequence_collection preceded;
+};
+
+struct ZoO_knowledge
+{
+ struct ZoO_knowledge_word * words;
+ ZoO_index words_length;
+ ZoO_index * words_sorted;
+ ZoO_index ** sequences;
+ ZoO_index sequences_length;
+ ZoO_index * sequences_sorted;
+ ZoO_index sequences_length;
+};
+
+#endif