| summaryrefslogtreecommitdiff |
diff options
| author | Nathanael Sensfelder <SpamShield0@MultiAgentSystems.org> | 2017-01-18 19:09:16 +0100 |
|---|---|---|
| committer | Nathanael Sensfelder <SpamShield0@MultiAgentSystems.org> | 2017-01-18 19:09:16 +0100 |
| commit | 0d49fb74eadcf933f696420cd182077927680d26 (patch) | |
| tree | 9220d260ce878f369138da12dae0300cf9ade5c9 /src/knowledge | |
| parent | 24afb3e60bafd98e6a83dcb41ee6a7f7d41e76bc (diff) | |
Done with 'core', starting to work on 'knowledge'.
Diffstat (limited to 'src/knowledge')
| -rw-r--r-- | src/knowledge/CMakeLists.txt | 11 | ||||
| -rw-r--r-- | src/knowledge/knowledge.c | 21 | ||||
| -rw-r--r-- | src/knowledge/knowledge.h | 110 | ||||
| -rw-r--r-- | src/knowledge/knowledge_finalize.c | 122 | ||||
| -rw-r--r-- | src/knowledge/knowledge_learn_sequence.c | 324 | ||||
| -rw-r--r-- | src/knowledge/knowledge_learn_word.c | 276 | ||||
| -rw-r--r-- | src/knowledge/knowledge_search.c | 336 | ||||
| -rw-r--r-- | src/knowledge/knowledge_types.h | 38 |
8 files changed, 1238 insertions, 0 deletions
diff --git a/src/knowledge/CMakeLists.txt b/src/knowledge/CMakeLists.txt new file mode 100644 index 0000000..1245321 --- /dev/null +++ b/src/knowledge/CMakeLists.txt @@ -0,0 +1,11 @@ +set( + SRC_FILES ${SRC_FILES} + ${CMAKE_CURRENT_SOURCE_DIR}/knowledge.c + ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_finalize.c + ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_learn_sequence.c + ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_learn_word.c + ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_search.c +) + +set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) + diff --git a/src/knowledge/knowledge.c b/src/knowledge/knowledge.c new file mode 100644 index 0000000..a72969e --- /dev/null +++ b/src/knowledge/knowledge.c @@ -0,0 +1,21 @@ +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../cli/cli.h" + +#include "knowledge.h" + +/** Basic functions of the ZoO_knowledge structure ****************************/ + +/* See: "knowledge.h" */ +void ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1]) +{ + k->words = (struct ZoO_knowledge_word *) NULL; + k->words_length = 0; + k->words_sorted = (ZoO_index *) NULL; + + k->sequences = (ZoO_index **) NULL; + k->sequences_length = 0; + k->sequences_sorted = (ZoO_index *) NULL; +} diff --git a/src/knowledge/knowledge.h b/src/knowledge/knowledge.h new file mode 100644 index 0000000..51d94c4 --- /dev/null +++ b/src/knowledge/knowledge.h @@ -0,0 +1,110 @@ +#ifndef _ZoO_KNOWLEDGE_KNOWLEDGE_H_ +#define _ZoO_KNOWLEDGE_KNOWLEDGE_H_ + +#include "../core/char_types.h" +#include "../core/index_types.h" + +#include "knowledge_types.h" + +void ZoO_knowledge_initialize (struct ZoO_knowledge k [const restrict static 1]); + +void ZoO_knowledge_finalize (struct ZoO_knowledge k [const restrict static 1]); + +/* + * When returning 0: + * {word} was added to {k}, or was already there. + * {*result} indicates where {word} is in {k->words}. + * + * When returning -1: + * Something went wrong when adding the occurrence of {word} to {k}. + * {k} remains semantically unchanged. + * {*result} may or may not have been altered. + */ +int ZoO_knowledge_learn_word +( + struct ZoO_knowledge k [const static 1], + const ZoO_char word [const restrict static 1], + const ZoO_index word_length, + ZoO_index result [const restrict static 1] +); + +int ZoO_knowledge_learn_sequence +( + struct ZoO_knowledge k [const restrict static 1], + const ZoO_index sequence [const restrict static 1], + const ZoO_index sequence_length, + const ZoO_index markov_order +); + +int ZoO_knowledge_learn_markov_sequence +( + struct ZoO_knowledge k [const restrict static 1], + const ZoO_index sequence [const restrict static 1], + const ZoO_index sequence_length, + const ZoO_index markov_order +); + +int ZoO_knowledge_get_following_sequences_ref +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index initial_word, + const ZoO_index * restrict following_sequences_ref [const restrict static 1], + const ZoO_index * restrict following_sequences_weights [const restrict static 1], + ZoO_index following_sequences_weights_sum [const static 1] +); + +int ZoO_knowledge_get_sequence +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequences_ref, + const ZoO_index * restrict sequence [const restrict static 1] +); + +int ZoO_knowledge_get_word +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index word_ref, + const ZoO_char * word [const restrict static 1], + size_t word_size [const restrict static 1] +); + +/* + * When returning 0: + * {word} is in {k}. + * {word} is located at {k->words[*result]}. + * + * When returning -1: + * {word} is not in {k}. + * {*result} is where {word} was expected to be found in + * {k->sorted_indices}. + */ +int ZoO_knowledge_find_word_id +( + const struct ZoO_knowledge k [const restrict static 1], + const ZoO_char word [const restrict static 1], + const size_t word_size, + ZoO_index result [const restrict static 1] +); + +int ZoO_knowledge_find_preceding_words +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequence [const restrict], + const ZoO_index markov_order, + const ZoO_index * restrict preceding_words [const restrict static 1], + const ZoO_index * restrict preceding_words_weights [const restrict static 1], + ZoO_index preceding_words_weights_sum [const restrict static 1] +); + +int ZoO_knowledge_find_following_words +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequence [const restrict], + const ZoO_index sequence_length, + const ZoO_index markov_order, + const ZoO_index * restrict following_words [const restrict static 1], + const ZoO_index * restrict following_words_weights [const restrict static 1], + ZoO_index following_words_weights_sum [const restrict static 1] +); + +#endif diff --git a/src/knowledge/knowledge_finalize.c b/src/knowledge/knowledge_finalize.c new file mode 100644 index 0000000..36a7406 --- /dev/null +++ b/src/knowledge/knowledge_finalize.c @@ -0,0 +1,122 @@ +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../cli/cli.h" + +#include "knowledge.h" + +static void knowledge_sequence_collection_finalize +( + struct ZoO_knowledge_sequence_collection c [const restrict static 1] +) +{ + ZoO_index i; + + if (c->sequences_ref != (ZoO_index *) NULL) + { + free((void *) c->sequences_ref); + c->sequences_ref = (ZoO_index *) NULL; + } + + if (c->sequences_ref_sorted != (ZoO_index *) NULL) + { + free((void *) c->sequences_ref_sorted); + c->sequences_ref_sorted = (ZoO_index *) NULL; + } + + if (c->occurrences != (ZoO_index *) NULL) + { + free((void *) c->occurrences); + c->occurrences = (ZoO_index *) NULL; + } + + for (i = 0; i < c->sequences_ref_length; ++i) + { + free((void *) c->targets[i]); + free((void *) c->targets_occurrences[i]); + } + + c->sequences_ref_length = 0; + + if (c->targets != (ZoO_index **) NULL) + { + free((void *) c->targets); + c->targets != (ZoO_index **) NULL; + } + + free((void *) c->targets_length); + + if (c->targets_occurrences != (ZoO_index **) NULL) + { + free((void *) c->targets_occurrences); + c->targets_occurrences != (ZoO_index **) NULL; + } +} + +static void knowledge_word_finalize +( + struct ZoO_knowledge_word w [const restrict static 1] +) +{ + w->word_size = 0; + w->occurrences = 0; + + if (w->word != (ZoO_char *) NULL) + { + free((void *) w->word); + + w->word = (ZoO_char *) NULL; + } + + knowledge_sequence_collection_finalize(&(w->followed)); + knowledge_sequence_collection_finalize(&(w->preceded)); +} + +/* See: "knowledge.h" */ +void ZoO_knowledge_finalize (struct ZoO_knowledge k [const restrict static 1]) +{ + ZoO_index i; + + for (i = 0; i < k->words_length; ++i) + { + knowledge_word_finalize(k->words + i); + } + + k->words_length = 0; + + if (k->words != (struct ZoO_knowledge_word *) NULL) + { + free((void *) k->words); + + k->words = (struct ZoO_knowledge_word *) NULL; + } + + if (k->words_sorted != (ZoO_index *) NULL) + { + free((void *) k->words_sorted); + + k->words_sorted = (ZoO_index *) NULL; + } + + for (i = 0; i < k->sequences_length; ++i) + { + free((void *) k->sequences[i]); + } + + k->sequences_length = 0; + + if (k->sequences != (ZoO_index **) NULL) + { + free((void *) k->sequences); + + k->sequences = (ZoO_index **) NULL; + } + + if (k->sequences_sorted != (ZoO_index *) NULL) + { + free((void *) k->sequences_sorted); + + k->sequences_sorted = (ZoO_index *) NULL; + } +} diff --git a/src/knowledge/knowledge_learn_sequence.c b/src/knowledge/knowledge_learn_sequence.c new file mode 100644 index 0000000..23a5ca7 --- /dev/null +++ b/src/knowledge/knowledge_learn_sequence.c @@ -0,0 +1,324 @@ +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../core/sequence.h" + +#include "../cli/cli.h" + +#include "knowledge.h" + +/******************************************************************************/ +/** INITIALIZE ****************************************************************/ +/******************************************************************************/ +static void set_nth_sequence +( + struct ZoO_knowledge k [const restrict static 1], + const ZoO_index sorted_sequence_id, + const ZoO_index sequence_id +) +{ + /* Safe: (> k->sequences_length 1) */ + if (sorted_sequence_id < (k->sequences_length - 1)) + { + memmove + ( + /* Safe: (=< (+ sorted_sequence_id 1) k->sequences_length) */ + (void *) (k->sequences_sorted + (sorted_sequence_id + 1)), + (const void *) (k->sequences_sorted + sorted_sequence_id), + ((k->sequences_length - 1) - sorted_sequence_id) + ); + } + + k->sequences_sorted[sorted_sequence_id] = sequence_id; +} + +/******************************************************************************/ +/** ALLOCATING MEMORY *********************************************************/ +/******************************************************************************/ +static int reallocate_sequences_list +( + struct ZoO_knowledge k [const restrict static 1] +) +{ + ZoO_index ** new_sequences; + + if ((SIZE_MAX / sizeof(ZoO_index *)) > (size_t) k->sequences_length) + { + ZoO_S_ERROR + ( + "Unable to store the size of the sequences list, as it would overflow" + "size_t variables." + ); + + return -1; + } + + new_sequences = + (ZoO_index **) realloc + ( + (void *) k->sequences, + (((size_t) k->sequences_length) * sizeof(ZoO_index *)) + ); + + if (new_sequences == (ZoO_index **) NULL) + { + ZoO_S_ERROR + ( + "Unable to allocate the memory required for the new sequence list." + ); + + return -1; + } + + k->sequences = new_sequences; + + return 0; +} + +static int reallocate_sequences_sorted_list +( + struct ZoO_knowledge k [const restrict static 1] +) +{ + ZoO_index * new_sequences_sorted; + + if ((SIZE_MAX / sizeof(ZoO_index)) > (size_t) k->sequences_length) + { + ZoO_S_ERROR + ( + "Unable to store the size of the sorted sequences list, as it would" + " overflow size_t variables." + ); + + return -1; + } + + new_sequences_sorted = + (ZoO_index *) realloc + ( + (void *) k->sequences_sorted, + ((size_t) k->sequences_length) * sizeof(ZoO_index) + ); + + if (new_sequences_sorted == (ZoO_index *) NULL) + { + ZoO_S_ERROR + ( + "Unable to allocate the memory required for the new sorted sequences" + " list." + ); + + return -1; + } + + k->sequences_sorted = new_sequences_sorted; + + return 0; +} + +/* Pre: (=< ZoO_INDEX_MAX SIZE_MAX) */ +static ZoO_index * copy_sequence +( + const ZoO_index base [const restrict static 1], + const ZoO_index base_length, + const ZoO_index markov_order +) +{ + ZoO_index * result; + + result = (ZoO_index *) calloc((size_t) base_length, sizeof(ZoO_index)); + + if (result == (ZoO_index *) NULL) + { + ZoO_S_ERROR + ( + "Unable to allocate the memory required to store a new sequence." + ); + + return (ZoO_index *) NULL; + } + + memcpy + ( + (void *) result, + (const void *) base, + (((size_t) base_length) * sizeof(ZoO_index)) + ); + + return result; +} + +static int add_sequence +( + struct ZoO_knowledge k [const restrict static 1], + const ZoO_index sequence [const restrict static 1], + const ZoO_index sequence_length, + const ZoO_index markov_order, /* Pre (> markov_order 1) */ + const ZoO_index sequence_id, + const ZoO_index sorted_sequence_id +) +{ + ZoO_index * stored_sequence; + + if (k->sequences_length == ZoO_INDEX_MAX) + { + ZoO_S_ERROR + ( + "Unable to add sequence: the variable that stores the number of known " + "sequences would overflow." + ); + + return -1; + } + + stored_sequence = copy_sequence(sequence, sequence_length, markov_order); + + if (stored_sequence == (ZoO_index *) NULL) + { + return -1; + } + + k->sequences_length += 1; + + if (reallocate_sequences_list(k) < 0) + { + k->sequences_length -= 1; + + return -1; + } + + k->sequences[sequence_id] = stored_sequence; + + if (reallocate_sequences_sorted_list(k) < 0) + { + k->sequences_length -= 1; + + return -1; + } + + set_nth_sequence(k, sorted_sequence_id, sequence_id); + + return -1; +} + +/******************************************************************************/ +/** SEARCH ********************************************************************/ +/******************************************************************************/ + +static int find_sequence +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequence [const restrict static 1], + const ZoO_index sequence_length, + const ZoO_index markov_order, /* Pre: (> 1) */ + ZoO_index sequence_id [const restrict static 1] +) +{ + /* This is a binary search */ + int cmp; + ZoO_index i, current_min, current_max; + const ZoO_index markov_sequence_length = (markov_order - 1); + + /* Handles the case where the list is empty ********************************/ + current_max = k->sequences_length; + + if (current_max == 0) + { + *sequence_id = 0; + + return -1; + } + /***************************************************************************/ + + current_min = 0; + current_max -= 1; + + for (;;) + { + i = (current_min + ((current_max - current_min) / 2)); + + cmp = + ZoO_sequence_cmp + ( + k->sequences[k->sequences_sorted[i]], + markov_sequence_length, + sequence, + sequence_length + ); + + if (cmp > 0) + { + current_min = (i + 1); + + if (current_min > current_max) + { + *sequence_id = current_min; + + return -1; + } + } + else if (cmp < 0) + { + if ((current_min > current_max) || (i == 0)) + { + *sequence_id = i; + + return -1; + } + + current_max = (i - 1); + } + else + { + *sequence_id = k->sequences_sorted[i]; + + return 0; + } + } +} + +/******************************************************************************/ +/** EXPORTED ******************************************************************/ +/******************************************************************************/ + +int ZoO_knowledge_learn_markov_sequence +( + struct ZoO_knowledge k [const restrict static 1], + const ZoO_index sequence [const restrict static 1], + const ZoO_index sequence_length, + const ZoO_index markov_order, /* Pre (> markov_order 1) */ + ZoO_index sequence_id [const restrict static 1] +) +{ + ZoO_index sorted_id; + + if + ( + find_sequence + ( + k, + sequence, + sequence_length, + markov_order, + sequence_id + ) == 0 + ) + { + return 0; + } + + sorted_id = *sequence_id; + *sequence_id = k->sequences_length; + + return + add_sequence + ( + k, + sequence, + sequence_length, + markov_order, + *sequence_id, + sorted_id + ); +} diff --git a/src/knowledge/knowledge_learn_word.c b/src/knowledge/knowledge_learn_word.c new file mode 100644 index 0000000..f55ac5b --- /dev/null +++ b/src/knowledge/knowledge_learn_word.c @@ -0,0 +1,276 @@ +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../cli/cli.h" + +#include "knowledge.h" + +/******************************************************************************/ +/** INITIALIZING STRUCTURES ***************************************************/ +/******************************************************************************/ + +static void initialize_sequence_collection +( + struct ZoO_knowledge_sequence_collection c [const restrict static 1] +) +{ + c->sequences_ref = (ZoO_index *) NULL; + c->sequences_ref_length = 0; + c->sequences_ref_sorted = (ZoO_index *) NULL; + c->occurrences = (ZoO_index *) NULL; + c->targets = (ZoO_index **) NULL; + c->targets_length = (ZoO_index *) NULL; + c->targets_occurrences = (ZoO_index **) NULL; +} + +static void initialize_word +( + struct ZoO_knowledge_word w [const restrict static 1] +) +{ + w->word = (const ZoO_char *) NULL; + w->word_size = 0; + w->occurrences = 0; + + initialize_sequence_collection(&(w->followed)); + initialize_sequence_collection(&(w->preceded)); +} + +/******************************************************************************/ +/** ALLOCATING MEMORY *********************************************************/ +/******************************************************************************/ +static ZoO_char * copy_word +( + const ZoO_char original [const restrict static 1], + const ZoO_index original_length +) +{ + ZoO_char * result; + + result = + (ZoO_char *) + calloc + ( + (size_t) (original_length + 1), + sizeof(ZoO_char) + ); + + if (result == (ZoO_char *) NULL) + { + ZoO_S_ERROR("Unable to allocate memory to store new word."); + + return (ZoO_char *) NULL; + } + + memcpy + ( + (void *) result, + (const void *) original, + (((size_t) original_length) * sizeof(ZoO_char)) + ); + + result[original_length] = '\0'; + + return 0; +} + +static int reallocate_words_list +( + struct ZoO_knowledge k [const restrict static 1] +) +{ + struct ZoO_knowledge_word * new_words; + + if + ( + (SIZE_MAX / sizeof(struct ZoO_knowledge_word)) > (size_t) k->words_length + ) + { + ZoO_S_ERROR + ( + "Unable to store the size of the words list, as it would overflow" + "size_t variables." + ); + + return -1; + } + + new_words = + (struct ZoO_knowledge_word *) realloc + ( + (void *) k->words, + (((size_t) k->words_length) * sizeof(struct ZoO_knowledge_word)) + ); + + if (new_words == (struct ZoO_knowledge_word *) NULL) + { + ZoO_S_ERROR + ( + "Unable to allocate the memory required for the new words list." + ); + + return -1; + } + + k->words = new_words; + + return 0; +} + +static int reallocate_words_sorted_list +( + struct ZoO_knowledge k [const restrict static 1] +) +{ + ZoO_index * new_words_sorted; + + /* + * This has already been tested previously for a struct ZoO_knowledge_word, + * whose size is bigger than a ZoO_index. + * */ + /* + if ((SIZE_MAX / sizeof(ZoO_index)) > (size_t) k->words_length) + { + ZoO_S_ERROR + ( + "Unable to store the size of the sorted words list, as it would" + " overflow size_t variables." + ); + + return -1; + } + */ + + new_words_sorted = + (ZoO_index *) realloc + ( + (void *) k->words_sorted, + (((size_t) k->words_length) * sizeof(ZoO_index)) + ); + + if (new_words_sorted == (ZoO_index *) NULL) + { + ZoO_S_ERROR + ( + "Unable to allocate the memory required for the new sorted words list." + ); + + return -1; + } + + k->words_sorted = new_words_sorted; + + return 0; +} + +static void set_nth_word +( + struct ZoO_knowledge k [const restrict static 1], + const ZoO_index sorted_word_id, + const ZoO_index word_id +) +{ + /* Safe: (> k->words_length 1) */ + if (sorted_word_id < (k->words_length - 1)) + { + memmove + ( + /* Safe: (=< (+ sorted_word_id 1) k->words_length) */ + (void *) (k->words_sorted + (sorted_word_id + 1)), + (const void *) (k->words_sorted + sorted_word_id), + ((k->words_length - 1) - sorted_word_id) + ); + } + + k->words_sorted[sorted_word_id] = word_id; +} + +static int add_word +( + struct ZoO_knowledge k [const restrict static 1], + const ZoO_char word [const restrict static 1], + const ZoO_index word_length, + const ZoO_index word_id, + const ZoO_index sorted_word_id +) +{ + ZoO_char * stored_word; + + if (k->words_length == ZoO_INDEX_MAX) + { + ZoO_S_ERROR + ( + "Unable to add word: the variable that stores the number of known " + "words would overflow." + ); + + return -1; + } + + stored_word = copy_word(word, word_length); + + if (stored_word == (ZoO_char *) NULL) + { + return -1; + } + + k->words_length += 1; + + if (reallocate_words_list(k) < 0) + { + k->words_length -= 1; + + return -1; + } + + initialize_word(k->words + word_id); + + k->words[word_id].word = stored_word; + k->words[word_id].word_size = ((word_length + 1) * sizeof(ZoO_char)); + + if (reallocate_words_sorted_list(k) < 0) + { + k->words_length -= 1; + + return -1; + } + + set_nth_word(k, sorted_word_id, word_id); + + return -1; +} + +/******************************************************************************/ +/** EXPORTED ******************************************************************/ +/******************************************************************************/ + +int ZoO_knowledge_learn_word +( + struct ZoO_knowledge k [const restrict static 1], + const ZoO_char word [const restrict static 1], + const ZoO_index word_length, + ZoO_index word_id [const restrict static 1] +) +{ + ZoO_index sorted_id; + + if + ( + ZoO_knowledge_find_word_id + ( + k, + word, + (word_length * sizeof(ZoO_char)), + word_id + ) == 0 + ) + { + return 0; + } + + sorted_id = *word_id; + *word_id = k->words_length; + + return add_word(k, word, word_length, *word_id, sorted_id); +} diff --git a/src/knowledge/knowledge_search.c b/src/knowledge/knowledge_search.c new file mode 100644 index 0000000..a48585b --- /dev/null +++ b/src/knowledge/knowledge_search.c @@ -0,0 +1,336 @@ +#include <stdlib.h> + +#include "../core/char.h" +#include "../core/index.h" +#include "../core/sequence.h" + +#include "../cli/cli.h" + +#include "knowledge.h" + +/* See "knowledge.h". */ +int ZoO_knowledge_find_word_id +( + const struct ZoO_knowledge k [const restrict static 1], + const ZoO_char word [const restrict static 1], + const size_t word_size, + ZoO_index result [const restrict static 1] +) +{ + /* This is a binary search */ + int cmp; + ZoO_index i, current_min, current_max; + ZoO_index candidate_id; + + /* Handles the case where the list is empty ********************************/ + current_max = k->words_length; + + if (current_max == 0) + { + *result = 0; + + return -1; + } + /***************************************************************************/ + + current_min = 0; + current_max -= 1; + + for (;;) + { + i = (current_min + ((current_max - current_min) / 2)); + + cmp = ZoO_word_cmp(word, word_size, k->words[k->words_sorted[i]].word); + + if (cmp > 0) + { + current_min = (i + 1); + + if (current_min > current_max) + { + *result = current_min; + + return -1; + } + } + else if (cmp < 0) + { + if ((current_min > current_max) || (i == 0)) + { + *result = current_min; + + return -1; + } + + current_max = (i - 1); + } + else + { + *result = k->words_sorted[i]; + + return 0; + } + } +} + +int ZoO_knowledge_find_preceding_words +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequence [const restrict], + const ZoO_index markov_order, /* Pre: (> 0) */ + const ZoO_index * restrict preceding_words [const restrict static 1], + const ZoO_index * restrict preceding_words_weights [const restrict static 1], + ZoO_index preceding_words_weights_sum [const restrict static 1] +) +{ + /* This is a binary search */ + int cmp; + ZoO_index i, current_min, current_max, local_sequence; + const ZoO_index * restrict candidate; + const ZoO_index markov_sequence_length = (markov_order - 1); + const ZoO_index word = sequence[markov_sequence_length]; + + if (word >= k->words_length) + { + ZoO_S_ERROR + ( + "Attempting to find the preceding words of an unknown word." + ); + + *preceding_words = (const ZoO_index *) NULL; + *preceding_words_weights = (const ZoO_index *) NULL; + *preceding_words_weights_sum = 0; + + return -1; + } + + + if (markov_order == 1) + { + /* Special case: empty sequences. */ + *preceding_words = (const ZoO_index *) k->words[word].preceded.targets; + + *preceding_words_weights = + (const ZoO_index *) k->words[word].preceded.targets_occurrences; + + *preceding_words_weights_sum = k->words[word].occurrences; + + return 0; + } + + /* Handles the case where the list is empty ********************************/ + current_max = k->words[word].preceded.sequences_ref_length; + + if (current_max == 0) + { + *preceding_words = (const ZoO_index *) NULL; + *preceding_words_weights = (const ZoO_index *) NULL; + *preceding_words_weights_sum = 0; + + ZoO_S_ERROR + ( + "Attempting to find the preceding words of a sequence that never had " + "any." + ); + + return -2; + } + /***************************************************************************/ + + current_min = 0; + current_max -= 1; + + for (;;) + { + i = (current_min + ((current_max - current_min) / 2)); + + local_sequence = k->words[word].preceded.sequences_ref_sorted[i]; + + (void) ZoO_knowledge_get_sequence + ( + k, + k->words[word].preceded.sequences_ref[local_sequence], + &candidate + ); + + cmp = + ZoO_sequence_cmp + ( + sequence, + markov_sequence_length, + candidate, + markov_sequence_length + ); + + if (cmp > 0) + { + current_min = (i + 1); + + if (current_min > current_max) + { + *preceding_words = (const ZoO_index *) NULL; + *preceding_words_weights = (const ZoO_index *) NULL; + *preceding_words_weights_sum = 0; + + return -2; + } + } + else if (cmp < 0) + { + if ((current_min > current_max) || (i == 0)) + { + *preceding_words = (const ZoO_index *) NULL; + *preceding_words_weights = (const ZoO_index *) NULL; + *preceding_words_weights_sum = 0; + + return -2; + } + + current_max = (i - 1); + } + else + { + *preceding_words = k->words[word].preceded.targets[local_sequence]; + + *preceding_words_weights = + k->words[word].preceded.targets_occurrences[local_sequence]; + + *preceding_words_weights_sum = + k->words[word].preceded.occurrences[local_sequence]; + + return 0; + } + } +} + +int ZoO_knowledge_find_following_words +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequence [const restrict], + const ZoO_index sequence_length, + const ZoO_index markov_order, + const ZoO_index * restrict following_words [const restrict static 1], + const ZoO_index * restrict following_words_weights [const restrict static 1], + ZoO_index following_words_weights_sum [const restrict static 1] +) +{ + /* This is a binary search */ + int cmp; + ZoO_index i, current_min, current_max, local_sequence; + const ZoO_index * restrict candidate; + const ZoO_index markov_sequence_length = (markov_order - 1); + const ZoO_index sequence_offset = + ((sequence_length - markov_sequence_length) - 1); + const ZoO_index word = sequence[sequence_offset]; + + if (word >= k->words_length) + { + ZoO_S_ERROR + ( + "Attempting to find the following words of an unknown word." + ); + + *following_words = (const ZoO_index *) NULL; + *following_words_weights = (const ZoO_index *) NULL; + *following_words_weights_sum = 0; + + return -1; + } + + if (markov_order == 1) + { + /* Special case: empty sequences. */ + *following_words = (const ZoO_index *) k->words[word].preceded.targets; + + *following_words_weights = + (const ZoO_index *) k->words[word].preceded.targets_occurrences; + + *following_words_weights_sum = k->words[word].occurrences; + + return 0; + } + + /* Handles the case where the list is empty ********************************/ + current_max = k->words[word].preceded.sequences_ref_length; + + if (current_max == 0) + { + *following_words = (const ZoO_index *) NULL; + *following_words_weights = (const ZoO_index *) NULL; + *following_words_weights_sum = 0; + + ZoO_S_WARNING + ( + "Attempting to find the following words of a sequence that never had " + "any." + ); + + return -2; + } + /***************************************************************************/ + + current_min = 0; + current_max -= 1; + + for (;;) + { + i = (current_min + ((current_max - current_min) / 2)); + + local_sequence = k->words[word].followed.sequences_ref_sorted[i]; + + (void) ZoO_knowledge_get_sequence + ( + k, + k->words[word].followed.sequences_ref[local_sequence], + &candidate + ); + + cmp = + ZoO_sequence_cmp + ( + (sequence + sequence_offset), + markov_sequence_length, + candidate, + markov_sequence_length + ); + + if (cmp > 0) + { + current_min = (i + 1); + + if (current_min > current_max) + { + *following_words = (const ZoO_index *) NULL; + *following_words_weights = (const ZoO_index *) NULL; + *following_words_weights_sum = 0; + + return -2; + } + } + else if (cmp < 0) + { + if ((current_min > current_max) || (i == 0)) + { + *following_words = (const ZoO_index *) NULL; + *following_words_weights = (const ZoO_index *) NULL; + *following_words_weights_sum = 0; + + return -2; + } + + current_max = (i - 1); + } + else + { + *following_words = k->words[word].followed.targets[local_sequence]; + + *following_words_weights = + k->words[word].followed.targets_occurrences[local_sequence]; + + *following_words_weights_sum = + k->words[word].followed.occurrences[local_sequence]; + + return 0; + } + } +} diff --git a/src/knowledge/knowledge_types.h b/src/knowledge/knowledge_types.h new file mode 100644 index 0000000..7eafc8b --- /dev/null +++ b/src/knowledge/knowledge_types.h @@ -0,0 +1,38 @@ +#ifndef _ZoO_KNOWLEDGE_KNOWLEDGE_TYPES_H_ +#define _ZoO_KNOWLEDGE_KNOWLEDGE_TYPES_H_ + +#include "../core/index_types.h" +#include "../core/char_types.h" + +struct ZoO_knowledge_sequence_collection +{ + ZoO_index * sequences_ref; + ZoO_index sequences_ref_length; + ZoO_index * sequences_ref_sorted; + ZoO_index * occurrences; + ZoO_index ** targets; + ZoO_index * targets_length; + ZoO_index ** targets_occurrences; +}; + +struct ZoO_knowledge_word +{ + const ZoO_char * word; + size_t word_size; + ZoO_index occurrences; + struct ZoO_knowledge_sequence_collection followed; + struct ZoO_knowledge_sequence_collection preceded; +}; + +struct ZoO_knowledge +{ + struct ZoO_knowledge_word * words; + ZoO_index words_length; + ZoO_index * words_sorted; + ZoO_index ** sequences; + ZoO_index sequences_length; + ZoO_index * sequences_sorted; + ZoO_index sequences_length; +}; + +#endif |


