From 492b9cd1ecc234ea8f3080b305103702d2ca772b Mon Sep 17 00:00:00 2001 From: Nathanael Sensfelder Date: Sat, 7 Jan 2017 23:30:35 +0100 Subject: Starting to rewrite the knowledge stuff. --- src/core/CMakeLists.txt | 4 +- src/core/char.c | 48 +++ src/core/char.h | 21 ++ src/core/char_types.h | 17 + src/core/create_sequence.c | 739 ------------------------------------------- src/core/knowledge.c | 50 --- src/core/knowledge.h | 78 ++--- src/core/knowledge_search.c | 339 ++++++++++++++++++++ src/core/knowledge_types.h | 58 +--- src/core/sequence.c | 152 +++------ src/core/sequence.h | 23 +- src/core/sequence_creation.c | 708 +++++++++++++++++++++++++++++++++++++++++ src/pervasive.h | 10 - 13 files changed, 1262 insertions(+), 985 deletions(-) create mode 100644 src/core/char.c create mode 100644 src/core/char.h create mode 100644 src/core/char_types.h delete mode 100644 src/core/create_sequence.c create mode 100644 src/core/knowledge_search.c create mode 100644 src/core/sequence_creation.c (limited to 'src') diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index af5ca65..37b95cb 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1,9 +1,11 @@ set( SRC_FILES ${SRC_FILES} + ${CMAKE_CURRENT_SOURCE_DIR}/char.c ${CMAKE_CURRENT_SOURCE_DIR}/main.c ${CMAKE_CURRENT_SOURCE_DIR}/knowledge.c + ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_search.c ${CMAKE_CURRENT_SOURCE_DIR}/assimilate.c - ${CMAKE_CURRENT_SOURCE_DIR}/create_sequence.c + ${CMAKE_CURRENT_SOURCE_DIR}/sequence_creation.c ${CMAKE_CURRENT_SOURCE_DIR}/sequence.c ) diff --git a/src/core/char.c b/src/core/char.c new file mode 100644 index 0000000..39ca72e --- /dev/null +++ b/src/core/char.c @@ -0,0 +1,48 @@ +#include + +#include "char.h" + +int ZoO_char_is_banned (const ZoO_char c) +{ + switch (c) + { + case '(': + case ')': + case '[': + case ']': + case '{': + case '}': + case '<': + case '>': + return 1; + + default: + return 0; + } +} + +int ZoO_char_is_punctuation (const ZoO_char c) +{ + switch (c) + { + case '!': + case ',': + case '.': + case ':': + case ';': + case '?': + return 1; + + default: + return 0; + } +} + +int ZoO_word_cmp +( + const ZoO_char word_a [const static 1], + const ZoO_char word_b [const static 1] +) +{ + return strcmp((const char *) word_a, (const char *) word_b); +} diff --git a/src/core/char.h b/src/core/char.h new file mode 100644 index 0000000..772a3a2 --- /dev/null +++ b/src/core/char.h @@ -0,0 +1,21 @@ +#ifndef _ZoO_CORE_CHAR_H_ +#define _ZoO_CORE_CHAR_H_ + +#include "char_types.h" + +enum ZoO_word_property ZoO_get_word_property +( + const ZoO_char word [const restrict], + size_t word_size +); + +int ZoO_word_cmp +( + const ZoO_char word_a [const static 1], + const ZoO_char word_b [const static 1] +); + +int ZoO_char_is_punctuation (const ZoO_char c); +int ZoO_word_char_is_banned (const ZoO_char c); + +#endif diff --git a/src/core/char_types.h b/src/core/char_types.h new file mode 100644 index 0000000..67b5294 --- /dev/null +++ b/src/core/char_types.h @@ -0,0 +1,17 @@ +#ifndef _ZoO_CORE_CHAR_TYPES_H_ +#define _ZoO_CORE_CHAR_TYPES_H_ + +enum ZoO_word_property +{ + ZoO_WORD_NO_PROPERTY, + ZoO_WORD_HAS_NO_LEFT_SEPARATOR, + ZoO_WORD_HAS_NO_RIGHT_SEPARATOR +}; + +/* ZoO_char = UTF-8 char */ +typedef char ZoO_char; + +/* Functions that can handle UTF-8 'char' will use this symbol. */ +#define ZoO_CHAR_STRING_SYMBOL "%s" + +#endif diff --git a/src/core/create_sequence.c b/src/core/create_sequence.c deleted file mode 100644 index 6b2cb62..0000000 --- a/src/core/create_sequence.c +++ /dev/null @@ -1,739 +0,0 @@ -#include -#include -#include -#include /* defines SIZE_MAX */ - -#include "../io/error.h" - -#include "../core/index.h" -#include "../core/knowledge.h" - -#include "sequence.h" - -/* - * Returns a randomly chosen index pointing to a element in {weights}. - * The values of {weights} are used as weights to guide the choice. - * Returns: - * Value in [0, (length weights)[. - * Pre: - * (> weights_sum 0). - * (= (sum weights) weights_sum). - */ -static ZoO_index weighted_random_pick -( - const ZoO_index weights [const restrict static 1], - const ZoO_index weights_sum -) -{ - ZoO_index result, accumulator, random_number; - - accumulator = 0; - - /* Safe: Included in [0, weights_sum]. */ - random_number = ZoO_index_random_up_to(weights_sum); - - for (result = 0; accumulator < random_number; ++result) - { - /* Safe: (= (sum weights) weights_sum) */ - accumulator += weights[result]; - } - - return result; -} - -/* - * FIXME: This does not belong here. - * Calculates the size the sentence will have upon addition of the word, taking - * into account {effect}. - * Returns: - * 0 on success. - * -1 iff adding the word would overflow {sentence_size}. - * Post: - * (initialized new_size) - */ -static int get_new_size -( - const size_t word_size, - const size_t sentence_size, - const enum ZoO_knowledge_special_effect effect, - size_t new_size [const restrict static 1] -) -{ - size_t added_size; - - switch (effect) - { - case ZoO_WORD_HAS_NO_EFFECT: - /* word also contains an '\0', which we will replace by a ' ' */ - added_size = word_size; - break; - - case ZoO_WORD_ENDS_SENTENCE: - case ZoO_WORD_STARTS_SENTENCE: - added_size = 0; - break; - - case ZoO_WORD_REMOVES_LEFT_SPACE: - case ZoO_WORD_REMOVES_RIGHT_SPACE: - /* word also contains an '\0', which we will remove. */ - added_size = (word_size - 1); - break; - } - - if ((SIZE_MAX - word_size) > sentence_size) - { - /* New size Would overflow. */ - *new_size = sentence_size; - - return -1; - } - - /* Safe: (=< SIZE_MAX (+ sentence_size added_size)) */ - *new_size = (sentence_size + added_size); - - return 0; -} - -/******************************************************************************/ -/** ADDING ELEMENTS TO THE LEFT ***********************************************/ -/******************************************************************************/ - -/* - * Adds an id to the left of the sequence. - * This requires the reallocation of {sequence}. The freeing of the previous - * memory space is handled. If an error happened, {*sequence} remains untouched. - * Returns: - * 0 on success. - * -1 iff adding the word would cause an overflow. - * -2 iff memory allocation was unsuccessful. - * Post: - * (initialized {sequence}) - * (initialized {*sequence}) - */ -static int left_append -( - const ZoO_index word_id, - ZoO_index * sequence [const restrict], - const size_t sequence_size -) -{ - ZoO_index * new_sequence; - - if ((SIZE_MAX - sizeof(ZoO_index)) > sequence_size) - { - ZoO_S_ERROR - ( - "Left side append aborted, as the new sequence's size would overflow." - ); - - return -1; - } - - new_sequence = (ZoO_index *) malloc(sizeof(ZoO_index) + sequence_size); - - if (new_sequence == (ZoO_index *) NULL) - { - ZoO_S_ERROR - ( - "Left side append aborted, as memory for the new sequence could not be" - " allocated." - ); - - return -2; - } - - if (sequence_size > 0) - { - memcpy - ( - (void *) (new_sequence + 1), - (const void *) sequence, - sequence_size - ); - - free((void *) sequence); - } - - new_sequence[0] = word_id; - - *sequence = new_sequence; - - return 0; -} - -/* - * Adds an id to the left of the sequence, according to what is known as likely - * to fit there. - * This requires the reallocation of {sequence}. The freeing of the previous - * memory space is handled. If an error happened, {*sequence} remains untouched. - * Returns: - * 0 on success. - * -1 iff nothing fitting was found. - * -2 iff the addition of that id failed. - * Pre: - * (initialized {sequence}) - * (initialized {k}) - * (initialized {*sequence[0..(MARKOV_ORDER - 1)]}) - */ -static int extend_left -( - ZoO_index * sequence [const restrict static 1], - const size_t sequence_size, - const struct ZoO_knowledge k [const restrict static 1] -) -{ - const ZoO_index * restrict preceding_words; - const ZoO_index * restrict preceding_words_weights; - ZoO_index preceding_words_weights_sum; - - if - ( - ZoO_knowledge_get_preceding_words - ( - k, - *sequence, - &preceding_words, - &preceding_words_weights, - &preceding_words_weights_sum - ) < 0 - ) - { - return -1; - } - - /* preceding_words_weights_sum > 0 */ - - if - ( - left_append - ( - weighted_random_pick - ( - preceding_words_weights, - preceding_words_weights_sum - ), - sequence, - sequence_size - ) < 0 - ) - { - return -3; - } - - return 0; -} - -/* - * Continuously adds ids to the left of the sequence, according to what is known - * as likely to fit there. If {credits} is NULL, it will stop upon reaching - * the id indicating the start of a sequence, otherwise it will also limit to - * {*credits} words added (including the one indicating the start of a - * sequence). - * This requires the reallocation of {sequence}. The freeing of the previous - * memory space is handled. If an error happened, {sequence} remains unfreed. - * Returns: - * 0 on success. - * -1 iff we did not manage to have ZoO_START_OF_SEQUENCE_ID as a starting - * point. This cannot be caused by lack of {*credits}, but rather by a - * memory allocation problem or a more important issue in {k}. Indeed, it - * could mean we do not know any word preceding {*sequence[0]}, not even - * ZoO_START_OF_SEQUENCE_ID. - * Pre: - * (initialized {sequence}) - * (initialized {sequence_size}) - * (initialized {k}) - * (initialized {*sequence[0..(MARKOV_ORDER - 1)]}) - */ -static int complete_left_part_of_sequence -( - ZoO_index * sequence [restrict static 1], - size_t sequence_size [const restrict static 1], - ZoO_index credits [const restrict], - const struct ZoO_knowledge k [const restrict static 1] -) -{ - for (;;) - { - if ((credits == (ZoO_index *) NULL) || (*credits > 0)) - { - if (extend_left(sequence, *sequence_size, k) < 0) - { - /* We are sure *sequence[0] is defined. */ - if (*sequence[0] == ZoO_START_OF_SEQUENCE_ID) - { - /* - * We failed to add a word, but it was because none should have - * been added. - */ - return 0; - } - else - { - return -1; - } - } - } - else - { - /* No more credits available, the sequence will have to start here. */ - *sequence[0] = ZoO_START_OF_SEQUENCE_ID; - - return 0; - } - - /* - * Safe: if it was going to overflow, extend_left would have returned a - * negative value, making this statement unreachable. - */ - *sequence_size = (*sequence_size + sizeof(ZoO_index)); - - if (credits != (ZoO_index *) NULL) - { - *credits -= 1; - } - - /* We are sure *sequence[0] is defined. */ - switch (*sequence[0]) - { - case ZoO_END_OF_SEQUENCE_ID: - ZoO_S_WARNING - ( - "END OF LINE was added at the left part of an sequence." - ); - - *sequence[0] = ZoO_START_OF_SEQUENCE_ID; - return 0; - - case ZoO_START_OF_SEQUENCE_ID: - return 0; - - default: - break; - } - } -} - -/******************************************************************************/ -/** ADDING ELEMENTS TO THE RIGHT **********************************************/ -/******************************************************************************/ - -/* - * Adds an id to the right of the sequence. - * This requires the reallocation of {sequence}. The freeing of the previous - * memory space is handled. If an error happened, {sequence} remain untouched. - * Returns: - * 0 on success. - * -1 iff adding the word would cause an overflow. - * -2 iff memory allocation was unsuccessful. - * Post: - * (initialized {sequence}) - * (initialized {*sequence}) - */ -static int right_append -( - ZoO_index * sequence [const restrict], - const ZoO_index word_id, - const size_t sequence_size, - const ZoO_index sequence_length -) -{ - ZoO_index * new_sequence; - - if ((SIZE_MAX - sizeof(ZoO_index)) > sequence_size) - { - ZoO_S_ERROR - ( - "Right side append aborted, as the new sequence's size would overflow." - ); - - return -1; - } - - new_sequence = - (ZoO_index *) realloc - ( - sequence, - (sequence_size + sizeof(ZoO_index)) - ); - - if (new_sequence == (ZoO_index *) NULL) - { - ZoO_S_ERROR - ( - "Right side append aborted, as memory for the new sequence could not " - "be allocated." - ); - - return -2; - } - - new_sequence[sequence_length] = word_id; - - *sequence = new_sequence; - - return 0; -} - -/* - * Adds an id to the right of the sequence, according to what is known as likely - * to fit there. - * This requires the reallocation of {sequence}. The freeing of the previous - * memory space is handled. If an error happened, {*sequence} remains untouched. - * Returns: - * 0 on success. - * -1 iff nothing fitting was found. - * -2 iff the addition of that id failed. - * Pre: - * (initialized {sequence}) - * (initialized {k}) - * (initialized {*sequence[0..(MARKOV_ORDER - 1)]}) - */ -static int extend_right -( - ZoO_index * sequence [const restrict static 1], - const size_t sequence_size, - const ZoO_index sequence_length, - const struct ZoO_knowledge k [const restrict static 1] -) -{ - const ZoO_index * restrict following_words; - const ZoO_index * restrict following_words_weights; - - ZoO_index following_words_weights_sum; - - if - ( - ZoO_knowledge_get_following_words - ( - k, - *sequence, - sequence_length, - &following_words, - &following_words_weights, - &following_words_weights_sum - ) < 0 - ) - { - return -1; - } - - /* following_words_weights_sum > 0 */ - - if - ( - right_append - ( - sequence, - weighted_random_pick - ( - following_words_weights, - following_words_weights_sum - ), - sequence_size, - sequence_length - ) < 0 - ) - { - return -3; - } - - return 0; -} - -/* - * Continuously adds ids to the right of the sequence, according to what is - * known as likely to fit there. If {credits} is NULL, it will stop upon - * reaching the id indicating the end of a sequence, otherwise it will also - * limit to {*credits} words added (including the one indicating the end of a - * sequence). - * This requires the reallocation of {sequence}. The freeing of the previous - * memory space is handled. If an error happened, {sequence} remain untouched. - * Returns: - * 0 on success. - * -1 iff we did not manage to have ZoO_END_OF_SEQUENCE_ID as a stopping - * point. This cannot be caused by lack of {*credits}, but rather by a - * memory allocation problem or a more important issue in {k}. Indeed, it - * could mean we do not know any word following {*sequence[0]}, not even - * ZoO_END_OF_SEQUENCE_ID. - * Pre: - * (initialized {sequence}) - * (initialized {*sequence_size}) - * (initialized {k}) - * (initialized {*sequence[0..(MARKOV_ORDER - 1)]}) - */ -static int complete_right_part_of_sequence -( - ZoO_index * sequence [const restrict static 1], - size_t sequence_size [const restrict static 1], - ZoO_index credits [const restrict], - const struct ZoO_knowledge k [const restrict static 1] -) -{ - ZoO_index sequence_length; - - sequence_length = (*sequence_size / sizeof(ZoO_index)); - - for (;;) - { - if ((credits == (ZoO_index *) NULL) || (*credits > 0)) - { - if (extend_right(sequence, *sequence_size, sequence_length, k) < 0) - { - /* Safe: (> sequence_length 1) */ - if (*sequence[(sequence_length - 1)] == ZoO_END_OF_SEQUENCE_ID) - { - /* - * We failed to add a word, but it was because none should have - * been added. - */ - return 0; - } - else - { - return -1; - } - } - } - else - { - /* No more credits available, we end the sequence. */ - *sequence[(sequence_length - 1)] = ZoO_END_OF_SEQUENCE_ID; - - return 0; - } - - /* - * Safe: if it was going to overflow, extend_left would have returned a - * negative value, making this statement unreachable. - */ - *sequence_size = (*sequence_size + sizeof(ZoO_index)); - sequence_length += 1; - - if (credits != (ZoO_index *) NULL) - { - *credits -= 1; - } - - /* Safe: (> sequence_length 1) */ - switch (*sequence[(sequence_length - 1)]) - { - case ZoO_START_OF_SEQUENCE_ID: - ZoO_S_WARNING - ( - "END OF LINE was added at the right part of an sequence." - ); - - *sequence[(sequence_length - 1)] = ZoO_END_OF_SEQUENCE_ID; - return 0; - - case ZoO_END_OF_SEQUENCE_ID: - return 0; - - default: - break; - } - } -} - -/******************************************************************************/ -/** INITIALIZING SEQUENCE *****************************************************/ -/******************************************************************************/ - -/* - * Allocates the memory required to store the initial sequence. - * Returns: - * 0 on success. - * -1 if this would require more memory than can indicate a size_t variable. - * -2 if the allocation failed. - * Post: - * (initialized {*sequence}) - * (initialized {*sequence_size}) - */ -static int allocate_initial_sequence -( - ZoO_index * sequence [const restrict static 1], - size_t sequence_size [const restrict static 1], - const ZoO_index markov_order -) -{ - if ((SIZE_MAX / sizeof(ZoO_index)) > ((size_t) markov_order)) - { - ZoO_S_ERROR - ( - "Unable to store size of the initial sequence in a size_t variable." - "Either reduce the size of a ZoO_index or the markovian order." - ); - - *sequence = (ZoO_index *) NULL; - *sequence_size = 0; - - return -1; - } - - *sequence_size = ((size_t) markov_order) * sizeof(ZoO_index); - *sequence = (ZoO_index *) malloc(*sequence_size); - - if (*sequence == (void *) NULL) - { - *sequence_size = 0; - - ZoO_S_ERROR - ( - "Unable to allocate the memory required for an new sequence." - ); - - return -2; - } - - return 0; -} - -/* - * Initializes an pre-allocated sequence by filling it with {initial_word} - * followed by a sequence of ({markov_order} - 1) words that is known to have - * followed {initial_word} at least once. This sequence is chosen depending on - * how often {k} indicates it has followed {initial_word}. Note that if - * {markov_order} is 1, there is no sequence added, simply {initial_word}. - * Returns: - * 0 on success. - * -1 if no such sequence was found. - * Pre: - * (size (= {sequence} {markov_order})) - * (initialized {k}) - * (> markov_order 0) - * Post: - * (initialized {sequence[0..(markov_order - 1)]}) - */ -static int initialize_sequence -( - ZoO_index sequence [const restrict static 1], - const ZoO_index initial_word, - const ZoO_index markov_order, - const struct ZoO_knowledge k [const static 1] -) -{ - const ZoO_index * const restrict * restrict following_sequences; - const ZoO_index * restrict following_sequences_weights; - ZoO_index following_sequences_weights_sum; - ZoO_index chosen_sequence; - - sequence[0] = initial_word; - - if (markov_order == 1) - { - return 0; - } - - if - ( - ZoO_knowledge_get_following_sequences - ( - k, - initial_word, - &following_sequences, - &following_sequences_weights, - &following_sequences_weights_sum - ) < 0 - ) - { - ZoO_S_ERROR - ( - "Unable to find any sequence that would precede the initial word." - ); - - return -1; - } - - chosen_sequence = - weighted_random_pick - ( - following_sequences_weights, - following_sequences_weights_sum - ); - - /* Safe if 'allocate_initial_sequence' succeeded. */ - memcpy - ( - (void *) (sequence + 1), - (const void *) (following_sequences + chosen_sequence), - (((size_t) markov_order) - 1) * sizeof(ZoO_index) - ); - - return 0; -} - -/******************************************************************************/ -/** EXPORTED ******************************************************************/ -/******************************************************************************/ - -/* See "sequence.h" */ -int ZoO_create_sequence_from -( - const ZoO_index initial_word, - ZoO_index credits [const restrict], - const struct ZoO_knowledge k [const restrict static 1], - const ZoO_index markov_order, - ZoO_index * sequence [const restrict static 1], - size_t sequence_size [const restrict static 1] -) -{ - if (allocate_initial_sequence(sequence, sequence_size, markov_order) < 0) - { - return -1; - } - - if (initialize_sequence(*sequence, initial_word, markov_order, k) < 0) - { - free((void *) *sequence); - *sequence_size = 0; - - return -2; - } - - if - ( - complete_right_part_of_sequence - ( - sequence, - sequence_size, - credits, - k - ) < 0 - ) - { - free((void *) *sequence); - *sequence_size = 0; - - return -3; - } - - if - ( - complete_left_part_of_sequence - ( - sequence, - sequence_size, - credits, - k - ) < 0 - ) - { - free((void *) *sequence); - *sequence_size = 0; - - return -4; - } - - if ((*sequence_size / sizeof(ZoO_index)) < 3) - { - /* 2 elements, for start and stop. */ - ZoO_S_ERROR("Created sequence was empty."); - - free((void *) *sequence); - *sequence_size = 0; - - return -5; - } - - return 0; -} diff --git a/src/core/knowledge.c b/src/core/knowledge.c index 4980fdd..279a646 100644 --- a/src/core/knowledge.c +++ b/src/core/knowledge.c @@ -9,56 +9,6 @@ /** Basic functions of the ZoO_knowledge structure ****************************/ -/* XXX: are we as close to immutable as we want to be? */ -unsigned int const ZoO_knowledge_punctuation_chars_count = 8; -const ZoO_char const ZoO_knowledge_punctuation_chars[8] = - { - '!', - ',', - '.', - ':', - ';', - '?', - '~', - '\001' - }; - -/* XXX: are we as close to immutable as we want to be? */ -unsigned int const ZoO_knowledge_forbidden_chars_count = 8; -const ZoO_char const ZoO_knowledge_forbidden_chars[8]= - { - '(', - ')', - '[', - ']', - '{', - '}', - '<', - '>' - }; - -static int cmp_word -( - const void * const a, - const void * const b, - const void * const other -) -{ - ZoO_char const * word; - ZoO_index const * sorted_index; - struct ZoO_knowledge const * k; - - word = (ZoO_char const *) a; - sorted_index = (ZoO_index const *) b; - k = (struct ZoO_knowledge *) other; - - return strcmp - ( - (const char *) word, - (const char *) k->words[*sorted_index].word - ); -} - /* See "knowledge.h". */ int ZoO_knowledge_find ( diff --git a/src/core/knowledge.h b/src/core/knowledge.h index 7b5d754..b4f7b7e 100644 --- a/src/core/knowledge.h +++ b/src/core/knowledge.h @@ -1,7 +1,8 @@ #ifndef _ZoO_CORE_KNOWLEDGE_H_ #define _ZoO_CORE_KNOWLEDGE_H_ -#include "../tool/strings_types.h" +#include "../core/char_types.h" +#include "../core/index_types.h" #include "knowledge_types.h" @@ -24,22 +25,6 @@ int ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1]); */ void ZoO_knowledge_finalize (struct ZoO_knowledge k [const static 1]); -/* - * When returning 0: - * {word} is in {k}. - * {word} is located at {k->words[*result]}. - * - * When returning -1: - * {word} is not in {k}. - * {*result} is where {word} was expected to be found in - * {k->sorted_indices}. - */ -int ZoO_knowledge_find -( - const struct ZoO_knowledge k [const restrict static 1], - const ZoO_char word [const restrict static 1], - ZoO_index result [const restrict static 1] -); /* * When returning 0: @@ -59,39 +44,58 @@ int ZoO_knowledge_learn ZoO_index result [const restrict static 1] ); -int ZoO_knowledge_assimilate +int ZoO_knowledge_learn_sequence ( struct ZoO_knowledge k [const static 1], - struct ZoO_strings string [const restrict static 1], - ZoO_index const aliases_count, - const char * restrict aliases [const restrict static aliases_count] + const ZoO_index sequence [const restrict], + const ZoO_index sequence_length ); -int ZoO_knowledge_extend +int ZoO_knowledge_get_following_sequences ( - struct ZoO_knowledge k [const static 1], - const struct ZoO_strings string [const], - ZoO_index const aliases_count, - const char * restrict aliases [const restrict static aliases_count], - ZoO_char * result [const static 1] + const struct ZoO_knowledge k [const static 1], + const ZoO_index initial_word, + const ZoO_index * const restrict * following_sequences [const restrict static 1], + const ZoO_index * following_sequences_weights [const restrict static 1], + const ZoO_index following_sequences_weights_sum [const static 1] ); -int ZoO_knowledge_find_link +/* + * When returning 0: + * {word} is in {k}. + * {word} is located at {k->words[*result]}. + * + * When returning -1: + * {word} is not in {k}. + * {*result} is where {word} was expected to be found in + * {k->sorted_indices}. + */ +int ZoO_knowledge_find_word_id ( - ZoO_index const links_count, - struct ZoO_knowledge_link links [const], - ZoO_index const sequence [const restrict static ZoO_SEQUENCE_SIZE], + const struct ZoO_knowledge k [const restrict static 1], + const ZoO_char word [const restrict static 1], ZoO_index result [const restrict static 1] ); -/* Create it if it's not found. */ -int ZoO_knowledge_get_link +int ZoO_knowledge_find_preceding_words ( - ZoO_index links_count [const], - struct ZoO_knowledge_link * links [const], - ZoO_index const sequence [const restrict static ZoO_S_LINK_SIZE], - ZoO_index result [const restrict static 1] + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequence [const restrict], + const ZoO_index markov_order, + const ZoO_index * restrict preceding_words [const restrict static 1], + const ZoO_index * restrict preceding_words_weights [const restrict static 1], + ZoO_index preceding_words_weights_sum [const restrict static 1] ); +int ZoO_knowledge_find_following_words +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequence [const restrict], + const ZoO_index sequence_length, + const ZoO_index markov_order, + const ZoO_index * restrict following_words [const restrict static 1], + const ZoO_index * restrict following_words_weights [const restrict static 1], + ZoO_index following_words_weights_sum [const restrict static 1] +); #endif diff --git a/src/core/knowledge_search.c b/src/core/knowledge_search.c new file mode 100644 index 0000000..af62266 --- /dev/null +++ b/src/core/knowledge_search.c @@ -0,0 +1,339 @@ +#include + +#include "../core/char.h" +#include "../core/index.h" +#include "../core/sequence.h" + +#include "../io/error.h" + +#include "knowledge.h" + +/* See "knowledge.h". */ +int ZoO_knowledge_find_word_id +( + const struct ZoO_knowledge k [const restrict static 1], + const ZoO_char word [const restrict static 1], + ZoO_index result [const restrict static 1] +) +{ + /* This is a binary search */ + int cmp; + ZoO_index i, current_min, current_max; + ZoO_index candidate_id; + + /* Handles the case where the list is empty ********************************/ + current_max = k->words_length; + + if (current_max == 0) + { + *result = 0; + + return -1; + } + /***************************************************************************/ + + current_min = 0; + current_max -= 1; + + for (;;) + { + i = (current_min + ((current_max - current_min) / 2)); + + cmp = ZoO_word_cmp(word, k->words[k->words_sorted[i]].word); + + if (cmp > 0) + { + current_min = (i + 1); + + if (current_min > current_max) + { + *result = current_min; + + return -1; + } + } + else if (cmp < 0) + { + if ((current_min > current_max) || (i == 0)) + { + *result = current_min; + + return -1; + } + + current_max = (i - 1); + } + else + { + *result = i; + + return 0; + } + } +} + +int ZoO_knowledge_find_preceding_words +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequence [const restrict], + const ZoO_index markov_order, /* Pre: (> 0) */ + const ZoO_index * restrict preceding_words [const restrict static 1], + const ZoO_index * restrict preceding_words_weights [const restrict static 1], + ZoO_index preceding_words_weights_sum [const restrict static 1] +) +{ + /* This is a binary search */ + int cmp; + ZoO_index i, current_min, current_max; + ZoO_index candidate_id; + const ZoO_index markov_sequence_length = (markov_order - 1); + + if (sequence[markov_sequence_length] >= k->words_length) + { + ZoO_S_ERROR + ( + "Attempting to find the preceding words of an unknown word." + ); + + *preceding_words = (const ZoO_index *) NULL; + *preceding_words_weights = (const ZoO_index *) NULL; + *preceding_words_weights_sum = 0; + + return -1; + } + + *preceding_words_weights_sum = + k->words[sequence[markov_sequence_length]].occurrences; + + if (markov_order == 1) + { + /* Special case: empty sequences. */ + *preceding_words = + (const ZoO_index *) k->words + [ + sequence[markov_sequence_length] + ].preceded.targets; + + *preceding_words_weights = + (const ZoO_index *) k->words + [ + sequence[markov_sequence_length] + ].preceded.targets_occurrences; + + return 0; + } + + /* Handles the case where the list is empty ********************************/ + current_max = + k->words[sequence[markov_sequence_length]].preceded.sequences_length; + + if (current_max == 0) + { + *preceding_words = (const ZoO_index *) NULL; + *preceding_words_weights = (const ZoO_index *) NULL; + *preceding_words_weights_sum = 0; + + ZoO_S_ERROR + ( + "Attempting to find the preceding words of a sequence that never had " + "any." + ); + + return -2; + } + /***************************************************************************/ + + current_min = 0; + current_max -= 1; + + for (;;) + { + i = (current_min + ((current_max - current_min) / 2)); + + cmp = + ZoO_sequence_cmp + ( + sequence, + markov_sequence_length, + k->words[sequence[markov_sequence_length]].preceded.sequences[i], + markov_sequence_length + ); + + if (cmp > 0) + { + current_min = (i + 1); + + if (current_min > current_max) + { + *preceding_words = (const ZoO_index *) NULL; + *preceding_words_weights = (const ZoO_index *) NULL; + *preceding_words_weights_sum = 0; + + return -2; + } + } + else if (cmp < 0) + { + if ((current_min > current_max) || (i == 0)) + { + *preceding_words = (const ZoO_index *) NULL; + *preceding_words_weights = (const ZoO_index *) NULL; + *preceding_words_weights_sum = 0; + + return -2; + } + + current_max = (i - 1); + } + else + { + *preceding_words = + k->words + [ + sequence[markov_sequence_length] + ].preceded.targets[i]; + + *preceding_words_weights = + k->words + [ + sequence[markov_sequence_length] + ].preceded.targets_occurrences[i]; + + return 0; + } + } +} + +int ZoO_knowledge_find_following_words +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequence [const restrict], + const ZoO_index sequence_length, + const ZoO_index markov_order, + const ZoO_index * restrict following_words [const restrict static 1], + const ZoO_index * restrict following_words_weights [const restrict static 1], + ZoO_index following_words_weights_sum [const restrict static 1] +) +{ + /* This is a binary search */ + int cmp; + ZoO_index i, current_min, current_max; + ZoO_index candidate_id; + const ZoO_index markov_sequence_length = (markov_order - 1); + const ZoO_index word_of_interest = + (sequence_length - markov_sequence_length) - 1; + + if (sequence[word_of_interest] >= k->words_length) + { + ZoO_S_ERROR + ( + "Attempting to find the following words of an unknown word." + ); + + *following_words = (const ZoO_index *) NULL; + *following_words_weights = (const ZoO_index *) NULL; + *following_words_weights_sum = 0; + + return -1; + } + + *following_words_weights_sum = + k->words[sequence[word_of_interest]].occurrences; + + if (markov_order == 1) + { + /* Special case: empty sequences. */ + *following_words = + (const ZoO_index *) k->words + [ + sequence[word_of_interest] + ].preceded.targets; + + *following_words_weights = + (const ZoO_index *) k->words + [ + sequence[word_of_interest] + ].preceded.targets_occurrences; + + return 0; + } + + /* Handles the case where the list is empty ********************************/ + current_max = k->words[sequence[word_of_interest]].preceded.sequences_length; + + if (current_max == 0) + { + *following_words = (const ZoO_index *) NULL; + *following_words_weights = (const ZoO_index *) NULL; + *following_words_weights_sum = 0; + + ZoO_S_WARNING + ( + "Attempting to find the following words of a sequence that never had " + "any." + ); + + return -2; + } + /***************************************************************************/ + + current_min = 0; + current_max -= 1; + + for (;;) + { + i = (current_min + ((current_max - current_min) / 2)); + + cmp = + ZoO_sequence_cmp + ( + (sequence + word_of_interest), + markov_sequence_length, + k->words[sequence[word_of_interest]].followed.sequences[i], + markov_sequence_length + ); + + if (cmp > 0) + { + current_min = (i + 1); + + if (current_min > current_max) + { + *following_words = (const ZoO_index *) NULL; + *following_words_weights = (const ZoO_index *) NULL; + *following_words_weights_sum = 0; + + return -2; + } + } + else if (cmp < 0) + { + if ((current_min > current_max) || (i == 0)) + { + *following_words = (const ZoO_index *) NULL; + *following_words_weights = (const ZoO_index *) NULL; + *following_words_weights_sum = 0; + + return -2; + } + + current_max = (i - 1); + } + else + { + *following_words = + k->words + [ + sequence[markov_sequence_length] + ].followed.targets[i]; + + *following_words_weights = + k->words + [ + sequence[markov_sequence_length] + ].followed.targets_occurrences[i]; + + return 0; + } + } +} diff --git a/src/core/knowledge_types.h b/src/core/knowledge_types.h index e92b5e1..aea11da 100644 --- a/src/core/knowledge_types.h +++ b/src/core/knowledge_types.h @@ -1,62 +1,34 @@ #ifndef _ZoO_CORE_KNOWLEDGE_TYPES_H_ #define _ZoO_CORE_KNOWLEDGE_TYPES_H_ -#include "../pervasive.h" +#include "../core/index_types.h" +#include "../core/char_types.h" -#define ZoO_WORD_START_OF_LINE 0 -#define ZoO_WORD_END_OF_LINE 1 - -#if ZoO_MARKOV_ORDER == 1 - #define ZoO_SEQUENCE_SIZE 1 -#else - #define ZoO_SEQUENCE_SIZE ZoO_MARKOV_ORDER - 1 -#endif - -#define ZoO_S_LINK_SIZE (ZoO_SEQUENCE_SIZE + 1) - -/* XXX: are we as close to immutable as we want to be? */ -extern unsigned int const ZoO_knowledge_punctuation_chars_count; -extern const ZoO_char const ZoO_knowledge_punctuation_chars[8]; -extern unsigned int const ZoO_knowledge_forbidden_chars_count; -extern const ZoO_char const ZoO_knowledge_forbidden_chars[8]; - - -enum ZoO_knowledge_special_effect +struct ZoO_knowledge_sequence_collection { - ZoO_WORD_HAS_NO_EFFECT, - ZoO_WORD_ENDS_SENTENCE, - ZoO_WORD_STARTS_SENTENCE, - ZoO_WORD_REMOVES_LEFT_SPACE, - ZoO_WORD_REMOVES_RIGHT_SPACE -}; - -struct ZoO_knowledge_link -{ - ZoO_index sequence[ZoO_SEQUENCE_SIZE]; - ZoO_index occurrences; - ZoO_index targets_count; - ZoO_index * targets_occurrences; - ZoO_index * targets; + ZoO_index ** sequences; + ZoO_index sequences_length; + ZoO_index * sequences_sorted; + ZoO_index * occurrences; + ZoO_index ** targets; + ZoO_index * targets_length; + ZoO_index ** targets_occurrences; }; struct ZoO_knowledge_word { - size_t word_size; ZoO_char * word; - enum ZoO_knowledge_special_effect special; + size_t word_size; ZoO_index occurrences; - ZoO_index forward_links_count; - ZoO_index backward_links_count; - struct ZoO_knowledge_link * forward_links; - struct ZoO_knowledge_link * backward_links; + struct ZoO_knowledge_sequence_collection followed; + struct ZoO_knowledge_sequence_collection preceded; }; - struct ZoO_knowledge { - ZoO_index words_count; - ZoO_index * sorted_indices; struct ZoO_knowledge_word * words; + ZoO_index words_length; + ZoO_index * words_sorted; }; #endif diff --git a/src/core/sequence.c b/src/core/sequence.c index 67174d1..9e370a3 100644 --- a/src/core/sequence.c +++ b/src/core/sequence.c @@ -1,129 +1,75 @@ #include #include -#include "../io/error.h" -#include "../tool/sorted_list.h" +#include "../core/index.h" -#include "knowledge.h" +#include "sequence.h" -static int cmp_seq_link +/* See "sequence.h" */ +int ZoO_sequence_cmp ( - const void * const a, - const void * const b, - const void * const other + const ZoO_index sequence_a [const], + const ZoO_index sequence_a_length, + const ZoO_index sequence_b [const], + const ZoO_index sequence_b_length ) { - ZoO_index j; - const ZoO_index * sequence; - const struct ZoO_knowledge_link * link; + ZoO_index min_length; + ZoO_index i; - sequence = (const ZoO_index *) a; - link = (const struct ZoO_knowledge_link *) b; + if (sequence_a_length < sequence_b_length) + { + min_length = sequence_a_length; + } + else + { + min_length = sequence_b_length; + } - for (j = 0; j < ZoO_SEQUENCE_SIZE; ++j) + for (i = 0; i < min_length; ++i) { - if (sequence[j] < link->sequence[j]) + if (sequence_a[i] < sequence_b[i]) { return -1; } - else if (sequence[j] > link->sequence[j]) + else if (sequence_b[i] > sequence_b[i]) { return 1; } - } - - return 0; -} - -int ZoO_knowledge_find_link -( - ZoO_index const links_count, - struct ZoO_knowledge_link links [const], - ZoO_index const sequence [const restrict static ZoO_SEQUENCE_SIZE], - ZoO_index result [const restrict static 1] -) -{ - return - ZoO_sorted_list_index_of + else if ( - links_count, - (void const *) links, - (void const *) sequence, - sizeof(struct ZoO_knowledge_link), - cmp_seq_link, - (void const *) NULL, - result - ); -} - -int ZoO_knowledge_get_link -( - ZoO_index links_count [const], - struct ZoO_knowledge_link * links [const], - ZoO_index const sequence [const restrict static ZoO_SEQUENCE_SIZE], - ZoO_index result [const restrict static 1] -) -{ - struct ZoO_knowledge_link * new_p; + (sequence_a[i] == ZoO_END_OF_SEQUENCE_ID) + && (sequence_b[i] == ZoO_END_OF_SEQUENCE_ID) + ) + { + return 0; + } + } - if - ( - ZoO_sorted_list_index_of - ( - *links_count, - (void const *) *links, - (void const *) sequence, - sizeof(struct ZoO_knowledge_link), - cmp_seq_link, - (void const *) NULL, - result - ) == 0 - ) + if (sequence_a_length < sequence_b_length) { - return 0; + if (sequence_b[i] == ZoO_END_OF_SEQUENCE_ID) + { + return 0; + } + else + { + return -1; + } } - - *links_count += 1; - - new_p = - (struct ZoO_knowledge_link *) realloc - ( - (void *) *links, - (sizeof(struct ZoO_knowledge_link) * (*links_count)) - ); - - if (new_p == (struct ZoO_knowledge_link *) NULL) + else if (sequence_a_length > sequence_b_length) { - *links_count -= 1; - - return -1; + if (sequence_a[i] == ZoO_END_OF_SEQUENCE_ID) + { + return 0; + } + else + { + return 1; + } } - - if (*result < (*links_count - 1)) + else { - memmove( - (void *) (new_p + *result + 1), - (const void *) (new_p + *result), - (sizeof(struct ZoO_knowledge_link) * (*links_count - 1 - *result)) - ); + return 0; } - - *links = new_p; - - new_p += *result; - - memcpy - ( - (void *) new_p->sequence, - (void const *) sequence, - /* can be zero */ - (sizeof(ZoO_index) * ZoO_SEQUENCE_SIZE) - ); - - new_p->occurrences = 0; - new_p->targets_count = 0; - new_p->targets_occurrences = (ZoO_index *) NULL; - new_p->targets = (ZoO_index *) NULL; - - return 0; } diff --git a/src/core/sequence.h b/src/core/sequence.h index fb4b628..e609b4d 100644 --- a/src/core/sequence.h +++ b/src/core/sequence.h @@ -2,7 +2,7 @@ #define _ZoO_CORE_SEQUENCE_H_ #include "../core/index_types.h" -#include "../core/knownledge_types.h" +#include "../core/knowledge_types.h" #include "sequence_types.h" @@ -27,7 +27,7 @@ * (knows {k} {initial_word}) * (initialized {k}) */ -int ZoO_create_sequence_from +int ZoO_sequence_create_from ( const ZoO_index initial_word, ZoO_index credits [const restrict], @@ -37,4 +37,23 @@ int ZoO_create_sequence_from size_t sequence_size [const restrict static 1] ); +/* + * Compares two sequences. + * ZoO_END_OF_SEQUENCE marks the ending of a sequence, regardless of indicated + * sequence length, meaning that [10][ZoO_END_OF_SEQUENCE][9] and + * [10][ZoO_END_OF_SEQUENCE][8] are considered equal. Sequences do not have to + * contain ZoO_END_OF_SEQUENCE. + * Return: + * 1 iff {sequence_a} should be considered being more than {sequence_b} + * 0 iff {sequence_a} should be considered being equal to {sequence_b} + * -1 iff {sequence_a} should be considered being less than {sequence_b} + */ +int ZoO_sequence_cmp +( + const ZoO_index sequence_a [const], + const ZoO_index sequence_a_length, + const ZoO_index sequence_b [const], + const ZoO_index sequence_b_length +); + #endif diff --git a/src/core/sequence_creation.c b/src/core/sequence_creation.c new file mode 100644 index 0000000..b1f0f36 --- /dev/null +++ b/src/core/sequence_creation.c @@ -0,0 +1,708 @@ +#include +#include +#include +#include /* defines SIZE_MAX */ + +#include "../io/error.h" + +#include "../core/index.h" +#include "../core/knowledge.h" + +#include "sequence.h" + +/* + * Returns a randomly chosen index pointing to a element in {weights}. + * The values of {weights} are used as weights to guide the choice. + * Returns: + * Value in [0, (length weights)[. + * Pre: + * (> weights_sum 0). + * (= (sum weights) weights_sum). + */ +static ZoO_index weighted_random_pick +( + const ZoO_index weights [const restrict static 1], + const ZoO_index weights_sum +) +{ + ZoO_index result, accumulator, random_number; + + accumulator = 0; + + /* Safe: Included in [0, weights_sum]. */ + random_number = ZoO_index_random_up_to(weights_sum); + + for (result = 0; accumulator < random_number; ++result) + { + /* Safe: (= (sum weights) weights_sum) */ + accumulator += weights[result]; + } + + return result; +} + +/******************************************************************************/ +/** ADDING ELEMENTS TO THE LEFT ***********************************************/ +/******************************************************************************/ + +/* + * Adds an id to the left of the sequence. + * This requires the reallocation of {sequence}. The freeing of the previous + * memory space is handled. If an error happened, {*sequence} remains untouched. + * Returns: + * 0 on success. + * -1 iff adding the word would cause an overflow. + * -2 iff memory allocation was unsuccessful. + * Post: + * (initialized {sequence}) + * (initialized {*sequence}) + */ +static int left_append +( + const ZoO_index word_id, + ZoO_index * sequence [const restrict], + const size_t sequence_size +) +{ + ZoO_index * new_sequence; + + if ((SIZE_MAX - sizeof(ZoO_index)) > sequence_size) + { + ZoO_S_ERROR + ( + "Left side append aborted, as the new sequence's size would overflow." + ); + + return -1; + } + + new_sequence = (ZoO_index *) malloc(sizeof(ZoO_index) + sequence_size); + + if (new_sequence == (ZoO_index *) NULL) + { + ZoO_S_ERROR + ( + "Left side append aborted, as memory for the new sequence could not be" + " allocated." + ); + + return -2; + } + + if (sequence_size > 0) + { + memcpy + ( + (void *) (new_sequence + 1), + (const void *) sequence, + sequence_size + ); + + free((void *) sequence); + } + + new_sequence[0] = word_id; + + *sequence = new_sequence; + + return 0; +} + +/* + * Adds an id to the left of the sequence, according to what is known as likely + * to fit there. + * This requires the reallocation of {sequence}. The freeing of the previous + * memory space is handled. If an error happened, {*sequence} remains untouched. + * Returns: + * 0 on success. + * -1 iff nothing fitting was found. + * -2 iff the addition of that id failed. + * Pre: + * (initialized {sequence}) + * (initialized {k}) + * (> {markov_order} 0) + * (initialized {*sequence[0..({markov_order} - 1)]}) + */ +static int extend_left +( + ZoO_index * sequence [const restrict static 1], + const size_t sequence_size, + const ZoO_index markov_order, + const struct ZoO_knowledge k [const restrict static 1] +) +{ + const ZoO_index * restrict preceding_words; + const ZoO_index * restrict preceding_words_weights; + ZoO_index preceding_words_weights_sum; + + if + ( + ZoO_knowledge_find_preceding_words + ( + k, + *sequence, + markov_order, + &preceding_words, + &preceding_words_weights, + &preceding_words_weights_sum + ) < 0 + ) + { + return -1; + } + + /* preceding_words_weights_sum > 0 */ + + if + ( + left_append + ( + weighted_random_pick + ( + preceding_words_weights, + preceding_words_weights_sum + ), + sequence, + sequence_size + ) < 0 + ) + { + return -3; + } + + return 0; +} + +/* + * Continuously adds ids to the left of the sequence, according to what is known + * as likely to fit there. If {credits} is NULL, it will stop upon reaching + * the id indicating the start of a sequence, otherwise it will also limit to + * {*credits} words added (including the one indicating the start of a + * sequence). + * This requires the reallocation of {sequence}. The freeing of the previous + * memory space is handled. If an error happened, {sequence} remains unfreed. + * Returns: + * 0 on success. + * -1 iff we did not manage to have ZoO_START_OF_SEQUENCE_ID as a starting + * point. This cannot be caused by lack of {*credits}, but rather by a + * memory allocation problem or a more important issue in {k}. Indeed, it + * could mean we do not know any word preceding {*sequence[0]}, not even + * ZoO_START_OF_SEQUENCE_ID. + * Pre: + * (initialized {sequence}) + * (initialized {sequence_size}) + * (initialized {k}) + * (> {markov_order} 0) + * (initialized {*sequence[0..(MARKOV_ORDER - 1)]}) + */ +static int complete_left_part_of_sequence +( + ZoO_index * sequence [restrict static 1], + size_t sequence_size [const restrict static 1], + const ZoO_index markov_order, + ZoO_index credits [const restrict], + const struct ZoO_knowledge k [const restrict static 1] +) +{ + for (;;) + { + if ((credits == (ZoO_index *) NULL) || (*credits > 0)) + { + if (extend_left(sequence, *sequence_size, markov_order, k) < 0) + { + /* We are sure *sequence[0] is defined. */ + if (*sequence[0] == ZoO_START_OF_SEQUENCE_ID) + { + /* + * We failed to add a word, but it was because none should have + * been added. + */ + return 0; + } + else + { + return -1; + } + } + } + else + { + /* No more credits available, the sequence will have to start here. */ + *sequence[0] = ZoO_START_OF_SEQUENCE_ID; + + return 0; + } + + /* + * Safe: if it was going to overflow, extend_left would have returned a + * negative value, making this statement unreachable. + */ + *sequence_size = (*sequence_size + sizeof(ZoO_index)); + + if (credits != (ZoO_index *) NULL) + { + *credits -= 1; + } + + /* We are sure *sequence[0] is defined. */ + switch (*sequence[0]) + { + case ZoO_END_OF_SEQUENCE_ID: + ZoO_S_WARNING + ( + "END OF LINE was added at the left part of an sequence." + ); + + *sequence[0] = ZoO_START_OF_SEQUENCE_ID; + return 0; + + case ZoO_START_OF_SEQUENCE_ID: + return 0; + + default: + break; + } + } +} + +/******************************************************************************/ +/** ADDING ELEMENTS TO THE RIGHT **********************************************/ +/******************************************************************************/ + +/* + * Adds an id to the right of the sequence. + * This requires the reallocation of {sequence}. The freeing of the previous + * memory space is handled. If an error happened, {sequence} remain untouched. + * Returns: + * 0 on success. + * -1 iff adding the word would cause an overflow. + * -2 iff memory allocation was unsuccessful. + * Post: + * (initialized {sequence}) + * (initialized {*sequence}) + */ +static int right_append +( + ZoO_index * sequence [const restrict], + const ZoO_index word_id, + const size_t sequence_size, + const ZoO_index sequence_length +) +{ + ZoO_index * new_sequence; + + if ((SIZE_MAX - sizeof(ZoO_index)) > sequence_size) + { + ZoO_S_ERROR + ( + "Right side append aborted, as the new sequence's size would overflow." + ); + + return -1; + } + + new_sequence = + (ZoO_index *) realloc + ( + sequence, + (sequence_size + sizeof(ZoO_index)) + ); + + if (new_sequence == (ZoO_index *) NULL) + { + ZoO_S_ERROR + ( + "Right side append aborted, as memory for the new sequence could not " + "be allocated." + ); + + return -2; + } + + new_sequence[sequence_length] = word_id; + + *sequence = new_sequence; + + return 0; +} + +/* + * Adds an id to the right of the sequence, according to what is known as likely + * to fit there. + * This requires the reallocation of {sequence}. The freeing of the previous + * memory space is handled. If an error happened, {*sequence} remains untouched. + * Returns: + * 0 on success. + * -1 iff nothing fitting was found. + * -2 iff the addition of that id failed. + * Pre: + * (initialized {sequence}) + * (initialized {k}) + * (> {markov_order} 0) + * (initialized {*sequence[0..(MARKOV_ORDER - 1)]}) + */ +static int extend_right +( + ZoO_index * sequence [const restrict static 1], + const size_t sequence_size, + const ZoO_index markov_order, + const ZoO_index sequence_length, + const struct ZoO_knowledge k [const restrict static 1] +) +{ + const ZoO_index * restrict following_words; + const ZoO_index * restrict following_words_weights; + + ZoO_index following_words_weights_sum; + + if + ( + ZoO_knowledge_find_following_words + ( + k, + *sequence, + sequence_length, + markov_order, + &following_words, + &following_words_weights, + &following_words_weights_sum + ) < 0 + ) + { + return -1; + } + + /* following_words_weights_sum > 0 */ + + if + ( + right_append + ( + sequence, + weighted_random_pick + ( + following_words_weights, + following_words_weights_sum + ), + sequence_size, + sequence_length + ) < 0 + ) + { + return -3; + } + + return 0; +} + +/* + * Continuously adds ids to the right of the sequence, according to what is + * known as likely to fit there. If {credits} is NULL, it will stop upon + * reaching the id indicating the end of a sequence, otherwise it will also + * limit to {*credits} words added (including the one indicating the end of a + * sequence). + * This requires the reallocation of {sequence}. The freeing of the previous + * memory space is handled. If an error happened, {sequence} remain untouched. + * Returns: + * 0 on success. + * -1 iff we did not manage to have ZoO_END_OF_SEQUENCE_ID as a stopping + * point. This cannot be caused by lack of {*credits}, but rather by a + * memory allocation problem or a more important issue in {k}. Indeed, it + * could mean we do not know any word following {*sequence[0]}, not even + * ZoO_END_OF_SEQUENCE_ID. + * Pre: + * (initialized {sequence}) + * (initialized {*sequence_size}) + * (initialized {k}) + * (> {markov_order} 0) + * (initialized {*sequence[0..(MARKOV_ORDER - 1)]}) + */ +static int complete_right_part_of_sequence +( + ZoO_index * sequence [const restrict static 1], + size_t sequence_size [const restrict static 1], + const ZoO_index markov_order, + ZoO_index credits [const restrict], + const struct ZoO_knowledge k [const restrict static 1] +) +{ + ZoO_index sequence_length; + + sequence_length = (*sequence_size / sizeof(ZoO_index)); + + for (;;) + { + if ((credits == (ZoO_index *) NULL) || (*credits > 0)) + { + if + ( + extend_right + ( + sequence, + *sequence_size, + markov_order, + sequence_length, + k + ) < 0 + ) + { + /* Safe: (> sequence_length 1) */ + if (*sequence[(sequence_length - 1)] == ZoO_END_OF_SEQUENCE_ID) + { + /* + * We failed to add a word, but it was because none should have + * been added. + */ + return 0; + } + else + { + return -1; + } + } + } + else + { + /* No more credits available, we end the sequence. */ + *sequence[(sequence_length - 1)] = ZoO_END_OF_SEQUENCE_ID; + + return 0; + } + + /* + * Safe: if it was going to overflow, extend_left would have returned a + * negative value, making this statement unreachable. + */ + *sequence_size = (*sequence_size + sizeof(ZoO_index)); + sequence_length += 1; + + if (credits != (ZoO_index *) NULL) + { + *credits -= 1; + } + + /* Safe: (> sequence_length 1) */ + switch (*sequence[(sequence_length - 1)]) + { + case ZoO_START_OF_SEQUENCE_ID: + ZoO_S_WARNING + ( + "END OF LINE was added at the right part of an sequence." + ); + + *sequence[(sequence_length - 1)] = ZoO_END_OF_SEQUENCE_ID; + return 0; + + case ZoO_END_OF_SEQUENCE_ID: + return 0; + + default: + break; + } + } +} + +/******************************************************************************/ +/** INITIALIZING SEQUENCE *****************************************************/ +/******************************************************************************/ + +/* + * Allocates the memory required to store the initial sequence. + * Returns: + * 0 on success. + * -1 if this would require more memory than can indicate a size_t variable. + * -2 if the allocation failed. + * Post: + * (initialized {*sequence}) + * (initialized {*sequence_size}) + */ +static int allocate_initial_sequence +( + ZoO_index * sequence [const restrict static 1], + size_t sequence_size [const restrict static 1], + const ZoO_index markov_order +) +{ + if ((SIZE_MAX / sizeof(ZoO_index)) > ((size_t) markov_order)) + { + ZoO_S_ERROR + ( + "Unable to store size of the initial sequence in a size_t variable." + "Either reduce the size of a ZoO_index or the markovian order." + ); + + *sequence = (ZoO_index *) NULL; + *sequence_size = 0; + + return -1; + } + + *sequence_size = (((size_t) markov_order) * sizeof(ZoO_index)); + *sequence = (ZoO_index *) malloc(*sequence_size); + + if (*sequence == (void *) NULL) + { + *sequence_size = 0; + + ZoO_S_ERROR + ( + "Unable to allocate the memory required for an new sequence." + ); + + return -2; + } + + return 0; +} + +/* + * Initializes an pre-allocated sequence by filling it with {initial_word} + * followed by a sequence of ({markov_order} - 1) words that is known to have + * followed {initial_word} at least once. This sequence is chosen depending on + * how often {k} indicates it has followed {initial_word}. Note that if + * {markov_order} is 1, there is no sequence added, simply {initial_word}. + * Returns: + * 0 on success. + * -1 if no such sequence was found. + * Pre: + * (size (= {sequence} {markov_order})) + * (initialized {k}) + * (> markov_order 0) + * Post: + * (initialized {sequence[0..(markov_order - 1)]}) + */ +static int initialize_sequence +( + ZoO_index sequence [const restrict static 1], + const ZoO_index initial_word, + const ZoO_index markov_order, + const struct ZoO_knowledge k [const static 1] +) +{ + const ZoO_index * const restrict * following_sequences; + const ZoO_index * following_sequences_weights; + ZoO_index following_sequences_weights_sum; + ZoO_index chosen_sequence; + + sequence[0] = initial_word; + + if (markov_order == 1) + { + return 0; + } + + if + ( + ZoO_knowledge_get_following_sequences + ( + k, + initial_word, + &following_sequences, + &following_sequences_weights, + &following_sequences_weights_sum + ) < 0 + ) + { + ZoO_S_ERROR + ( + "Unable to find any sequence that would precede the initial word." + ); + + return -1; + } + + chosen_sequence = + weighted_random_pick + ( + following_sequences_weights, + following_sequences_weights_sum + ); + + /* Safe if 'allocate_initial_sequence' succeeded. */ + memcpy + ( + (void *) (sequence + 1), + (const void *) (following_sequences + chosen_sequence), + ((((size_t) markov_order) - 1) * sizeof(ZoO_index)) + ); + + return 0; +} + +/******************************************************************************/ +/** EXPORTED ******************************************************************/ +/******************************************************************************/ + +/* See "sequence.h" */ +int ZoO_sequence_create_from +( + const ZoO_index initial_word, + ZoO_index credits [const restrict], + const struct ZoO_knowledge k [const restrict static 1], + const ZoO_index markov_order, + ZoO_index * sequence [const restrict static 1], + size_t sequence_size [const restrict static 1] +) +{ + if (allocate_initial_sequence(sequence, sequence_size, markov_order) < 0) + { + return -1; + } + + if (initialize_sequence(*sequence, initial_word, markov_order, k) < 0) + { + free((void *) *sequence); + *sequence_size = 0; + + return -2; + } + + if + ( + complete_right_part_of_sequence + ( + sequence, + sequence_size, + markov_order, + credits, + k + ) < 0 + ) + { + free((void *) *sequence); + *sequence_size = 0; + + return -3; + } + + if + ( + complete_left_part_of_sequence + ( + sequence, + sequence_size, + markov_order, + credits, + k + ) < 0 + ) + { + free((void *) *sequence); + *sequence_size = 0; + + return -4; + } + + if ((*sequence_size / sizeof(ZoO_index)) < 3) + { + /* 2 elements, for start and stop. */ + ZoO_S_ERROR("Created sequence was empty."); + + free((void *) *sequence); + *sequence_size = 0; + + return -5; + } + + return 0; +} diff --git a/src/pervasive.h b/src/pervasive.h index 9e1faf7..b830326 100644 --- a/src/pervasive.h +++ b/src/pervasive.h @@ -39,16 +39,6 @@ #define ZoO_DEFAULT_REPLY_RATE 8 #endif -#ifndef ZoO_MARKOV_ORDER - #define ZoO_MARKOV_ORDER 3 -#endif - - -/* ZoO_char = UTF-8 char */ -typedef char ZoO_char; -/* Functions that can handle UTF-8 'char' will use this symbol. */ -#define ZoO_CHAR_STRING_SYMBOL "%s" - #define ZoO__TO_STRING(x) #x #define ZoO_TO_STRING(x) ZoO__TO_STRING(x) #define ZoO_ISOLATE(a) do {a} while (0) -- cgit v1.2.3-70-g09d2