| summaryrefslogtreecommitdiff |
diff options
| -rw-r--r-- | src/core/CMakeLists.txt | 4 | ||||
| -rw-r--r-- | src/core/char.c | 48 | ||||
| -rw-r--r-- | src/core/char.h | 21 | ||||
| -rw-r--r-- | src/core/char_types.h | 17 | ||||
| -rw-r--r-- | src/core/knowledge.c | 50 | ||||
| -rw-r--r-- | src/core/knowledge.h | 78 | ||||
| -rw-r--r-- | src/core/knowledge_search.c | 339 | ||||
| -rw-r--r-- | src/core/knowledge_types.h | 58 | ||||
| -rw-r--r-- | src/core/sequence.c | 152 | ||||
| -rw-r--r-- | src/core/sequence.h | 23 | ||||
| -rw-r--r-- | src/core/sequence_creation.c (renamed from src/core/create_sequence.c) | 95 | ||||
| -rw-r--r-- | src/pervasive.h | 10 |
12 files changed, 586 insertions, 309 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index af5ca65..37b95cb 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1,9 +1,11 @@ set( SRC_FILES ${SRC_FILES} + ${CMAKE_CURRENT_SOURCE_DIR}/char.c ${CMAKE_CURRENT_SOURCE_DIR}/main.c ${CMAKE_CURRENT_SOURCE_DIR}/knowledge.c + ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_search.c ${CMAKE_CURRENT_SOURCE_DIR}/assimilate.c - ${CMAKE_CURRENT_SOURCE_DIR}/create_sequence.c + ${CMAKE_CURRENT_SOURCE_DIR}/sequence_creation.c ${CMAKE_CURRENT_SOURCE_DIR}/sequence.c ) diff --git a/src/core/char.c b/src/core/char.c new file mode 100644 index 0000000..39ca72e --- /dev/null +++ b/src/core/char.c @@ -0,0 +1,48 @@ +#include <string.h> + +#include "char.h" + +int ZoO_char_is_banned (const ZoO_char c) +{ + switch (c) + { + case '(': + case ')': + case '[': + case ']': + case '{': + case '}': + case '<': + case '>': + return 1; + + default: + return 0; + } +} + +int ZoO_char_is_punctuation (const ZoO_char c) +{ + switch (c) + { + case '!': + case ',': + case '.': + case ':': + case ';': + case '?': + return 1; + + default: + return 0; + } +} + +int ZoO_word_cmp +( + const ZoO_char word_a [const static 1], + const ZoO_char word_b [const static 1] +) +{ + return strcmp((const char *) word_a, (const char *) word_b); +} diff --git a/src/core/char.h b/src/core/char.h new file mode 100644 index 0000000..772a3a2 --- /dev/null +++ b/src/core/char.h @@ -0,0 +1,21 @@ +#ifndef _ZoO_CORE_CHAR_H_ +#define _ZoO_CORE_CHAR_H_ + +#include "char_types.h" + +enum ZoO_word_property ZoO_get_word_property +( + const ZoO_char word [const restrict], + size_t word_size +); + +int ZoO_word_cmp +( + const ZoO_char word_a [const static 1], + const ZoO_char word_b [const static 1] +); + +int ZoO_char_is_punctuation (const ZoO_char c); +int ZoO_word_char_is_banned (const ZoO_char c); + +#endif diff --git a/src/core/char_types.h b/src/core/char_types.h new file mode 100644 index 0000000..67b5294 --- /dev/null +++ b/src/core/char_types.h @@ -0,0 +1,17 @@ +#ifndef _ZoO_CORE_CHAR_TYPES_H_ +#define _ZoO_CORE_CHAR_TYPES_H_ + +enum ZoO_word_property +{ + ZoO_WORD_NO_PROPERTY, + ZoO_WORD_HAS_NO_LEFT_SEPARATOR, + ZoO_WORD_HAS_NO_RIGHT_SEPARATOR +}; + +/* ZoO_char = UTF-8 char */ +typedef char ZoO_char; + +/* Functions that can handle UTF-8 'char' will use this symbol. */ +#define ZoO_CHAR_STRING_SYMBOL "%s" + +#endif diff --git a/src/core/knowledge.c b/src/core/knowledge.c index 4980fdd..279a646 100644 --- a/src/core/knowledge.c +++ b/src/core/knowledge.c @@ -9,56 +9,6 @@ /** Basic functions of the ZoO_knowledge structure ****************************/ -/* XXX: are we as close to immutable as we want to be? */ -unsigned int const ZoO_knowledge_punctuation_chars_count = 8; -const ZoO_char const ZoO_knowledge_punctuation_chars[8] = - { - '!', - ',', - '.', - ':', - ';', - '?', - '~', - '\001' - }; - -/* XXX: are we as close to immutable as we want to be? */ -unsigned int const ZoO_knowledge_forbidden_chars_count = 8; -const ZoO_char const ZoO_knowledge_forbidden_chars[8]= - { - '(', - ')', - '[', - ']', - '{', - '}', - '<', - '>' - }; - -static int cmp_word -( - const void * const a, - const void * const b, - const void * const other -) -{ - ZoO_char const * word; - ZoO_index const * sorted_index; - struct ZoO_knowledge const * k; - - word = (ZoO_char const *) a; - sorted_index = (ZoO_index const *) b; - k = (struct ZoO_knowledge *) other; - - return strcmp - ( - (const char *) word, - (const char *) k->words[*sorted_index].word - ); -} - /* See "knowledge.h". */ int ZoO_knowledge_find ( diff --git a/src/core/knowledge.h b/src/core/knowledge.h index 7b5d754..b4f7b7e 100644 --- a/src/core/knowledge.h +++ b/src/core/knowledge.h @@ -1,7 +1,8 @@ #ifndef _ZoO_CORE_KNOWLEDGE_H_ #define _ZoO_CORE_KNOWLEDGE_H_ -#include "../tool/strings_types.h" +#include "../core/char_types.h" +#include "../core/index_types.h" #include "knowledge_types.h" @@ -24,22 +25,6 @@ int ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1]); */ void ZoO_knowledge_finalize (struct ZoO_knowledge k [const static 1]); -/* - * When returning 0: - * {word} is in {k}. - * {word} is located at {k->words[*result]}. - * - * When returning -1: - * {word} is not in {k}. - * {*result} is where {word} was expected to be found in - * {k->sorted_indices}. - */ -int ZoO_knowledge_find -( - const struct ZoO_knowledge k [const restrict static 1], - const ZoO_char word [const restrict static 1], - ZoO_index result [const restrict static 1] -); /* * When returning 0: @@ -59,39 +44,58 @@ int ZoO_knowledge_learn ZoO_index result [const restrict static 1] ); -int ZoO_knowledge_assimilate +int ZoO_knowledge_learn_sequence ( struct ZoO_knowledge k [const static 1], - struct ZoO_strings string [const restrict static 1], - ZoO_index const aliases_count, - const char * restrict aliases [const restrict static aliases_count] + const ZoO_index sequence [const restrict], + const ZoO_index sequence_length ); -int ZoO_knowledge_extend +int ZoO_knowledge_get_following_sequences ( - struct ZoO_knowledge k [const static 1], - const struct ZoO_strings string [const], - ZoO_index const aliases_count, - const char * restrict aliases [const restrict static aliases_count], - ZoO_char * result [const static 1] + const struct ZoO_knowledge k [const static 1], + const ZoO_index initial_word, + const ZoO_index * const restrict * following_sequences [const restrict static 1], + const ZoO_index * following_sequences_weights [const restrict static 1], + const ZoO_index following_sequences_weights_sum [const static 1] ); -int ZoO_knowledge_find_link +/* + * When returning 0: + * {word} is in {k}. + * {word} is located at {k->words[*result]}. + * + * When returning -1: + * {word} is not in {k}. + * {*result} is where {word} was expected to be found in + * {k->sorted_indices}. + */ +int ZoO_knowledge_find_word_id ( - ZoO_index const links_count, - struct ZoO_knowledge_link links [const], - ZoO_index const sequence [const restrict static ZoO_SEQUENCE_SIZE], + const struct ZoO_knowledge k [const restrict static 1], + const ZoO_char word [const restrict static 1], ZoO_index result [const restrict static 1] ); -/* Create it if it's not found. */ -int ZoO_knowledge_get_link +int ZoO_knowledge_find_preceding_words ( - ZoO_index links_count [const], - struct ZoO_knowledge_link * links [const], - ZoO_index const sequence [const restrict static ZoO_S_LINK_SIZE], - ZoO_index result [const restrict static 1] + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequence [const restrict], + const ZoO_index markov_order, + const ZoO_index * restrict preceding_words [const restrict static 1], + const ZoO_index * restrict preceding_words_weights [const restrict static 1], + ZoO_index preceding_words_weights_sum [const restrict static 1] ); +int ZoO_knowledge_find_following_words +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequence [const restrict], + const ZoO_index sequence_length, + const ZoO_index markov_order, + const ZoO_index * restrict following_words [const restrict static 1], + const ZoO_index * restrict following_words_weights [const restrict static 1], + ZoO_index following_words_weights_sum [const restrict static 1] +); #endif diff --git a/src/core/knowledge_search.c b/src/core/knowledge_search.c new file mode 100644 index 0000000..af62266 --- /dev/null +++ b/src/core/knowledge_search.c @@ -0,0 +1,339 @@ +#include <stdlib.h> + +#include "../core/char.h" +#include "../core/index.h" +#include "../core/sequence.h" + +#include "../io/error.h" + +#include "knowledge.h" + +/* See "knowledge.h". */ +int ZoO_knowledge_find_word_id +( + const struct ZoO_knowledge k [const restrict static 1], + const ZoO_char word [const restrict static 1], + ZoO_index result [const restrict static 1] +) +{ + /* This is a binary search */ + int cmp; + ZoO_index i, current_min, current_max; + ZoO_index candidate_id; + + /* Handles the case where the list is empty ********************************/ + current_max = k->words_length; + + if (current_max == 0) + { + *result = 0; + + return -1; + } + /***************************************************************************/ + + current_min = 0; + current_max -= 1; + + for (;;) + { + i = (current_min + ((current_max - current_min) / 2)); + + cmp = ZoO_word_cmp(word, k->words[k->words_sorted[i]].word); + + if (cmp > 0) + { + current_min = (i + 1); + + if (current_min > current_max) + { + *result = current_min; + + return -1; + } + } + else if (cmp < 0) + { + if ((current_min > current_max) || (i == 0)) + { + *result = current_min; + + return -1; + } + + current_max = (i - 1); + } + else + { + *result = i; + + return 0; + } + } +} + +int ZoO_knowledge_find_preceding_words +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequence [const restrict], + const ZoO_index markov_order, /* Pre: (> 0) */ + const ZoO_index * restrict preceding_words [const restrict static 1], + const ZoO_index * restrict preceding_words_weights [const restrict static 1], + ZoO_index preceding_words_weights_sum [const restrict static 1] +) +{ + /* This is a binary search */ + int cmp; + ZoO_index i, current_min, current_max; + ZoO_index candidate_id; + const ZoO_index markov_sequence_length = (markov_order - 1); + + if (sequence[markov_sequence_length] >= k->words_length) + { + ZoO_S_ERROR + ( + "Attempting to find the preceding words of an unknown word." + ); + + *preceding_words = (const ZoO_index *) NULL; + *preceding_words_weights = (const ZoO_index *) NULL; + *preceding_words_weights_sum = 0; + + return -1; + } + + *preceding_words_weights_sum = + k->words[sequence[markov_sequence_length]].occurrences; + + if (markov_order == 1) + { + /* Special case: empty sequences. */ + *preceding_words = + (const ZoO_index *) k->words + [ + sequence[markov_sequence_length] + ].preceded.targets; + + *preceding_words_weights = + (const ZoO_index *) k->words + [ + sequence[markov_sequence_length] + ].preceded.targets_occurrences; + + return 0; + } + + /* Handles the case where the list is empty ********************************/ + current_max = + k->words[sequence[markov_sequence_length]].preceded.sequences_length; + + if (current_max == 0) + { + *preceding_words = (const ZoO_index *) NULL; + *preceding_words_weights = (const ZoO_index *) NULL; + *preceding_words_weights_sum = 0; + + ZoO_S_ERROR + ( + "Attempting to find the preceding words of a sequence that never had " + "any." + ); + + return -2; + } + /***************************************************************************/ + + current_min = 0; + current_max -= 1; + + for (;;) + { + i = (current_min + ((current_max - current_min) / 2)); + + cmp = + ZoO_sequence_cmp + ( + sequence, + markov_sequence_length, + k->words[sequence[markov_sequence_length]].preceded.sequences[i], + markov_sequence_length + ); + + if (cmp > 0) + { + current_min = (i + 1); + + if (current_min > current_max) + { + *preceding_words = (const ZoO_index *) NULL; + *preceding_words_weights = (const ZoO_index *) NULL; + *preceding_words_weights_sum = 0; + + return -2; + } + } + else if (cmp < 0) + { + if ((current_min > current_max) || (i == 0)) + { + *preceding_words = (const ZoO_index *) NULL; + *preceding_words_weights = (const ZoO_index *) NULL; + *preceding_words_weights_sum = 0; + + return -2; + } + + current_max = (i - 1); + } + else + { + *preceding_words = + k->words + [ + sequence[markov_sequence_length] + ].preceded.targets[i]; + + *preceding_words_weights = + k->words + [ + sequence[markov_sequence_length] + ].preceded.targets_occurrences[i]; + + return 0; + } + } +} + +int ZoO_knowledge_find_following_words +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequence [const restrict], + const ZoO_index sequence_length, + const ZoO_index markov_order, + const ZoO_index * restrict following_words [const restrict static 1], + const ZoO_index * restrict following_words_weights [const restrict static 1], + ZoO_index following_words_weights_sum [const restrict static 1] +) +{ + /* This is a binary search */ + int cmp; + ZoO_index i, current_min, current_max; + ZoO_index candidate_id; + const ZoO_index markov_sequence_length = (markov_order - 1); + const ZoO_index word_of_interest = + (sequence_length - markov_sequence_length) - 1; + + if (sequence[word_of_interest] >= k->words_length) + { + ZoO_S_ERROR + ( + "Attempting to find the following words of an unknown word." + ); + + *following_words = (const ZoO_index *) NULL; + *following_words_weights = (const ZoO_index *) NULL; + *following_words_weights_sum = 0; + + return -1; + } + + *following_words_weights_sum = + k->words[sequence[word_of_interest]].occurrences; + + if (markov_order == 1) + { + /* Special case: empty sequences. */ + *following_words = + (const ZoO_index *) k->words + [ + sequence[word_of_interest] + ].preceded.targets; + + *following_words_weights = + (const ZoO_index *) k->words + [ + sequence[word_of_interest] + ].preceded.targets_occurrences; + + return 0; + } + + /* Handles the case where the list is empty ********************************/ + current_max = k->words[sequence[word_of_interest]].preceded.sequences_length; + + if (current_max == 0) + { + *following_words = (const ZoO_index *) NULL; + *following_words_weights = (const ZoO_index *) NULL; + *following_words_weights_sum = 0; + + ZoO_S_WARNING + ( + "Attempting to find the following words of a sequence that never had " + "any." + ); + + return -2; + } + /***************************************************************************/ + + current_min = 0; + current_max -= 1; + + for (;;) + { + i = (current_min + ((current_max - current_min) / 2)); + + cmp = + ZoO_sequence_cmp + ( + (sequence + word_of_interest), + markov_sequence_length, + k->words[sequence[word_of_interest]].followed.sequences[i], + markov_sequence_length + ); + + if (cmp > 0) + { + current_min = (i + 1); + + if (current_min > current_max) + { + *following_words = (const ZoO_index *) NULL; + *following_words_weights = (const ZoO_index *) NULL; + *following_words_weights_sum = 0; + + return -2; + } + } + else if (cmp < 0) + { + if ((current_min > current_max) || (i == 0)) + { + *following_words = (const ZoO_index *) NULL; + *following_words_weights = (const ZoO_index *) NULL; + *following_words_weights_sum = 0; + + return -2; + } + + current_max = (i - 1); + } + else + { + *following_words = + k->words + [ + sequence[markov_sequence_length] + ].followed.targets[i]; + + *following_words_weights = + k->words + [ + sequence[markov_sequence_length] + ].followed.targets_occurrences[i]; + + return 0; + } + } +} diff --git a/src/core/knowledge_types.h b/src/core/knowledge_types.h index e92b5e1..aea11da 100644 --- a/src/core/knowledge_types.h +++ b/src/core/knowledge_types.h @@ -1,62 +1,34 @@ #ifndef _ZoO_CORE_KNOWLEDGE_TYPES_H_ #define _ZoO_CORE_KNOWLEDGE_TYPES_H_ -#include "../pervasive.h" +#include "../core/index_types.h" +#include "../core/char_types.h" -#define ZoO_WORD_START_OF_LINE 0 -#define ZoO_WORD_END_OF_LINE 1 - -#if ZoO_MARKOV_ORDER == 1 - #define ZoO_SEQUENCE_SIZE 1 -#else - #define ZoO_SEQUENCE_SIZE ZoO_MARKOV_ORDER - 1 -#endif - -#define ZoO_S_LINK_SIZE (ZoO_SEQUENCE_SIZE + 1) - -/* XXX: are we as close to immutable as we want to be? */ -extern unsigned int const ZoO_knowledge_punctuation_chars_count; -extern const ZoO_char const ZoO_knowledge_punctuation_chars[8]; -extern unsigned int const ZoO_knowledge_forbidden_chars_count; -extern const ZoO_char const ZoO_knowledge_forbidden_chars[8]; - - -enum ZoO_knowledge_special_effect +struct ZoO_knowledge_sequence_collection { - ZoO_WORD_HAS_NO_EFFECT, - ZoO_WORD_ENDS_SENTENCE, - ZoO_WORD_STARTS_SENTENCE, - ZoO_WORD_REMOVES_LEFT_SPACE, - ZoO_WORD_REMOVES_RIGHT_SPACE -}; - -struct ZoO_knowledge_link -{ - ZoO_index sequence[ZoO_SEQUENCE_SIZE]; - ZoO_index occurrences; - ZoO_index targets_count; - ZoO_index * targets_occurrences; - ZoO_index * targets; + ZoO_index ** sequences; + ZoO_index sequences_length; + ZoO_index * sequences_sorted; + ZoO_index * occurrences; + ZoO_index ** targets; + ZoO_index * targets_length; + ZoO_index ** targets_occurrences; }; struct ZoO_knowledge_word { - size_t word_size; ZoO_char * word; - enum ZoO_knowledge_special_effect special; + size_t word_size; ZoO_index occurrences; - ZoO_index forward_links_count; - ZoO_index backward_links_count; - struct ZoO_knowledge_link * forward_links; - struct ZoO_knowledge_link * backward_links; + struct ZoO_knowledge_sequence_collection followed; + struct ZoO_knowledge_sequence_collection preceded; }; - struct ZoO_knowledge { - ZoO_index words_count; - ZoO_index * sorted_indices; struct ZoO_knowledge_word * words; + ZoO_index words_length; + ZoO_index * words_sorted; }; #endif diff --git a/src/core/sequence.c b/src/core/sequence.c index 67174d1..9e370a3 100644 --- a/src/core/sequence.c +++ b/src/core/sequence.c @@ -1,129 +1,75 @@ #include <stdlib.h> #include <string.h> -#include "../io/error.h" -#include "../tool/sorted_list.h" +#include "../core/index.h" -#include "knowledge.h" +#include "sequence.h" -static int cmp_seq_link +/* See "sequence.h" */ +int ZoO_sequence_cmp ( - const void * const a, - const void * const b, - const void * const other + const ZoO_index sequence_a [const], + const ZoO_index sequence_a_length, + const ZoO_index sequence_b [const], + const ZoO_index sequence_b_length ) { - ZoO_index j; - const ZoO_index * sequence; - const struct ZoO_knowledge_link * link; + ZoO_index min_length; + ZoO_index i; - sequence = (const ZoO_index *) a; - link = (const struct ZoO_knowledge_link *) b; + if (sequence_a_length < sequence_b_length) + { + min_length = sequence_a_length; + } + else + { + min_length = sequence_b_length; + } - for (j = 0; j < ZoO_SEQUENCE_SIZE; ++j) + for (i = 0; i < min_length; ++i) { - if (sequence[j] < link->sequence[j]) + if (sequence_a[i] < sequence_b[i]) { return -1; } - else if (sequence[j] > link->sequence[j]) + else if (sequence_b[i] > sequence_b[i]) { return 1; } - } - - return 0; -} - -int ZoO_knowledge_find_link -( - ZoO_index const links_count, - struct ZoO_knowledge_link links [const], - ZoO_index const sequence [const restrict static ZoO_SEQUENCE_SIZE], - ZoO_index result [const restrict static 1] -) -{ - return - ZoO_sorted_list_index_of + else if ( - links_count, - (void const *) links, - (void const *) sequence, - sizeof(struct ZoO_knowledge_link), - cmp_seq_link, - (void const *) NULL, - result - ); -} - -int ZoO_knowledge_get_link -( - ZoO_index links_count [const], - struct ZoO_knowledge_link * links [const], - ZoO_index const sequence [const restrict static ZoO_SEQUENCE_SIZE], - ZoO_index result [const restrict static 1] -) -{ - struct ZoO_knowledge_link * new_p; + (sequence_a[i] == ZoO_END_OF_SEQUENCE_ID) + && (sequence_b[i] == ZoO_END_OF_SEQUENCE_ID) + ) + { + return 0; + } + } - if - ( - ZoO_sorted_list_index_of - ( - *links_count, - (void const *) *links, - (void const *) sequence, - sizeof(struct ZoO_knowledge_link), - cmp_seq_link, - (void const *) NULL, - result - ) == 0 - ) + if (sequence_a_length < sequence_b_length) { - return 0; + if (sequence_b[i] == ZoO_END_OF_SEQUENCE_ID) + { + return 0; + } + else + { + return -1; + } } - - *links_count += 1; - - new_p = - (struct ZoO_knowledge_link *) realloc - ( - (void *) *links, - (sizeof(struct ZoO_knowledge_link) * (*links_count)) - ); - - if (new_p == (struct ZoO_knowledge_link *) NULL) + else if (sequence_a_length > sequence_b_length) { - *links_count -= 1; - - return -1; + if (sequence_a[i] == ZoO_END_OF_SEQUENCE_ID) + { + return 0; + } + else + { + return 1; + } } - - if (*result < (*links_count - 1)) + else { - memmove( - (void *) (new_p + *result + 1), - (const void *) (new_p + *result), - (sizeof(struct ZoO_knowledge_link) * (*links_count - 1 - *result)) - ); + return 0; } - - *links = new_p; - - new_p += *result; - - memcpy - ( - (void *) new_p->sequence, - (void const *) sequence, - /* can be zero */ - (sizeof(ZoO_index) * ZoO_SEQUENCE_SIZE) - ); - - new_p->occurrences = 0; - new_p->targets_count = 0; - new_p->targets_occurrences = (ZoO_index *) NULL; - new_p->targets = (ZoO_index *) NULL; - - return 0; } diff --git a/src/core/sequence.h b/src/core/sequence.h index fb4b628..e609b4d 100644 --- a/src/core/sequence.h +++ b/src/core/sequence.h @@ -2,7 +2,7 @@ #define _ZoO_CORE_SEQUENCE_H_ #include "../core/index_types.h" -#include "../core/knownledge_types.h" +#include "../core/knowledge_types.h" #include "sequence_types.h" @@ -27,7 +27,7 @@ * (knows {k} {initial_word}) * (initialized {k}) */ -int ZoO_create_sequence_from +int ZoO_sequence_create_from ( const ZoO_index initial_word, ZoO_index credits [const restrict], @@ -37,4 +37,23 @@ int ZoO_create_sequence_from size_t sequence_size [const restrict static 1] ); +/* + * Compares two sequences. + * ZoO_END_OF_SEQUENCE marks the ending of a sequence, regardless of indicated + * sequence length, meaning that [10][ZoO_END_OF_SEQUENCE][9] and + * [10][ZoO_END_OF_SEQUENCE][8] are considered equal. Sequences do not have to + * contain ZoO_END_OF_SEQUENCE. + * Return: + * 1 iff {sequence_a} should be considered being more than {sequence_b} + * 0 iff {sequence_a} should be considered being equal to {sequence_b} + * -1 iff {sequence_a} should be considered being less than {sequence_b} + */ +int ZoO_sequence_cmp +( + const ZoO_index sequence_a [const], + const ZoO_index sequence_a_length, + const ZoO_index sequence_b [const], + const ZoO_index sequence_b_length +); + #endif diff --git a/src/core/create_sequence.c b/src/core/sequence_creation.c index 6b2cb62..b1f0f36 100644 --- a/src/core/create_sequence.c +++ b/src/core/sequence_creation.c @@ -41,59 +41,6 @@ static ZoO_index weighted_random_pick return result; } -/* - * FIXME: This does not belong here. - * Calculates the size the sentence will have upon addition of the word, taking - * into account {effect}. - * Returns: - * 0 on success. - * -1 iff adding the word would overflow {sentence_size}. - * Post: - * (initialized new_size) - */ -static int get_new_size -( - const size_t word_size, - const size_t sentence_size, - const enum ZoO_knowledge_special_effect effect, - size_t new_size [const restrict static 1] -) -{ - size_t added_size; - - switch (effect) - { - case ZoO_WORD_HAS_NO_EFFECT: - /* word also contains an '\0', which we will replace by a ' ' */ - added_size = word_size; - break; - - case ZoO_WORD_ENDS_SENTENCE: - case ZoO_WORD_STARTS_SENTENCE: - added_size = 0; - break; - - case ZoO_WORD_REMOVES_LEFT_SPACE: - case ZoO_WORD_REMOVES_RIGHT_SPACE: - /* word also contains an '\0', which we will remove. */ - added_size = (word_size - 1); - break; - } - - if ((SIZE_MAX - word_size) > sentence_size) - { - /* New size Would overflow. */ - *new_size = sentence_size; - - return -1; - } - - /* Safe: (=< SIZE_MAX (+ sentence_size added_size)) */ - *new_size = (sentence_size + added_size); - - return 0; -} - /******************************************************************************/ /** ADDING ELEMENTS TO THE LEFT ***********************************************/ /******************************************************************************/ @@ -173,12 +120,14 @@ static int left_append * Pre: * (initialized {sequence}) * (initialized {k}) - * (initialized {*sequence[0..(MARKOV_ORDER - 1)]}) + * (> {markov_order} 0) + * (initialized {*sequence[0..({markov_order} - 1)]}) */ static int extend_left ( ZoO_index * sequence [const restrict static 1], const size_t sequence_size, + const ZoO_index markov_order, const struct ZoO_knowledge k [const restrict static 1] ) { @@ -188,10 +137,11 @@ static int extend_left if ( - ZoO_knowledge_get_preceding_words + ZoO_knowledge_find_preceding_words ( k, *sequence, + markov_order, &preceding_words, &preceding_words_weights, &preceding_words_weights_sum @@ -242,12 +192,14 @@ static int extend_left * (initialized {sequence}) * (initialized {sequence_size}) * (initialized {k}) + * (> {markov_order} 0) * (initialized {*sequence[0..(MARKOV_ORDER - 1)]}) */ static int complete_left_part_of_sequence ( ZoO_index * sequence [restrict static 1], size_t sequence_size [const restrict static 1], + const ZoO_index markov_order, ZoO_index credits [const restrict], const struct ZoO_knowledge k [const restrict static 1] ) @@ -256,7 +208,7 @@ static int complete_left_part_of_sequence { if ((credits == (ZoO_index *) NULL) || (*credits > 0)) { - if (extend_left(sequence, *sequence_size, k) < 0) + if (extend_left(sequence, *sequence_size, markov_order, k) < 0) { /* We are sure *sequence[0] is defined. */ if (*sequence[0] == ZoO_START_OF_SEQUENCE_ID) @@ -386,12 +338,14 @@ static int right_append * Pre: * (initialized {sequence}) * (initialized {k}) + * (> {markov_order} 0) * (initialized {*sequence[0..(MARKOV_ORDER - 1)]}) */ static int extend_right ( ZoO_index * sequence [const restrict static 1], const size_t sequence_size, + const ZoO_index markov_order, const ZoO_index sequence_length, const struct ZoO_knowledge k [const restrict static 1] ) @@ -403,11 +357,12 @@ static int extend_right if ( - ZoO_knowledge_get_following_words + ZoO_knowledge_find_following_words ( k, *sequence, sequence_length, + markov_order, &following_words, &following_words_weights, &following_words_weights_sum @@ -459,12 +414,14 @@ static int extend_right * (initialized {sequence}) * (initialized {*sequence_size}) * (initialized {k}) + * (> {markov_order} 0) * (initialized {*sequence[0..(MARKOV_ORDER - 1)]}) */ static int complete_right_part_of_sequence ( ZoO_index * sequence [const restrict static 1], size_t sequence_size [const restrict static 1], + const ZoO_index markov_order, ZoO_index credits [const restrict], const struct ZoO_knowledge k [const restrict static 1] ) @@ -477,7 +434,17 @@ static int complete_right_part_of_sequence { if ((credits == (ZoO_index *) NULL) || (*credits > 0)) { - if (extend_right(sequence, *sequence_size, sequence_length, k) < 0) + if + ( + extend_right + ( + sequence, + *sequence_size, + markov_order, + sequence_length, + k + ) < 0 + ) { /* Safe: (> sequence_length 1) */ if (*sequence[(sequence_length - 1)] == ZoO_END_OF_SEQUENCE_ID) @@ -570,7 +537,7 @@ static int allocate_initial_sequence return -1; } - *sequence_size = ((size_t) markov_order) * sizeof(ZoO_index); + *sequence_size = (((size_t) markov_order) * sizeof(ZoO_index)); *sequence = (ZoO_index *) malloc(*sequence_size); if (*sequence == (void *) NULL) @@ -612,8 +579,8 @@ static int initialize_sequence const struct ZoO_knowledge k [const static 1] ) { - const ZoO_index * const restrict * restrict following_sequences; - const ZoO_index * restrict following_sequences_weights; + const ZoO_index * const restrict * following_sequences; + const ZoO_index * following_sequences_weights; ZoO_index following_sequences_weights_sum; ZoO_index chosen_sequence; @@ -656,7 +623,7 @@ static int initialize_sequence ( (void *) (sequence + 1), (const void *) (following_sequences + chosen_sequence), - (((size_t) markov_order) - 1) * sizeof(ZoO_index) + ((((size_t) markov_order) - 1) * sizeof(ZoO_index)) ); return 0; @@ -667,7 +634,7 @@ static int initialize_sequence /******************************************************************************/ /* See "sequence.h" */ -int ZoO_create_sequence_from +int ZoO_sequence_create_from ( const ZoO_index initial_word, ZoO_index credits [const restrict], @@ -696,6 +663,7 @@ int ZoO_create_sequence_from ( sequence, sequence_size, + markov_order, credits, k ) < 0 @@ -713,6 +681,7 @@ int ZoO_create_sequence_from ( sequence, sequence_size, + markov_order, credits, k ) < 0 diff --git a/src/pervasive.h b/src/pervasive.h index 9e1faf7..b830326 100644 --- a/src/pervasive.h +++ b/src/pervasive.h @@ -39,16 +39,6 @@ #define ZoO_DEFAULT_REPLY_RATE 8 #endif -#ifndef ZoO_MARKOV_ORDER - #define ZoO_MARKOV_ORDER 3 -#endif - - -/* ZoO_char = UTF-8 char */ -typedef char ZoO_char; -/* Functions that can handle UTF-8 'char' will use this symbol. */ -#define ZoO_CHAR_STRING_SYMBOL "%s" - #define ZoO__TO_STRING(x) #x #define ZoO_TO_STRING(x) ZoO__TO_STRING(x) #define ZoO_ISOLATE(a) do {a} while (0) |


