| summaryrefslogtreecommitdiff | 
diff options
| -rw-r--r-- | src/core/CMakeLists.txt | 4 | ||||
| -rw-r--r-- | src/core/char.c | 48 | ||||
| -rw-r--r-- | src/core/char.h | 21 | ||||
| -rw-r--r-- | src/core/char_types.h | 17 | ||||
| -rw-r--r-- | src/core/knowledge.c | 50 | ||||
| -rw-r--r-- | src/core/knowledge.h | 78 | ||||
| -rw-r--r-- | src/core/knowledge_search.c | 339 | ||||
| -rw-r--r-- | src/core/knowledge_types.h | 58 | ||||
| -rw-r--r-- | src/core/sequence.c | 152 | ||||
| -rw-r--r-- | src/core/sequence.h | 23 | ||||
| -rw-r--r-- | src/core/sequence_creation.c (renamed from src/core/create_sequence.c) | 95 | ||||
| -rw-r--r-- | src/pervasive.h | 10 | 
12 files changed, 586 insertions, 309 deletions
| diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index af5ca65..37b95cb 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1,9 +1,11 @@  set(     SRC_FILES ${SRC_FILES} +   ${CMAKE_CURRENT_SOURCE_DIR}/char.c     ${CMAKE_CURRENT_SOURCE_DIR}/main.c     ${CMAKE_CURRENT_SOURCE_DIR}/knowledge.c +   ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_search.c     ${CMAKE_CURRENT_SOURCE_DIR}/assimilate.c -   ${CMAKE_CURRENT_SOURCE_DIR}/create_sequence.c +   ${CMAKE_CURRENT_SOURCE_DIR}/sequence_creation.c     ${CMAKE_CURRENT_SOURCE_DIR}/sequence.c  ) diff --git a/src/core/char.c b/src/core/char.c new file mode 100644 index 0000000..39ca72e --- /dev/null +++ b/src/core/char.c @@ -0,0 +1,48 @@ +#include <string.h> + +#include "char.h" + +int ZoO_char_is_banned (const ZoO_char c) +{ +   switch (c) +   { +      case '(': +      case ')': +      case '[': +      case ']': +      case '{': +      case '}': +      case '<': +      case '>': +         return 1; + +      default: +         return 0; +   } +} + +int ZoO_char_is_punctuation (const ZoO_char c) +{ +   switch (c) +   { +      case '!': +      case ',': +      case '.': +      case ':': +      case ';': +      case '?': +         return 1; + +      default: +         return 0; +   } +} + +int ZoO_word_cmp +( +   const ZoO_char word_a [const static 1], +   const ZoO_char word_b [const static 1] +) +{ +   return strcmp((const char *) word_a, (const char *) word_b); +} diff --git a/src/core/char.h b/src/core/char.h new file mode 100644 index 0000000..772a3a2 --- /dev/null +++ b/src/core/char.h @@ -0,0 +1,21 @@ +#ifndef _ZoO_CORE_CHAR_H_ +#define _ZoO_CORE_CHAR_H_ + +#include "char_types.h" + +enum ZoO_word_property ZoO_get_word_property +( +   const ZoO_char word [const restrict], +   size_t word_size +); + +int ZoO_word_cmp +( +   const ZoO_char word_a [const static 1], +   const ZoO_char word_b [const static 1] +); + +int ZoO_char_is_punctuation (const ZoO_char c); +int ZoO_word_char_is_banned (const ZoO_char c); + +#endif diff --git a/src/core/char_types.h b/src/core/char_types.h new file mode 100644 index 0000000..67b5294 --- /dev/null +++ b/src/core/char_types.h @@ -0,0 +1,17 @@ +#ifndef _ZoO_CORE_CHAR_TYPES_H_ +#define _ZoO_CORE_CHAR_TYPES_H_ + +enum ZoO_word_property +{ +   ZoO_WORD_NO_PROPERTY, +   ZoO_WORD_HAS_NO_LEFT_SEPARATOR, +   ZoO_WORD_HAS_NO_RIGHT_SEPARATOR +}; + +/* ZoO_char = UTF-8 char */ +typedef char ZoO_char; + +/* Functions that can handle UTF-8 'char' will use this symbol. */ +#define ZoO_CHAR_STRING_SYMBOL "%s" + +#endif diff --git a/src/core/knowledge.c b/src/core/knowledge.c index 4980fdd..279a646 100644 --- a/src/core/knowledge.c +++ b/src/core/knowledge.c @@ -9,56 +9,6 @@  /** Basic functions of the ZoO_knowledge structure ****************************/ -/* XXX: are we as close to immutable as we want to be? */ -unsigned int const ZoO_knowledge_punctuation_chars_count = 8; -const ZoO_char const ZoO_knowledge_punctuation_chars[8] = -   { -      '!', -      ',', -      '.', -      ':', -      ';', -      '?', -      '~', -      '\001' -   }; - -/* XXX: are we as close to immutable as we want to be? */ -unsigned int const ZoO_knowledge_forbidden_chars_count = 8; -const ZoO_char const ZoO_knowledge_forbidden_chars[8]= -   { -      '(', -      ')', -      '[', -      ']', -      '{', -      '}', -      '<', -      '>' -   }; - -static int cmp_word -( -   const void * const a, -   const void * const b, -   const void * const other -) -{ -   ZoO_char const * word; -   ZoO_index const * sorted_index; -   struct ZoO_knowledge const * k; - -   word = (ZoO_char const *) a; -   sorted_index = (ZoO_index const *) b; -   k = (struct ZoO_knowledge *) other; - -   return strcmp -   ( -      (const char *) word, -      (const char *) k->words[*sorted_index].word -   ); -} -  /* See "knowledge.h". */  int ZoO_knowledge_find  ( diff --git a/src/core/knowledge.h b/src/core/knowledge.h index 7b5d754..b4f7b7e 100644 --- a/src/core/knowledge.h +++ b/src/core/knowledge.h @@ -1,7 +1,8 @@  #ifndef _ZoO_CORE_KNOWLEDGE_H_  #define _ZoO_CORE_KNOWLEDGE_H_ -#include "../tool/strings_types.h" +#include "../core/char_types.h" +#include "../core/index_types.h"  #include "knowledge_types.h" @@ -24,22 +25,6 @@ int ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1]);   */  void ZoO_knowledge_finalize (struct ZoO_knowledge k [const static 1]); -/* - * When returning 0: - *    {word} is in {k}. - *    {word} is located at {k->words[*result]}. - * - * When returning -1: - *    {word} is not in {k}. - *    {*result} is where {word} was expected to be found in - *    {k->sorted_indices}. - */ -int ZoO_knowledge_find -( -   const struct ZoO_knowledge k [const restrict static 1], -   const ZoO_char word [const restrict static 1], -   ZoO_index result [const restrict static 1] -);  /*   * When returning 0: @@ -59,39 +44,58 @@ int ZoO_knowledge_learn     ZoO_index result [const restrict static 1]  ); -int ZoO_knowledge_assimilate +int ZoO_knowledge_learn_sequence  (     struct ZoO_knowledge k [const static 1], -   struct ZoO_strings string [const restrict static 1], -   ZoO_index const aliases_count, -   const char * restrict aliases [const restrict static aliases_count] +   const ZoO_index sequence [const restrict], +   const ZoO_index sequence_length  ); -int ZoO_knowledge_extend +int ZoO_knowledge_get_following_sequences  ( -   struct ZoO_knowledge k [const static 1], -   const struct ZoO_strings string [const], -   ZoO_index const aliases_count, -   const char * restrict aliases [const restrict static aliases_count], -   ZoO_char * result [const static 1] +   const struct ZoO_knowledge k [const static 1], +   const ZoO_index initial_word, +   const ZoO_index * const restrict * following_sequences [const restrict static 1], +   const ZoO_index * following_sequences_weights [const restrict static 1], +   const ZoO_index following_sequences_weights_sum [const static 1]  ); -int ZoO_knowledge_find_link +/* + * When returning 0: + *    {word} is in {k}. + *    {word} is located at {k->words[*result]}. + * + * When returning -1: + *    {word} is not in {k}. + *    {*result} is where {word} was expected to be found in + *    {k->sorted_indices}. + */ +int ZoO_knowledge_find_word_id  ( -   ZoO_index const links_count, -   struct ZoO_knowledge_link links [const], -   ZoO_index const sequence [const restrict static ZoO_SEQUENCE_SIZE], +   const struct ZoO_knowledge k [const restrict static 1], +   const ZoO_char word [const restrict static 1],     ZoO_index result [const restrict static 1]  ); -/* Create it if it's not found. */ -int ZoO_knowledge_get_link +int ZoO_knowledge_find_preceding_words  ( -   ZoO_index links_count [const], -   struct ZoO_knowledge_link * links [const], -   ZoO_index const sequence [const restrict static ZoO_S_LINK_SIZE], -   ZoO_index result [const restrict static 1] +   const struct ZoO_knowledge k [const static 1], +   const ZoO_index sequence [const restrict], +   const ZoO_index markov_order, +   const ZoO_index * restrict preceding_words [const restrict static 1], +   const ZoO_index * restrict preceding_words_weights [const restrict static 1], +   ZoO_index preceding_words_weights_sum [const restrict static 1]  ); +int ZoO_knowledge_find_following_words +( +   const struct ZoO_knowledge k [const static 1], +   const ZoO_index sequence [const restrict], +   const ZoO_index sequence_length, +   const ZoO_index markov_order, +   const ZoO_index * restrict following_words [const restrict static 1], +   const ZoO_index * restrict following_words_weights [const restrict static 1], +   ZoO_index following_words_weights_sum [const restrict static 1] +);  #endif diff --git a/src/core/knowledge_search.c b/src/core/knowledge_search.c new file mode 100644 index 0000000..af62266 --- /dev/null +++ b/src/core/knowledge_search.c @@ -0,0 +1,339 @@ +#include <stdlib.h> + +#include "../core/char.h" +#include "../core/index.h" +#include "../core/sequence.h" + +#include "../io/error.h" + +#include "knowledge.h" + +/* See "knowledge.h". */ +int ZoO_knowledge_find_word_id +( +   const struct ZoO_knowledge k [const restrict static 1], +   const ZoO_char word [const restrict static 1], +   ZoO_index result [const restrict static 1] +) +{ +   /* This is a binary search */ +   int cmp; +   ZoO_index i, current_min, current_max; +   ZoO_index candidate_id; + +   /* Handles the case where the list is empty ********************************/ +   current_max = k->words_length; + +   if (current_max == 0) +   { +      *result = 0; + +      return -1; +   } +   /***************************************************************************/ + +   current_min = 0; +   current_max -= 1; + +   for (;;) +   { +      i = (current_min + ((current_max - current_min) / 2)); + +      cmp = ZoO_word_cmp(word, k->words[k->words_sorted[i]].word); + +      if (cmp > 0) +      { +         current_min = (i + 1); + +         if (current_min > current_max) +         { +            *result = current_min; + +            return -1; +         } +      } +      else if (cmp < 0) +      { +         if ((current_min > current_max) || (i == 0)) +         { +            *result = current_min; + +            return -1; +         } + +         current_max = (i - 1); +      } +      else +      { +         *result = i; + +         return 0; +      } +   } +} + +int ZoO_knowledge_find_preceding_words +( +   const struct ZoO_knowledge k [const static 1], +   const ZoO_index sequence [const restrict], +   const ZoO_index markov_order, /* Pre: (> 0) */ +   const ZoO_index * restrict preceding_words [const restrict static 1], +   const ZoO_index * restrict preceding_words_weights [const restrict static 1], +   ZoO_index preceding_words_weights_sum [const restrict static 1] +) +{ +   /* This is a binary search */ +   int cmp; +   ZoO_index i, current_min, current_max; +   ZoO_index candidate_id; +   const ZoO_index markov_sequence_length = (markov_order - 1); + +   if (sequence[markov_sequence_length] >= k->words_length) +   { +      ZoO_S_ERROR +      ( +         "Attempting to find the preceding words of an unknown word." +      ); + +      *preceding_words = (const ZoO_index *) NULL; +      *preceding_words_weights = (const ZoO_index *) NULL; +      *preceding_words_weights_sum = 0; + +      return -1; +   } + +   *preceding_words_weights_sum = +      k->words[sequence[markov_sequence_length]].occurrences; + +   if (markov_order == 1) +   { +      /* Special case: empty sequences. */ +      *preceding_words = +         (const ZoO_index *) k->words +         [ +            sequence[markov_sequence_length] +         ].preceded.targets; + +      *preceding_words_weights = +         (const ZoO_index *) k->words +         [ +            sequence[markov_sequence_length] +         ].preceded.targets_occurrences; + +      return 0; +   } + +   /* Handles the case where the list is empty ********************************/ +   current_max = +      k->words[sequence[markov_sequence_length]].preceded.sequences_length; + +   if (current_max == 0) +   { +      *preceding_words = (const ZoO_index *) NULL; +      *preceding_words_weights = (const ZoO_index *) NULL; +      *preceding_words_weights_sum = 0; + +      ZoO_S_ERROR +      ( +         "Attempting to find the preceding words of a sequence that never had " +         "any." +      ); + +      return -2; +   } +   /***************************************************************************/ + +   current_min = 0; +   current_max -= 1; + +   for (;;) +   { +      i = (current_min + ((current_max - current_min) / 2)); + +      cmp = +         ZoO_sequence_cmp +         ( +            sequence, +            markov_sequence_length, +            k->words[sequence[markov_sequence_length]].preceded.sequences[i], +            markov_sequence_length +         ); + +      if (cmp > 0) +      { +         current_min = (i + 1); + +         if (current_min > current_max) +         { +            *preceding_words = (const ZoO_index *) NULL; +            *preceding_words_weights = (const ZoO_index *) NULL; +            *preceding_words_weights_sum = 0; + +            return -2; +         } +      } +      else if (cmp < 0) +      { +         if ((current_min > current_max) || (i == 0)) +         { +            *preceding_words = (const ZoO_index *) NULL; +            *preceding_words_weights = (const ZoO_index *) NULL; +            *preceding_words_weights_sum = 0; + +            return -2; +         } + +         current_max = (i - 1); +      } +      else +      { +         *preceding_words = +            k->words +            [ +               sequence[markov_sequence_length] +            ].preceded.targets[i]; + +         *preceding_words_weights = +            k->words +            [ +               sequence[markov_sequence_length] +            ].preceded.targets_occurrences[i]; + +         return 0; +      } +   } +} + +int ZoO_knowledge_find_following_words +( +   const struct ZoO_knowledge k [const static 1], +   const ZoO_index sequence [const restrict], +   const ZoO_index sequence_length, +   const ZoO_index markov_order, +   const ZoO_index * restrict following_words [const restrict static 1], +   const ZoO_index * restrict following_words_weights [const restrict static 1], +   ZoO_index following_words_weights_sum [const restrict static 1] +) +{ +   /* This is a binary search */ +   int cmp; +   ZoO_index i, current_min, current_max; +   ZoO_index candidate_id; +   const ZoO_index markov_sequence_length = (markov_order - 1); +   const ZoO_index word_of_interest = +      (sequence_length - markov_sequence_length) - 1; + +   if (sequence[word_of_interest] >= k->words_length) +   { +      ZoO_S_ERROR +      ( +         "Attempting to find the following words of an unknown word." +      ); + +      *following_words = (const ZoO_index *) NULL; +      *following_words_weights = (const ZoO_index *) NULL; +      *following_words_weights_sum = 0; + +      return -1; +   } + +   *following_words_weights_sum = +      k->words[sequence[word_of_interest]].occurrences; + +   if (markov_order == 1) +   { +      /* Special case: empty sequences. */ +      *following_words = +         (const ZoO_index *) k->words +         [ +            sequence[word_of_interest] +         ].preceded.targets; + +      *following_words_weights = +         (const ZoO_index *) k->words +         [ +            sequence[word_of_interest] +         ].preceded.targets_occurrences; + +      return 0; +   } + +   /* Handles the case where the list is empty ********************************/ +   current_max = k->words[sequence[word_of_interest]].preceded.sequences_length; + +   if (current_max == 0) +   { +      *following_words = (const ZoO_index *) NULL; +      *following_words_weights = (const ZoO_index *) NULL; +      *following_words_weights_sum = 0; + +      ZoO_S_WARNING +      ( +         "Attempting to find the following words of a sequence that never had " +         "any." +      ); + +      return -2; +   } +   /***************************************************************************/ + +   current_min = 0; +   current_max -= 1; + +   for (;;) +   { +      i = (current_min + ((current_max - current_min) / 2)); + +      cmp = +         ZoO_sequence_cmp +         ( +            (sequence + word_of_interest), +            markov_sequence_length, +            k->words[sequence[word_of_interest]].followed.sequences[i], +            markov_sequence_length +         ); + +      if (cmp > 0) +      { +         current_min = (i + 1); + +         if (current_min > current_max) +         { +            *following_words = (const ZoO_index *) NULL; +            *following_words_weights = (const ZoO_index *) NULL; +            *following_words_weights_sum = 0; + +            return -2; +         } +      } +      else if (cmp < 0) +      { +         if ((current_min > current_max) || (i == 0)) +         { +            *following_words = (const ZoO_index *) NULL; +            *following_words_weights = (const ZoO_index *) NULL; +            *following_words_weights_sum = 0; + +            return -2; +         } + +         current_max = (i - 1); +      } +      else +      { +         *following_words = +            k->words +            [ +               sequence[markov_sequence_length] +            ].followed.targets[i]; + +         *following_words_weights = +            k->words +            [ +               sequence[markov_sequence_length] +            ].followed.targets_occurrences[i]; + +         return 0; +      } +   } +} diff --git a/src/core/knowledge_types.h b/src/core/knowledge_types.h index e92b5e1..aea11da 100644 --- a/src/core/knowledge_types.h +++ b/src/core/knowledge_types.h @@ -1,62 +1,34 @@  #ifndef _ZoO_CORE_KNOWLEDGE_TYPES_H_  #define _ZoO_CORE_KNOWLEDGE_TYPES_H_ -#include "../pervasive.h" +#include "../core/index_types.h" +#include "../core/char_types.h" -#define ZoO_WORD_START_OF_LINE 0 -#define ZoO_WORD_END_OF_LINE   1 - -#if ZoO_MARKOV_ORDER == 1 -   #define ZoO_SEQUENCE_SIZE 1 -#else -   #define ZoO_SEQUENCE_SIZE ZoO_MARKOV_ORDER - 1 -#endif - -#define ZoO_S_LINK_SIZE (ZoO_SEQUENCE_SIZE + 1) - -/* XXX: are we as close to immutable as we want to be? */ -extern unsigned int const ZoO_knowledge_punctuation_chars_count; -extern const ZoO_char const ZoO_knowledge_punctuation_chars[8]; -extern unsigned int const ZoO_knowledge_forbidden_chars_count; -extern const ZoO_char const ZoO_knowledge_forbidden_chars[8]; - - -enum ZoO_knowledge_special_effect +struct ZoO_knowledge_sequence_collection  { -   ZoO_WORD_HAS_NO_EFFECT, -   ZoO_WORD_ENDS_SENTENCE, -   ZoO_WORD_STARTS_SENTENCE, -   ZoO_WORD_REMOVES_LEFT_SPACE, -   ZoO_WORD_REMOVES_RIGHT_SPACE -}; - -struct ZoO_knowledge_link -{ -   ZoO_index sequence[ZoO_SEQUENCE_SIZE]; -   ZoO_index occurrences; -   ZoO_index targets_count; -   ZoO_index * targets_occurrences; -   ZoO_index * targets; +   ZoO_index ** sequences; +   ZoO_index sequences_length; +   ZoO_index * sequences_sorted; +   ZoO_index * occurrences; +   ZoO_index ** targets; +   ZoO_index * targets_length; +   ZoO_index ** targets_occurrences;  };  struct ZoO_knowledge_word  { -   size_t word_size;     ZoO_char * word; -   enum ZoO_knowledge_special_effect special; +   size_t word_size;     ZoO_index occurrences; -   ZoO_index forward_links_count; -   ZoO_index backward_links_count; -   struct ZoO_knowledge_link * forward_links; -   struct ZoO_knowledge_link * backward_links; +   struct ZoO_knowledge_sequence_collection followed; +   struct ZoO_knowledge_sequence_collection preceded;  }; -  struct ZoO_knowledge  { -   ZoO_index words_count; -   ZoO_index * sorted_indices;     struct ZoO_knowledge_word * words; +   ZoO_index words_length; +   ZoO_index * words_sorted;  };  #endif diff --git a/src/core/sequence.c b/src/core/sequence.c index 67174d1..9e370a3 100644 --- a/src/core/sequence.c +++ b/src/core/sequence.c @@ -1,129 +1,75 @@  #include <stdlib.h>  #include <string.h> -#include "../io/error.h" -#include "../tool/sorted_list.h" +#include "../core/index.h" -#include "knowledge.h" +#include "sequence.h" -static int cmp_seq_link +/* See "sequence.h" */ +int ZoO_sequence_cmp  ( -   const void * const a, -   const void * const b, -   const void * const other +   const ZoO_index sequence_a [const], +   const ZoO_index sequence_a_length, +   const ZoO_index sequence_b [const], +   const ZoO_index sequence_b_length  )  { -   ZoO_index j; -   const ZoO_index * sequence; -   const struct ZoO_knowledge_link * link; +   ZoO_index min_length; +   ZoO_index i; -   sequence = (const ZoO_index *) a; -   link = (const struct ZoO_knowledge_link *) b; +   if (sequence_a_length < sequence_b_length) +   { +      min_length = sequence_a_length; +   } +   else +   { +      min_length = sequence_b_length; +   } -   for (j = 0; j < ZoO_SEQUENCE_SIZE; ++j) +   for (i = 0; i < min_length; ++i)     { -      if (sequence[j] < link->sequence[j]) +      if (sequence_a[i] < sequence_b[i])        {           return -1;        } -      else if (sequence[j] > link->sequence[j]) +      else if (sequence_b[i] > sequence_b[i])        {           return 1;        } -   } - -   return 0; -} - -int ZoO_knowledge_find_link -( -   ZoO_index const links_count, -   struct ZoO_knowledge_link links [const], -   ZoO_index const sequence [const restrict static ZoO_SEQUENCE_SIZE], -   ZoO_index result [const restrict static 1] -) -{ -   return -      ZoO_sorted_list_index_of +      else if        ( -         links_count, -         (void const *) links, -         (void const *) sequence, -         sizeof(struct ZoO_knowledge_link), -         cmp_seq_link, -         (void const *) NULL, -         result -      ); -} - -int ZoO_knowledge_get_link -( -   ZoO_index links_count [const], -   struct ZoO_knowledge_link * links [const], -   ZoO_index const sequence [const restrict static ZoO_SEQUENCE_SIZE], -   ZoO_index result [const restrict static 1] -) -{ -   struct ZoO_knowledge_link * new_p; +         (sequence_a[i] == ZoO_END_OF_SEQUENCE_ID) +         && (sequence_b[i] == ZoO_END_OF_SEQUENCE_ID) +      ) +      { +         return 0; +      } +   } -   if -   ( -      ZoO_sorted_list_index_of -      ( -         *links_count, -         (void const *) *links, -         (void const *) sequence, -         sizeof(struct ZoO_knowledge_link), -         cmp_seq_link, -         (void const *) NULL, -         result -      ) == 0 -   ) +   if (sequence_a_length < sequence_b_length)     { -      return 0; +      if (sequence_b[i] == ZoO_END_OF_SEQUENCE_ID) +      { +         return 0; +      } +      else +      { +         return -1; +      }     } - -   *links_count += 1; - -   new_p = -      (struct ZoO_knowledge_link *) realloc -      ( -         (void *) *links, -         (sizeof(struct ZoO_knowledge_link) * (*links_count)) -      ); - -   if (new_p == (struct ZoO_knowledge_link *) NULL) +   else if (sequence_a_length > sequence_b_length)     { -      *links_count -= 1; - -      return -1; +      if (sequence_a[i] == ZoO_END_OF_SEQUENCE_ID) +      { +         return 0; +      } +      else +      { +         return 1; +      }     } - -   if (*result < (*links_count - 1)) +   else     { -      memmove( -         (void *) (new_p + *result + 1), -         (const void *) (new_p + *result), -         (sizeof(struct ZoO_knowledge_link) * (*links_count - 1 - *result)) -      ); +      return 0;     } - -   *links = new_p; - -   new_p += *result; - -   memcpy -   ( -      (void *) new_p->sequence, -      (void const *) sequence, -      /* can be zero */ -      (sizeof(ZoO_index) * ZoO_SEQUENCE_SIZE) -   ); - -   new_p->occurrences = 0; -   new_p->targets_count = 0; -   new_p->targets_occurrences = (ZoO_index *) NULL; -   new_p->targets = (ZoO_index *) NULL; - -   return 0;  } diff --git a/src/core/sequence.h b/src/core/sequence.h index fb4b628..e609b4d 100644 --- a/src/core/sequence.h +++ b/src/core/sequence.h @@ -2,7 +2,7 @@  #define _ZoO_CORE_SEQUENCE_H_  #include "../core/index_types.h" -#include "../core/knownledge_types.h" +#include "../core/knowledge_types.h"  #include "sequence_types.h" @@ -27,7 +27,7 @@   *    (knows {k} {initial_word})   *    (initialized {k})   */ -int ZoO_create_sequence_from +int ZoO_sequence_create_from  (     const ZoO_index initial_word,     ZoO_index credits [const restrict], @@ -37,4 +37,23 @@ int ZoO_create_sequence_from     size_t sequence_size [const restrict static 1]  ); +/* + * Compares two sequences. + * ZoO_END_OF_SEQUENCE marks the ending of a sequence, regardless of indicated + * sequence length, meaning that [10][ZoO_END_OF_SEQUENCE][9] and + * [10][ZoO_END_OF_SEQUENCE][8] are considered equal. Sequences do not have to + * contain ZoO_END_OF_SEQUENCE. + * Return: + *    1 iff {sequence_a} should be considered being more than {sequence_b} + *    0 iff {sequence_a} should be considered being equal to {sequence_b} + *    -1 iff {sequence_a} should be considered being less than {sequence_b} + */ +int ZoO_sequence_cmp +( +   const ZoO_index sequence_a [const], +   const ZoO_index sequence_a_length, +   const ZoO_index sequence_b [const], +   const ZoO_index sequence_b_length +); +  #endif diff --git a/src/core/create_sequence.c b/src/core/sequence_creation.c index 6b2cb62..b1f0f36 100644 --- a/src/core/create_sequence.c +++ b/src/core/sequence_creation.c @@ -41,59 +41,6 @@ static ZoO_index weighted_random_pick     return result;  } -/* - * FIXME: This does not belong here. - * Calculates the size the sentence will have upon addition of the word, taking - * into account {effect}. - * Returns: - *    0 on success. - *    -1 iff adding the word would overflow {sentence_size}. - * Post: - *    (initialized new_size) - */ -static int get_new_size -( -   const size_t word_size, -   const size_t sentence_size, -   const enum ZoO_knowledge_special_effect effect, -   size_t new_size [const restrict static 1] -) -{ -   size_t added_size; - -   switch (effect) -   { -      case ZoO_WORD_HAS_NO_EFFECT: -         /* word also contains an '\0', which we will replace by a ' ' */ -         added_size = word_size; -         break; - -      case ZoO_WORD_ENDS_SENTENCE: -      case ZoO_WORD_STARTS_SENTENCE: -         added_size = 0; -         break; - -      case ZoO_WORD_REMOVES_LEFT_SPACE: -      case ZoO_WORD_REMOVES_RIGHT_SPACE: -         /* word also contains an '\0', which we will remove. */ -         added_size = (word_size - 1); -         break; -   } - -   if ((SIZE_MAX - word_size) > sentence_size) -   { -      /* New size Would overflow. */ -      *new_size = sentence_size; - -      return -1; -   } - -   /* Safe: (=< SIZE_MAX (+ sentence_size added_size)) */ -   *new_size = (sentence_size + added_size); - -   return 0; -} -  /******************************************************************************/  /** ADDING ELEMENTS TO THE LEFT ***********************************************/  /******************************************************************************/ @@ -173,12 +120,14 @@ static int left_append   * Pre:   *    (initialized {sequence})   *    (initialized {k}) - *    (initialized {*sequence[0..(MARKOV_ORDER - 1)]}) + *    (> {markov_order} 0) + *    (initialized {*sequence[0..({markov_order} - 1)]})   */  static int extend_left  (     ZoO_index * sequence [const restrict static 1],     const size_t sequence_size, +   const ZoO_index markov_order,     const struct ZoO_knowledge k [const restrict static 1]  )  { @@ -188,10 +137,11 @@ static int extend_left     if     ( -      ZoO_knowledge_get_preceding_words +      ZoO_knowledge_find_preceding_words        (           k,           *sequence, +         markov_order,           &preceding_words,           &preceding_words_weights,           &preceding_words_weights_sum @@ -242,12 +192,14 @@ static int extend_left   *    (initialized {sequence})   *    (initialized {sequence_size})   *    (initialized {k}) + *    (> {markov_order} 0)   *    (initialized {*sequence[0..(MARKOV_ORDER - 1)]})   */  static int complete_left_part_of_sequence  (     ZoO_index * sequence [restrict static 1],     size_t sequence_size [const restrict static 1], +   const ZoO_index markov_order,     ZoO_index credits [const restrict],     const struct ZoO_knowledge k [const restrict static 1]  ) @@ -256,7 +208,7 @@ static int complete_left_part_of_sequence     {        if ((credits == (ZoO_index *) NULL) || (*credits > 0))        { -         if (extend_left(sequence, *sequence_size, k) < 0) +         if (extend_left(sequence, *sequence_size, markov_order, k) < 0)           {              /* We are sure *sequence[0] is defined. */              if (*sequence[0] == ZoO_START_OF_SEQUENCE_ID) @@ -386,12 +338,14 @@ static int right_append   * Pre:   *    (initialized {sequence})   *    (initialized {k}) + *    (> {markov_order} 0)   *    (initialized {*sequence[0..(MARKOV_ORDER - 1)]})   */  static int extend_right  (     ZoO_index * sequence [const restrict static 1],     const size_t sequence_size, +   const ZoO_index markov_order,     const ZoO_index sequence_length,     const struct ZoO_knowledge k [const restrict static 1]  ) @@ -403,11 +357,12 @@ static int extend_right     if     ( -      ZoO_knowledge_get_following_words +      ZoO_knowledge_find_following_words        (           k,           *sequence,           sequence_length, +         markov_order,           &following_words,           &following_words_weights,           &following_words_weights_sum @@ -459,12 +414,14 @@ static int extend_right   *    (initialized {sequence})   *    (initialized {*sequence_size})   *    (initialized {k}) + *    (> {markov_order} 0)   *    (initialized {*sequence[0..(MARKOV_ORDER - 1)]})   */  static int complete_right_part_of_sequence  (     ZoO_index * sequence [const restrict static 1],     size_t sequence_size [const restrict static 1], +   const ZoO_index markov_order,     ZoO_index credits [const restrict],     const struct ZoO_knowledge k [const restrict static 1]  ) @@ -477,7 +434,17 @@ static int complete_right_part_of_sequence     {        if ((credits == (ZoO_index *) NULL) || (*credits > 0))        { -         if (extend_right(sequence, *sequence_size, sequence_length, k) < 0) +         if +         ( +            extend_right +            ( +               sequence, +               *sequence_size, +               markov_order, +               sequence_length, +               k +            ) < 0 +         )           {              /* Safe: (> sequence_length 1) */              if (*sequence[(sequence_length - 1)] == ZoO_END_OF_SEQUENCE_ID) @@ -570,7 +537,7 @@ static int allocate_initial_sequence        return -1;     } -   *sequence_size = ((size_t) markov_order) * sizeof(ZoO_index); +   *sequence_size = (((size_t) markov_order) * sizeof(ZoO_index));     *sequence = (ZoO_index *) malloc(*sequence_size);     if (*sequence == (void *) NULL) @@ -612,8 +579,8 @@ static int initialize_sequence     const struct ZoO_knowledge k [const static 1]  )  { -   const ZoO_index * const restrict * restrict following_sequences; -   const ZoO_index * restrict following_sequences_weights; +   const ZoO_index * const restrict * following_sequences; +   const ZoO_index * following_sequences_weights;     ZoO_index following_sequences_weights_sum;     ZoO_index chosen_sequence; @@ -656,7 +623,7 @@ static int initialize_sequence     (        (void *) (sequence + 1),        (const void *) (following_sequences + chosen_sequence), -      (((size_t) markov_order) - 1) * sizeof(ZoO_index) +      ((((size_t) markov_order) - 1) * sizeof(ZoO_index))     );     return 0; @@ -667,7 +634,7 @@ static int initialize_sequence  /******************************************************************************/  /* See "sequence.h" */ -int ZoO_create_sequence_from +int ZoO_sequence_create_from  (     const ZoO_index initial_word,     ZoO_index credits [const restrict], @@ -696,6 +663,7 @@ int ZoO_create_sequence_from        (           sequence,           sequence_size, +         markov_order,           credits,           k        ) < 0 @@ -713,6 +681,7 @@ int ZoO_create_sequence_from        (           sequence,           sequence_size, +         markov_order,           credits,           k        ) < 0 diff --git a/src/pervasive.h b/src/pervasive.h index 9e1faf7..b830326 100644 --- a/src/pervasive.h +++ b/src/pervasive.h @@ -39,16 +39,6 @@     #define ZoO_DEFAULT_REPLY_RATE         8  #endif -#ifndef ZoO_MARKOV_ORDER -   #define ZoO_MARKOV_ORDER               3 -#endif - - -/* ZoO_char = UTF-8 char */ -typedef char ZoO_char; -/* Functions that can handle UTF-8 'char' will use this symbol. */ -#define ZoO_CHAR_STRING_SYMBOL "%s" -  #define ZoO__TO_STRING(x) #x  #define ZoO_TO_STRING(x) ZoO__TO_STRING(x)  #define ZoO_ISOLATE(a) do {a} while (0) | 


