| summary refs log tree commit diff | 
diff options
| author | Nathanael Sensfelder <SpamShield0@MultiAgentSystems.org> | 2017-01-18 19:09:16 +0100 | 
|---|---|---|
| committer | Nathanael Sensfelder <SpamShield0@MultiAgentSystems.org> | 2017-01-18 19:09:16 +0100 | 
| commit | 0d49fb74eadcf933f696420cd182077927680d26 (patch) | |
| tree | 9220d260ce878f369138da12dae0300cf9ade5c9 /src | |
| parent | 24afb3e60bafd98e6a83dcb41ee6a7f7d41e76bc (diff) | |
Done with 'core', starting to work on 'knowledge'.
Diffstat (limited to 'src')
| -rw-r--r-- | src/CMakeLists.txt | 10 | ||||
| -rw-r--r-- | src/cli/CMakeLists.txt | 6 | ||||
| -rw-r--r-- | src/cli/cli.h (renamed from src/io/error.h) | 13 | ||||
| -rw-r--r-- | src/cli/parameters.c (renamed from src/io/parameters.c) | 0 | ||||
| -rw-r--r-- | src/cli/parameters.h (renamed from src/io/parameters.h) | 0 | ||||
| -rw-r--r-- | src/cli/parameters_types.h | 67 | ||||
| -rw-r--r-- | src/core/CMakeLists.txt | 9 | ||||
| -rw-r--r-- | src/core/assimilate.c | 281 | ||||
| -rw-r--r-- | src/core/char.c | 18 | ||||
| -rw-r--r-- | src/core/char.h | 35 | ||||
| -rw-r--r-- | src/core/char_types.h | 6 | ||||
| -rw-r--r-- | src/core/index.c | 61 | ||||
| -rw-r--r-- | src/core/index.h | 11 | ||||
| -rw-r--r-- | src/core/index_types.h | 2 | ||||
| -rw-r--r-- | src/core/sequence.c | 84 | ||||
| -rw-r--r-- | src/core/sequence.h | 20 | ||||
| -rw-r--r-- | src/core/sequence_creation.c | 9 | ||||
| -rw-r--r-- | src/core/sequence_from_string.c | 315 | ||||
| -rw-r--r-- | src/core/sequence_types.h | 3 | ||||
| -rw-r--r-- | src/file/data_input.c (renamed from src/io/data_input.c) | 0 | ||||
| -rw-r--r-- | src/file/data_input.h (renamed from src/io/data_input.h) | 0 | ||||
| -rw-r--r-- | src/file/data_input_types.h (renamed from src/io/data_input_types.h) | 0 | ||||
| -rw-r--r-- | src/file/data_output.c (renamed from src/io/data_output.c) | 1 | ||||
| -rw-r--r-- | src/file/data_output.h (renamed from src/io/data_output.h) | 0 | ||||
| -rw-r--r-- | src/io/CMakeLists.txt | 9 | ||||
| -rw-r--r-- | src/io/parameters_types.h | 21 | ||||
| -rw-r--r-- | src/irc/network.c (renamed from src/io/network.c) | 0 | ||||
| -rw-r--r-- | src/irc/network.h (renamed from src/io/network.h) | 0 | ||||
| -rw-r--r-- | src/irc/network_types.h (renamed from src/io/network_types.h) | 0 | ||||
| -rw-r--r-- | src/knowledge/CMakeLists.txt | 11 | ||||
| -rw-r--r-- | src/knowledge/knowledge.c (renamed from src/core/knowledge.c) | 8 | ||||
| -rw-r--r-- | src/knowledge/knowledge.h (renamed from src/core/knowledge.h) | 27 | ||||
| -rw-r--r-- | src/knowledge/knowledge_finalize.c (renamed from src/core/knowledge_finalize.c) | 7 | ||||
| -rw-r--r-- | src/knowledge/knowledge_learn_sequence.c | 324 | ||||
| -rw-r--r-- | src/knowledge/knowledge_learn_word.c | 276 | ||||
| -rw-r--r-- | src/knowledge/knowledge_search.c (renamed from src/core/knowledge_search.c) | 7 | ||||
| -rw-r--r-- | src/knowledge/knowledge_types.h (renamed from src/core/knowledge_types.h) | 5 | ||||
| -rw-r--r-- | src/main.c (renamed from src/core/main.c) | 0 | ||||
| -rw-r--r-- | src/pervasive.h | 34 | 
39 files changed, 1267 insertions, 413 deletions
| diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 76a73ed..594b14f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,13 @@ +add_subdirectory(cli)  add_subdirectory(core) -add_subdirectory(io) +add_subdirectory(file) +add_subdirectory(irc) +add_subdirectory(knowledge)  add_subdirectory(tool) +set( +   SRC_FILES ${SRC_FILES} +   ${CMAKE_CURRENT_SOURCE_DIR}/main.c +) +  set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) diff --git a/src/cli/CMakeLists.txt b/src/cli/CMakeLists.txt new file mode 100644 index 0000000..94e6337 --- /dev/null +++ b/src/cli/CMakeLists.txt @@ -0,0 +1,6 @@ +set( +   SRC_FILES ${SRC_FILES} +   ${CMAKE_CURRENT_SOURCE_DIR}/parameters.c +) +set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) + diff --git a/src/io/error.h b/src/cli/cli.h index be7359f..5aec25b 100644 --- a/src/io/error.h +++ b/src/cli/cli.h @@ -1,15 +1,10 @@ -#ifndef _ZoO_IO_ERROR_H_ -#define _ZoO_IO_ERROR_H_ +#ifndef _ZoO_CLI_CLI_H_ +#define _ZoO_CLI_CLI_H_  #include <stdio.h>  #include "../pervasive.h" -#define ZoO_DEBUG_ALL 1 - -#ifndef ZoO_DEBUG_ALL -   #define ZoO_DEBUG_ALL 0 -#endif  #ifndef ZoO_DEBUG_PROGRAM_FLOW     #define ZoO_DEBUG_PROGRAM_FLOW   (0 || ZoO_DEBUG_ALL) @@ -23,7 +18,9 @@     #define ZoO_DEBUG_LEARNING       (0 || ZoO_DEBUG_ALL)  #endif -#define ZoO_DEBUG_NETWORK  1 +#ifndef ZoO_DEBUG_NETWORK +   #define ZoO_DEBUG_NETWORK  1 +#endif  #ifndef ZoO_DEBUG_NETWORK     #define ZoO_DEBUG_NETWORK        (0 || ZoO_DEBUG_ALL) diff --git a/src/io/parameters.c b/src/cli/parameters.c index 77c33aa..77c33aa 100644 --- a/src/io/parameters.c +++ b/src/cli/parameters.c diff --git a/src/io/parameters.h b/src/cli/parameters.h index 1011e2b..1011e2b 100644 --- a/src/io/parameters.h +++ b/src/cli/parameters.h diff --git a/src/cli/parameters_types.h b/src/cli/parameters_types.h new file mode 100644 index 0000000..15b5254 --- /dev/null +++ b/src/cli/parameters_types.h @@ -0,0 +1,67 @@ +#ifndef _ZoO_IO_PARAMETERS_TYPES_H_ +#define _ZoO_IO_PARAMETERS_TYPES_H_ + +#include 
"../pervasive.h" + +/******************************************************************************/ +/** DEFAULT VALUES ************************************************************/ +/******************************************************************************/ + +#ifndef ZoO_DEFAULT_DATA_FILENAME +   #define ZoO_DEFAULT_DATA_FILENAME      "./memory.txt" +#endif + +#ifndef ZoO_DEFAULT_IRC_SERVER_ADDR +   #define ZoO_DEFAULT_IRC_SERVER_ADDR    "irc.foonetic.net" +#endif + +#ifndef ZoO_DEFAULT_IRC_SERVER_PORT +   #define ZoO_DEFAULT_IRC_SERVER_PORT    "6667" +#endif + +#ifndef ZoO_DEFAULT_IRC_SERVER_CHANNEL +   #define ZoO_DEFAULT_IRC_SERVER_CHANNEL "#theborghivemind" +#endif + +#ifndef ZoO_DEFAULT_IRC_USERNAME +   #define ZoO_DEFAULT_IRC_USERNAME       "zeroofone" +#endif + +#ifndef ZoO_DEFAULT_IRC_REALNAME +   #define ZoO_DEFAULT_IRC_REALNAME       "Zero of One (bot)" +#endif + +#ifndef ZoO_DEFAULT_REPLY_RATE +   #define ZoO_DEFAULT_REPLY_RATE         8 +#endif + +/******************************************************************************/ +/** DEBUG LEVELS **************************************************************/ +/******************************************************************************/ + +#ifndef ZoO_DEBUG_PARAMETERS +   #define ZoO_DEBUG_PARAMETERS (0 || ZoO_DEBUG_ALL) +#endif + +/******************************************************************************/ +/** FUNCTIONS *****************************************************************/ +/******************************************************************************/ + +struct ZoO_parameters +{ +   const char * restrict data_filename; +   const char * restrict new_data_filename; + +   const char * restrict irc_server_addr; +   const char * restrict irc_server_port; +   const char * restrict irc_server_channel; +   const char * restrict irc_username; +   const char * restrict irc_realname; + +   int reply_rate; + +   int aliases_count; +   const char * restrict * restrict aliases; +}; + 
+#endif diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index fe28080..1e1daa8 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1,13 +1,10 @@  set(     SRC_FILES ${SRC_FILES}     ${CMAKE_CURRENT_SOURCE_DIR}/char.c -   ${CMAKE_CURRENT_SOURCE_DIR}/main.c -   ${CMAKE_CURRENT_SOURCE_DIR}/knowledge.c -   ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_search.c -   ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_finalize.c -   ${CMAKE_CURRENT_SOURCE_DIR}/assimilate.c -   ${CMAKE_CURRENT_SOURCE_DIR}/sequence_creation.c +   ${CMAKE_CURRENT_SOURCE_DIR}/index.c     ${CMAKE_CURRENT_SOURCE_DIR}/sequence.c +   ${CMAKE_CURRENT_SOURCE_DIR}/sequence_creation.c +   ${CMAKE_CURRENT_SOURCE_DIR}/sequence_from_string.c  )  set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) diff --git a/src/core/assimilate.c b/src/core/assimilate.c deleted file mode 100644 index 7f03e1b..0000000 --- a/src/core/assimilate.c +++ /dev/null @@ -1,281 +0,0 @@ -#include <stdlib.h> -#include <string.h> - -#include "../io/error.h" - -#include "knowledge.h" - -/** Functions to assimilate sentences using a ZoO_knowledge structure *********/ - - -static int add_sequence -( -   ZoO_index links_count [const], -   struct ZoO_knowledge_link * links [const], -   ZoO_index const sequence [const restrict static ZoO_MARKOV_ORDER], -   ZoO_index const target_i, -   ZoO_index const offset -) -{ -   ZoO_index link_index, i; -   struct ZoO_knowledge_link * link; -   ZoO_index * new_p; - -   if -   ( -      ZoO_knowledge_get_link -      ( -         links_count, -         links, -         (sequence + offset), -         &link_index -      ) < 0 -   ) -   { -      return -1; -   } - -   link = (*links + link_index); -   link->occurrences += 1; - -   for (i = 0; i < link->targets_count; ++i) -   { -      if (link->targets[i] == sequence[target_i]) -      { -         link->targets_occurrences[i] += 1; - -         return 0; -      } -   } - -   link->targets_count += 1; - -   new_p = -      (ZoO_index *) realloc -      ( -    
     (void *) link->targets, -         (sizeof(ZoO_index) * link->targets_count) -      ); - -   if (new_p == (ZoO_index *) NULL) -   { -      link->targets_count -= 1; - -      /* TODO: err. */ -      return -1; -   } - -   link->targets = new_p; -   link->targets[link->targets_count - 1] = sequence[target_i]; - -   new_p = -      (ZoO_index *) realloc -      ( -         (void *) link->targets_occurrences, -         (sizeof(ZoO_index) * link->targets_count) -      ); - -   if (new_p == (ZoO_index *) NULL) -   { -      link->targets_count -= 1; - -      /* TODO: err. */ -      return -1; -   } - -   link->targets_occurrences = new_p; -   link->targets_occurrences[link->targets_count - 1] = 1; - -   return 0; -} - -static int add_word_occurrence -( -   struct ZoO_knowledge k [const restrict static 1], -   ZoO_index const sequence [const static ((ZoO_MARKOV_ORDER * 2) + 1)] -) -{ -   ZoO_index w; -   int error; - -   w = sequence[ZoO_MARKOV_ORDER]; - -   error = -      add_sequence -      ( -         &(k->words[w].forward_links_count), -         &(k->words[w].forward_links), -         sequence + (ZoO_MARKOV_ORDER + 1), -         (ZoO_MARKOV_ORDER - 1), -         0 -      ); - -   error = -      ( -         add_sequence -         ( -            &(k->words[w].backward_links_count), -            &(k->words[w].backward_links), -            sequence, -            0, -            1 -         ) -         | error -      ); - -   return error; -} - -static int should_assimilate -( -   struct ZoO_strings string [const restrict static 1], -   ZoO_index const aliases_count, -   const char * restrict aliases [const restrict static aliases_count] -) -{ -   ZoO_index i; - -   /* Don't assimilate empty strings. */ -   if (string->words_count == 0) -   { -      return 0; -   } - -   /* Don't assimilate things that start with our name. 
*/ -   for (i = 0; i < aliases_count; ++i) -   { -      if (ZoO_IS_PREFIX(aliases[i], string->words[0])) -      { -         return 0; -      } -   } - -   return 1; -} - -static int init_sequence -( -   struct ZoO_knowledge k [const static 1], -   struct ZoO_strings string [const restrict static 1], -   ZoO_index sequence [const restrict static ((ZoO_MARKOV_ORDER * 2) + 1)] -) -{ -   ZoO_index i; - -   /* We are going to link this sequence to ZoO_WORD_START_OF_LINE */ -   sequence[ZoO_MARKOV_ORDER] = ZoO_WORD_START_OF_LINE; - -   for (i = 1; i <= ZoO_MARKOV_ORDER; ++i) -   { -      sequence[ZoO_MARKOV_ORDER - i] = ZoO_WORD_START_OF_LINE; - -      if (i <= string->words_count) -      { -         if -         ( -            ZoO_knowledge_learn -            ( -               k, -               string->words[i - 1], -               (sequence + (ZoO_MARKOV_ORDER + i)) -            ) < 0 -         ) -         { -            return -1; -         } -      } -      else -      { -         sequence[ZoO_MARKOV_ORDER + i] = ZoO_WORD_END_OF_LINE; -      } -   } - -   return 0; -} - -int ZoO_knowledge_assimilate -( -   struct ZoO_knowledge k [const static 1], -   struct ZoO_strings string [const restrict static 1], -   ZoO_index const aliases_count, -   const char * restrict aliases [const restrict static aliases_count] -) -{ -   int error; -   ZoO_index sequence[(ZoO_MARKOV_ORDER * 2) + 1]; -   ZoO_index next_word, new_word, new_word_id; - -   if (!should_assimilate(string, aliases_count, aliases)) -   { -      return 0; -   } - -   if (init_sequence(k, string, sequence) < 0) -   { -      return -1; -   } - -   if (add_word_occurrence(k, sequence) < 0) -   { -      error = -1; - -      /* There's a pun... 
*/ -      ZoO_S_WARNING("Could not add a link between words."); - -      return -1; -   } - -   error = 0; - -   next_word = 0; -   new_word = ZoO_MARKOV_ORDER; - -   while (next_word <= (string->words_count + ZoO_MARKOV_ORDER)) -   { -      if (new_word < string->words_count) -      { -         /* prevents words [restrict], k [restrict] */ -         if (ZoO_knowledge_learn(k, string->words[new_word], &new_word_id) < 0) -         { -            return -1; -         } -      } -      else -      { -         new_word_id = ZoO_WORD_END_OF_LINE; -      } - -      memmove -      ( -         (void *) sequence, -         (const void *) (sequence + 1), -         /* Accepts 0. */ -         (sizeof(ZoO_index) * (ZoO_MARKOV_ORDER * 2)) -      ); - -      sequence[ZoO_MARKOV_ORDER * 2] = new_word_id; - -      if (add_word_occurrence(k, sequence) < 0) -      { -         error = -1; - -         /* There's a pun... */ -         ZoO_S_WARNING("Could not add a link between words."); - -         return -1; -      } - -      /* -       * Safe: -       *  - next_word < words_count -       *  - words_count =< ZoO_INDEX_MAX -       *  ---- -       *  next_word < ZoO_INDEX_MAX -       */ -      next_word += 1; -      new_word += 1; -   } - -   return error; -} - diff --git a/src/core/char.c b/src/core/char.c index 39ca72e..9297643 100644 --- a/src/core/char.c +++ b/src/core/char.c @@ -2,6 +2,18 @@  #include "char.h" +/* See: "char.c" */ +ZoO_char ZoO_char_to_lowercase (const ZoO_char c) +{ +   if ((c >= 'A') && (c <= 'Z')) +   { +      return 'z' - ('Z' - c); +   } + +   return c; +} + +/* See: "char.c" */  int ZoO_char_is_banned (const ZoO_char c)  {     switch (c) @@ -21,6 +33,7 @@ int ZoO_char_is_banned (const ZoO_char c)     }  } +/* See: "char.c" */  int ZoO_char_is_punctuation (const ZoO_char c)  {     switch (c) @@ -38,11 +51,14 @@ int ZoO_char_is_punctuation (const ZoO_char c)     }  } +/* See: "char.c" */  int ZoO_word_cmp  (     const ZoO_char word_a [const static 1], +   const 
size_t word_a_size,     const ZoO_char word_b [const static 1]  )  { -   return strcmp((const char *) word_a, (const char *) word_b); +   return strncmp((const char *) word_a, (const char *) word_b, word_a_size);  } + diff --git a/src/core/char.h b/src/core/char.h index 772a3a2..2b4a355 100644 --- a/src/core/char.h +++ b/src/core/char.h @@ -3,19 +3,42 @@  #include "char_types.h" -enum ZoO_word_property ZoO_get_word_property -( -   const ZoO_char word [const restrict], -   size_t word_size -); - +/* Compares two words. {word_a} does not have to be null terminated. */ +/*@ + @ requires null_terminated_string(word_b); + @ requires ((length(word_a) * sizeof(ZoO_char)) == word_a_size); + @ ensures ((\result == 1) || (\result == 0) || (\result == -1)); + @*/  int ZoO_word_cmp  (     const ZoO_char word_a [const static 1], +   const size_t word_a_size,     const ZoO_char word_b [const static 1]  ); +/* + * Returns the lowercase equivalent of ZoO_char that are included in ['A','Z']. + * Other ZoO_char are returned untouched. + */ +ZoO_char ZoO_char_to_lowercase (const ZoO_char c); + +/* + * Returns '1' iff {c} should be considered as an punctuation character, '0' + * otherwise. + */ +/*@ + @ ensures ((\result == 1) || (\result == 0)); + @*/  int ZoO_char_is_punctuation (const ZoO_char c); + +/* + * Returns '1' iff containing {c} means the word should not be learned. '0' + * otherwise. + */ +/*@ + @ ensures ((\result == 1) || (\result == 0)); + @*/  int ZoO_word_char_is_banned (const ZoO_char c);  #endif + diff --git a/src/core/char_types.h b/src/core/char_types.h index 67b5294..a2a736c 100644 --- a/src/core/char_types.h +++ b/src/core/char_types.h @@ -1,12 +1,16 @@  #ifndef _ZoO_CORE_CHAR_TYPES_H_  #define _ZoO_CORE_CHAR_TYPES_H_ - +/* + * FIXME: Does not belong here. 
+ */ +/*  enum ZoO_word_property  {     ZoO_WORD_NO_PROPERTY,     ZoO_WORD_HAS_NO_LEFT_SEPARATOR,     ZoO_WORD_HAS_NO_RIGHT_SEPARATOR  }; +*/  /* ZoO_char = UTF-8 char */  typedef char ZoO_char; diff --git a/src/core/index.c b/src/core/index.c new file mode 100644 index 0000000..375e0ad --- /dev/null +++ b/src/core/index.c @@ -0,0 +1,61 @@ +#include <limits.h> +#include <stdlib.h> + +#include "index.h" + +#if (RAND_MAX < UCHAR_MAX) +   #error "RAND_MAX < UCHAR_MAX, unable to generate random numbers." +#endif + +#if (RAND_MAX == 0) +   #error "RAND_MAX is included in [0, 0]. What are you even doing?" +#endif + +/* + * Returns a random unsigned char. + */ +static unsigned char random_uchar (void) +{ +   return +   (unsigned char) +   ( +      /* FIXME: Do floats allow enough precision for this? */ +      ( +         ((float) rand()) +         / ((float) RAND_MAX) +      ) +      * ((float) UCHAR_MAX) +   ); +} + +/* See: "index.h" */ +ZoO_index ZoO_index_random (void) +{ +   ZoO_index i; +   ZoO_index result; +   unsigned char * result_bytes; + +   result_bytes = (unsigned char *) &result; + +   for (i = 0; i < sizeof(ZoO_index); ++i) +   { +      result_bytes[i] = random_uchar(); +   } + +   return result; +} + +/* See: "index.h" */ +ZoO_index ZoO_index_random_up_to (const ZoO_index max) +{ +   return +   (ZoO_index) +   ( +      /* FIXME: Do floats allow enough precision for this? */ +      ( +         ((float) ZoO_index_random()) +         / ((float) ZoO_INDEX_MAX) +      ) +      * ((float) max) +   ); +} diff --git a/src/core/index.h b/src/core/index.h index 76e3507..1417662 100644 --- a/src/core/index.h +++ b/src/core/index.h @@ -3,6 +3,17 @@  #include "index_types.h" +/* + * Returns a random ZoO_index. 
+ */ +ZoO_index ZoO_index_random (void); + +/* + * Returns a random ZoO_index, included in [0, limit] + */ +/*@ + @ ensures (\result <= limit); + @*/  ZoO_index ZoO_index_random_up_to (const ZoO_index limit);  #endif diff --git a/src/core/index_types.h b/src/core/index_types.h index 2d769ca..ad56d52 100644 --- a/src/core/index_types.h +++ b/src/core/index_types.h @@ -1,8 +1,10 @@  #ifndef _ZoO_CORE_INDEX_TYPES_H_  #define _ZoO_CORE_INDEX_TYPES_H_ +/* Must be unsigned. */  typedef unsigned int ZoO_index; +/* Must be > 0. */  #define ZoO_INDEX_MAX UINT_MAX  #endif diff --git a/src/core/sequence.c b/src/core/sequence.c index 9e370a3..d7ff9d0 100644 --- a/src/core/sequence.c +++ b/src/core/sequence.c @@ -5,18 +5,56 @@  #include "sequence.h" +/* + * Bypass rendundant ZoO_START_OF_SEQUENCE_ID at the start of a sequence. + */ +/* ensures (*sequence_offset <= sequence_length) */ +static void bypass_redundant_sos +( +   const ZoO_index sequence [const restrict], +   const ZoO_index sequence_length, +   ZoO_index sequence_offset [const restrict static 1] +) +{ +   ZoO_index i; + +   *sequence_offset = 0; + +   for (i = 0; i < sequence_length; ++i) +   { +      if (sequence[i] != ZoO_START_OF_SEQUENCE_ID) +      { +         return; +      } +      else if (sequence[i] == ZoO_START_OF_SEQUENCE_ID) +      { +         *sequence_offset = i; +      } +   } +} + +  /* See "sequence.h" */  int ZoO_sequence_cmp  (     const ZoO_index sequence_a [const], -   const ZoO_index sequence_a_length, +   ZoO_index sequence_a_length,     const ZoO_index sequence_b [const], -   const ZoO_index sequence_b_length +   ZoO_index sequence_b_length  )  { -   ZoO_index min_length; +   ZoO_index min_length, a, b; +   ZoO_index a_offset, b_offset;     ZoO_index i; +   bypass_redundant_sos(sequence_a, sequence_a_length, &a_offset); +   bypass_redundant_sos(sequence_b, sequence_b_length, &b_offset); + +   /*@ requires (*a_offset <= sequence_a_length) @*/ +   sequence_a_length -= a_offset; +   /*@ requires 
(*b_offset <= sequence_b_length) @*/ +   sequence_b_length -= b_offset; +     if (sequence_a_length < sequence_b_length)     {        min_length = sequence_a_length; @@ -26,47 +64,37 @@ int ZoO_sequence_cmp        min_length = sequence_b_length;     } +   /*@ ensures (min_length <= sequence_a_length) @*/ +   /*@ ensures (min_length <= sequence_b_length) @*/ +     for (i = 0; i < min_length; ++i)     { -      if (sequence_a[i] < sequence_b[i]) +      /*@ requires ((i + a_offset) < sequence_a_length) @*/ +      a = sequence_a[i + a_offset]; +      /*@ requires ((i + b_offset) < sequence_b_length) @*/ +      b = sequence_b[i + b_offset]; + +      if (a < b)        {           return -1;        } -      else if (sequence_b[i] > sequence_b[i]) +      else if (b > a)        {           return 1;        } -      else if -      ( -         (sequence_a[i] == ZoO_END_OF_SEQUENCE_ID) -         && (sequence_b[i] == ZoO_END_OF_SEQUENCE_ID) -      ) +      else if ((a == ZoO_END_OF_SEQUENCE_ID) && (b == ZoO_END_OF_SEQUENCE_ID))        {           return 0;        }     } -   if (sequence_a_length < sequence_b_length) +   if (sequence_a_length > sequence_b_length)     { -      if (sequence_b[i] == ZoO_END_OF_SEQUENCE_ID) -      { -         return 0; -      } -      else -      { -         return -1; -      } +      return 1;     } -   else if (sequence_a_length > sequence_b_length) +   else if (sequence_a_length < sequence_b_length)     { -      if (sequence_a[i] == ZoO_END_OF_SEQUENCE_ID) -      { -         return 0; -      } -      else -      { -         return 1; -      } +      return -1;     }     else     { diff --git a/src/core/sequence.h b/src/core/sequence.h index e609b4d..77ecd6c 100644 --- a/src/core/sequence.h +++ b/src/core/sequence.h @@ -1,11 +1,21 @@  #ifndef _ZoO_CORE_SEQUENCE_H_  #define _ZoO_CORE_SEQUENCE_H_ +#include "../core/char_types.h"  #include "../core/index_types.h" -#include "../core/knowledge_types.h" +#include "../knowledge/knowledge_types.h"  
#include "sequence_types.h" +int ZoO_sequence_from_undercase_string +( +   const ZoO_char string [const restrict], +   const ZoO_index string_length, +   struct ZoO_knowledge k [const restrict static 1], +   ZoO_index * sequence [const restrict static 1], +   ZoO_index sequence_length [const restrict static 1] +); +  /*   * Creates a sequence containing {initial_word}. The remaining elements of   * sequence are added according to what is known to {k} as being possible. @@ -42,7 +52,13 @@ int ZoO_sequence_create_from   * ZoO_END_OF_SEQUENCE marks the ending of a sequence, regardless of indicated   * sequence length, meaning that [10][ZoO_END_OF_SEQUENCE][9] and   * [10][ZoO_END_OF_SEQUENCE][8] are considered equal. Sequences do not have to - * contain ZoO_END_OF_SEQUENCE. + * contain ZoO_END_OF_SEQUENCE. [10][ZoO_END_OF_SEQUENCE] and [10] are + * considered different, [10][ZoO_END_OF_SEQUENCE] + * and [10][ZoO_END_OF_SEQUENCE][ZoO_END_OF_SEQUENCE] are considered equal. + * Same logic is applyied for ZoO_START_OF_SEQUENCE: + * [START_OF_SEQUENCE][10] is not [10], but + * [START_OF_SEQUENCE][START_OF_SEQUENCE][10] and [START_OF_SEQUENCE][10] are + * the same.   * Return:   *    1 iff {sequence_a} should be considered being more than {sequence_b}   *    0 iff {sequence_a} should be considered being equal to {sequence_b} diff --git a/src/core/sequence_creation.c b/src/core/sequence_creation.c index 1133be9..f460629 100644 --- a/src/core/sequence_creation.c +++ b/src/core/sequence_creation.c @@ -19,6 +19,11 @@   *    (> weights_sum 0).   *    (= (sum weights) weights_sum).   */ +/*@ + @ requires (weights_sum > 0); + @ requires \valid(weights); + @ requires (\sum(0, (\length(weights) - 1), weights) = weights_sum); +@*/  static ZoO_index weighted_random_pick  (     const ZoO_index weights [const restrict static 1], @@ -29,12 +34,12 @@ static ZoO_index weighted_random_pick     accumulator = 0; -   /* Safe: Included in [0, weights_sum]. 
*/     random_number = ZoO_index_random_up_to(weights_sum); +   /*@ ensures (0 <= random_number <= weights_sum); @*/     for (result = 0; accumulator < random_number; ++result)     { -      /* Safe: (= (sum weights) weights_sum) */ +      /*@ requires (\sum(0, (\length(weights) - 1), weights) = weights_sum); @*/        accumulator += weights[result];     } diff --git a/src/core/sequence_from_string.c b/src/core/sequence_from_string.c new file mode 100644 index 0000000..51d7049 --- /dev/null +++ b/src/core/sequence_from_string.c @@ -0,0 +1,315 @@ +#define _POSIX_C_SOURCE 200809L +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../core/char.h" +#include "../core/index.h" + +#include "../cli/cli.h" + +#include "../knowledge/knowledge.h" + +#include "sequence.h" + +static int add_word_id_to_sequence +( +   const ZoO_index word_id, +   ZoO_index * sequence [const restrict static 1], +   ZoO_index sequence_length [const restrict static 1] +) +{ +   ZoO_index * new_sequence; + +   *sequence_length += 1; + +   new_sequence = +      (ZoO_index *) realloc +      ( +         (void *) *sequence, +         (((size_t) sequence_length) * sizeof(ZoO_index)) +      ); + +   if (new_sequence == (ZoO_index *) NULL) +   { +      ZoO_S_ERROR("Unable to reallocate a sequence to add word ids to it."); + +      return -1; +   } + +   return 0; +} + +/******************************************************************************/ +/** HANDLING PUNCTUATION ******************************************************/ +/******************************************************************************/ +static int add_punctuation_to_sequence +( +   const ZoO_char string [const restrict static 1], +   const ZoO_char punctuation, +   ZoO_index * sequence [const restrict static 1], +   ZoO_index sequence_length [const restrict static 1], +   const struct ZoO_knowledge k [const restrict static 1] +) +{ +   ZoO_index word_id; +   ZoO_char as_word[2]; + +   
as_word[0] = punctuation; +   as_word[1] = '\0'; + +   if (ZoO_knowledge_find_word_id(k, as_word, 2, &word_id) < 0) +   { +      ZoO_PROG_ERROR +      ( +         "'%s' was defined as a punctuation, was found in a string, yet is not" +         " defined in the knowledge database.", +         as_word +      ); + +      return -1; +   } + +   if (add_word_id_to_sequence(word_id, sequence, sequence_length) < 0) +   { +      return -1; +   } + +   return 0; +} + +static int word_is_punctuation_terminated +( +   const ZoO_char string [const restrict static 1], +   const ZoO_index word_start, +   const ZoO_index word_length +) +{ +   return ZoO_char_is_punctuation(string[word_length]); +} + +/******************************************************************************/ +/** HANDLING WORDS ************************************************************/ +/******************************************************************************/ +static int add_word_to_sequence +( +   const ZoO_char string [const restrict static 1], +   const ZoO_index word_start, +   const ZoO_index word_length, +   ZoO_index * sequence [const restrict static 1], +   ZoO_index sequence_length [const restrict static 1], +   struct ZoO_knowledge k [const restrict static 1] +) +{ +   ZoO_index word_id; +   ZoO_char * stored_word; + +   if (word_length == 0) +   { +      return 0; +   } + +   if +   ( +      ZoO_knowledge_learn_word +      ( +         k, +         (string + word_start), +         word_length, +         &word_id +      ) < 0 +   ) +   { +      return -1; +   } + +   if (add_word_id_to_sequence(word_id, sequence, sequence_length) < 0) +   { +      return -1; +   } + +   return 0; +} + +static int add_finding_to_sequence +( +   const ZoO_char string [const restrict static 1], +   const ZoO_index word_start, +   const ZoO_index word_length, +   ZoO_index * sequence [const restrict static 1], +   ZoO_index sequence_length [const restrict static 1], +   struct ZoO_knowledge k [const restrict 
static 1] +) +{ +   ZoO_index punctuation; + +   if (word_is_punctuation_terminated(string, word_start, word_length)) +   { +      punctuation = 1; +   } +   else +   { +      punctuation = 0; +   } + +   if +   ( +      add_word_to_sequence +      ( +         string, +         word_start, +         (word_length - punctuation), +         sequence, +         sequence_length, +         k +      ) < 0 +   ) +   { +      return -1; +   } + +   if +   ( +      (punctuation == 1) +      && +      ( +         add_punctuation_to_sequence +         ( +            string, +            string[word_start + word_length - 1], +            sequence, +            sequence_length, +            k +         ) < 0 +      ) +   ) +   { +      return -1; +   } + +   return 0; +} +/* + * Finds the next space-delimited word of {string} at or after {offset}. + * Returns 0 and sets {*word_start} and {*word_length} if there is one; the + * word may end at the very end of the string. + * Returns -1 if only spaces (or nothing) remain after {offset}. + */ +static int find_word +( +   const ZoO_char string [const restrict static 1], +   const ZoO_index string_length, +   const ZoO_index offset, +   ZoO_index word_start [const restrict static 1], +   ZoO_index word_length [const restrict static 1] +) +{ +   ZoO_index i; + +   i = offset; + +   /* The bound is tested first so string[string_length] is never read. */ +   while ((i < string_length) && (string[i] == ' ')) +   { +      i += 1; +   } + +   if (i >= string_length) +   { +      return -1; +   } + +   *word_start = i; + +   while ((i < string_length) && (string[i] != ' ')) +   { +      i += 1; +   } + +   /* Reaching the end of the string simply ends the last word. */ +   *word_length = (i - *word_start); + +   return 0; +} + +/******************************************************************************/ +/** EXPORTED ******************************************************************/ +/******************************************************************************/ +int ZoO_sequence_from_undercase_string +( +   const ZoO_char string [const restrict], +   const ZoO_index string_length, +   struct ZoO_knowledge k [const restrict static 1], +   ZoO_index * sequence [const restrict static 1], +   ZoO_index sequence_length [const restrict static 1] +) +{ +   ZoO_index word_start, 
word_length; +   ZoO_index i; + +   i = 0; + +   *sequence = (ZoO_index *) NULL; +   *sequence_length = 0; + +   if +   ( +      add_word_id_to_sequence +      ( +         ZoO_START_OF_SEQUENCE_ID, +         sequence, +         sequence_length +      ) < 0 +   ) +   { +      /* Same cleanup as the other failure paths: outputs end up empty. */ +      free((void *) *sequence); +      *sequence = (ZoO_index *) NULL; +      *sequence_length = 0; + +      return -1; +   } + +   while (i < string_length) +   { +      /* find_word expects (string, string_length, offset, ...). */ +      if (find_word(string, string_length, i, &word_start, &word_length) < 0) +      { +         break; +      } + +      if +      ( +         add_finding_to_sequence +         ( +            string, +            word_start, +            word_length, +            sequence, +            sequence_length, +            k +         ) < 0 +      ) +      { +         free((void *) *sequence); +         *sequence = (ZoO_index *) NULL; +         *sequence_length = 0; + +         return -1; +      } + +      /* Resume the search right after the word that was just handled. */ +      i = (word_start + word_length); +   } + +   if +   ( +      add_word_id_to_sequence +      ( +         ZoO_END_OF_SEQUENCE_ID, +         sequence, +         sequence_length +      ) < 0 +   ) +   { +      free((void *) *sequence); + +      *sequence = (ZoO_index *) NULL; +      *sequence_length = 0; + +      return -1; +   } + +   return 0; +} diff --git a/src/core/sequence_types.h b/src/core/sequence_types.h index 717d418..c260a8a 100644 --- a/src/core/sequence_types.h +++ b/src/core/sequence_types.h @@ -3,7 +3,8 @@  #define ZoO_START_OF_SEQUENCE_ID 0  #define ZoO_END_OF_SEQUENCE_ID   1 +#define ZoO_ACTION_SEQUENCE_ID   2 -#define ZoO_RESERVED_IDS_COUNT   2 +#define ZoO_RESERVED_IDS_COUNT   3  #endif diff --git a/src/io/data_input.c b/src/file/data_input.c index e31d33b..e31d33b 100644 --- a/src/io/data_input.c +++ b/src/file/data_input.c diff --git a/src/io/data_input.h b/src/file/data_input.h index a2f004b..a2f004b 100644 --- a/src/io/data_input.h +++ b/src/file/data_input.h diff --git a/src/io/data_input_types.h b/src/file/data_input_types.h index bd2709b..bd2709b 100644 --- a/src/io/data_input_types.h +++ b/src/file/data_input_types.h diff 
--git a/src/io/data_output.c b/src/file/data_output.c index 796d3d0..04e3964 100644 --- a/src/io/data_output.c +++ b/src/file/data_output.c @@ -1,4 +1,5 @@  #define _POSIX_C_SOURCE 200809L +  #include <stdlib.h>  #include <string.h>  #include <errno.h> diff --git a/src/io/data_output.h b/src/file/data_output.h index ef963a0..ef963a0 100644 --- a/src/io/data_output.h +++ b/src/file/data_output.h diff --git a/src/io/CMakeLists.txt b/src/io/CMakeLists.txt deleted file mode 100644 index c36413a..0000000 --- a/src/io/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -set( -   SRC_FILES ${SRC_FILES} -   ${CMAKE_CURRENT_SOURCE_DIR}/parameters.c -   ${CMAKE_CURRENT_SOURCE_DIR}/network.c -   ${CMAKE_CURRENT_SOURCE_DIR}/data_input.c -   ${CMAKE_CURRENT_SOURCE_DIR}/data_output.c -) -set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) - diff --git a/src/io/parameters_types.h b/src/io/parameters_types.h deleted file mode 100644 index 92a9e30..0000000 --- a/src/io/parameters_types.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef _ZoO_IO_PARAMETERS_TYPES_H_ -#define _ZoO_IO_PARAMETERS_TYPES_H_ - -struct ZoO_parameters -{ -   const char * restrict data_filename; -   const char * restrict new_data_filename; - -   const char * restrict irc_server_addr; -   const char * restrict irc_server_port; -   const char * restrict irc_server_channel; -   const char * restrict irc_username; -   const char * restrict irc_realname; - -   int reply_rate; - -   int aliases_count; -   const char * restrict * restrict aliases; -}; - -#endif diff --git a/src/io/network.c b/src/irc/network.c index edafd4f..edafd4f 100644 --- a/src/io/network.c +++ b/src/irc/network.c diff --git a/src/io/network.h b/src/irc/network.h index 647b19c..647b19c 100644 --- a/src/io/network.h +++ b/src/irc/network.h diff --git a/src/io/network_types.h b/src/irc/network_types.h index 9a328a7..9a328a7 100644 --- a/src/io/network_types.h +++ b/src/irc/network_types.h diff --git a/src/knowledge/CMakeLists.txt b/src/knowledge/CMakeLists.txt new file mode 
100644 index 0000000..1245321 --- /dev/null +++ b/src/knowledge/CMakeLists.txt @@ -0,0 +1,11 @@ +set( +   SRC_FILES ${SRC_FILES} +   ${CMAKE_CURRENT_SOURCE_DIR}/knowledge.c +   ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_finalize.c +   ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_learn_sequence.c +   ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_learn_word.c +   ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_search.c +) + +set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) + diff --git a/src/core/knowledge.c b/src/knowledge/knowledge.c index 94d76cd..a72969e 100644 --- a/src/core/knowledge.c +++ b/src/knowledge/knowledge.c @@ -2,14 +2,20 @@  #include <string.h>  #include <stdint.h> /* defines SIZE_MAX */ -#include "../io/error.h" +#include "../cli/cli.h"  #include "knowledge.h"  /** Basic functions of the ZoO_knowledge structure ****************************/ + +/* See: "knowledge.h" */  void ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1])  {     k->words = (struct ZoO_knowledge_word *) NULL;     k->words_length = 0;     k->words_sorted = (ZoO_index *) NULL; + +   k->sequences = (ZoO_index **) NULL; +   k->sequences_length = 0; +   k->sequences_sorted = (ZoO_index *) NULL;  } diff --git a/src/core/knowledge.h b/src/knowledge/knowledge.h index 057e436..51d94c4 100644 --- a/src/core/knowledge.h +++ b/src/knowledge/knowledge.h @@ -1,5 +1,5 @@ -#ifndef _ZoO_CORE_KNOWLEDGE_H_ -#define _ZoO_CORE_KNOWLEDGE_H_ +#ifndef _ZoO_KNOWLEDGE_KNOWLEDGE_H_ +#define _ZoO_KNOWLEDGE_KNOWLEDGE_H_  #include "../core/char_types.h"  #include "../core/index_types.h" @@ -10,11 +10,9 @@ void ZoO_knowledge_initialize (struct ZoO_knowledge k [const restrict static 1])  void ZoO_knowledge_finalize (struct ZoO_knowledge k [const restrict static 1]); -  /*   * When returning 0: - *    {word} was either added to {k} or its representation in {k} has its - *    occurrences count increased. + *    {word} was added to {k}, or was already there.   *    {*result} indicates where {word} is in {k->words}.   
*   * When returning -1: @@ -22,18 +20,28 @@ void ZoO_knowledge_finalize (struct ZoO_knowledge k [const restrict static 1]);   *    {k} remains semantically unchanged.   *    {*result} may or may not have been altered.   */ -int ZoO_knowledge_learn +int ZoO_knowledge_learn_word  (     struct ZoO_knowledge k [const static 1],     const ZoO_char word [const restrict static 1], +   const ZoO_index word_length,     ZoO_index result [const restrict static 1]  );  int ZoO_knowledge_learn_sequence  ( -   struct ZoO_knowledge k [const static 1], -   const ZoO_index sequence [const restrict], -   const ZoO_index sequence_length +   struct ZoO_knowledge k [const restrict static 1], +   const ZoO_index sequence [const restrict static 1], +   const ZoO_index sequence_length, +   const ZoO_index markov_order +); + +int ZoO_knowledge_learn_markov_sequence +( +   struct ZoO_knowledge k [const restrict static 1], +   const ZoO_index sequence [const restrict static 1], +   const ZoO_index sequence_length, +   const ZoO_index markov_order  );  int ZoO_knowledge_get_following_sequences_ref @@ -74,6 +82,7 @@ int ZoO_knowledge_find_word_id  (     const struct ZoO_knowledge k [const restrict static 1],     const ZoO_char word [const restrict static 1], +   const size_t word_size,     ZoO_index result [const restrict static 1]  ); diff --git a/src/core/knowledge_finalize.c b/src/knowledge/knowledge_finalize.c index e4deda6..36a7406 100644 --- a/src/core/knowledge_finalize.c +++ b/src/knowledge/knowledge_finalize.c @@ -2,11 +2,11 @@  #include <string.h>  #include <stdint.h> /* defines SIZE_MAX */ -#include "../io/error.h" +#include "../cli/cli.h"  #include "knowledge.h" -void knowledge_sequence_collection_finalize +static void knowledge_sequence_collection_finalize  (     struct ZoO_knowledge_sequence_collection c [const restrict static 1]  ) @@ -54,7 +54,7 @@ void knowledge_sequence_collection_finalize     }  } -void knowledge_word_finalize +static void knowledge_word_finalize  (     
struct ZoO_knowledge_word w [const restrict static 1]  ) @@ -73,6 +73,7 @@ void knowledge_word_finalize     knowledge_sequence_collection_finalize(&(w->preceded));  } +/* See: "knowledge.h" */  void ZoO_knowledge_finalize (struct ZoO_knowledge k [const restrict static 1])  {     ZoO_index i; diff --git a/src/knowledge/knowledge_learn_sequence.c b/src/knowledge/knowledge_learn_sequence.c new file mode 100644 index 0000000..23a5ca7 --- /dev/null +++ b/src/knowledge/knowledge_learn_sequence.c @@ -0,0 +1,324 @@ +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../core/sequence.h" + +#include "../cli/cli.h" + +#include "knowledge.h" + +/******************************************************************************/ +/** INITIALIZE ****************************************************************/ +/******************************************************************************/ +/* + * Inserts {sequence_id} at rank {sorted_sequence_id} of {k->sequences_sorted}, + * shifting the entries from that rank onwards one slot to the right. + * Pre: {k->sequences_sorted} has room for {k->sequences_length} entries. + */ +static void set_nth_sequence +( +   struct ZoO_knowledge k [const restrict static 1], +   const ZoO_index sorted_sequence_id, +   const ZoO_index sequence_id +) +{ +   /* Safe: (>= k->sequences_length 1) */ +   if (sorted_sequence_id < (k->sequences_length - 1)) +   { +      memmove +      ( +         /* Safe: (=< (+ sorted_sequence_id 1) k->sequences_length) */ +         (void *) (k->sequences_sorted + (sorted_sequence_id + 1)), +         (const void *) (k->sequences_sorted + sorted_sequence_id), +         /* memmove counts bytes, not elements. */ +         ( +            ((size_t) ((k->sequences_length - 1) - sorted_sequence_id)) +            * sizeof(ZoO_index) +         ) +      ); +   } + +   k->sequences_sorted[sorted_sequence_id] = sequence_id; +} + +/******************************************************************************/ +/** ALLOCATING MEMORY *********************************************************/ +/******************************************************************************/ +/* + * Resizes {k->sequences} so that it holds {k->sequences_length} pointers. + * Returns 0 on success, -1 on failure ({k->sequences} is then unchanged). + */ +static int reallocate_sequences_list +( +   struct ZoO_knowledge k [const restrict static 1] +) +{ +   ZoO_index ** new_sequences; + +   /* Fails only if the byte size would not fit in a size_t. */ +   if ((SIZE_MAX / sizeof(ZoO_index *)) < 
(size_t) k->sequences_length) +   { +      ZoO_S_ERROR +      ( +         "Unable to store the size of the sequences list, as it would overflow" +         " size_t variables." +      ); + +      return -1; +   } + +   new_sequences = +      (ZoO_index **) realloc +      ( +         (void *) k->sequences, +         (((size_t) k->sequences_length) * sizeof(ZoO_index *)) +      ); + +   if (new_sequences == (ZoO_index **) NULL) +   { +      ZoO_S_ERROR +      ( +         "Unable to allocate the memory required for the new sequence list." +      ); + +      return -1; +   } + +   k->sequences = new_sequences; + +   return 0; +} +/* + * Resizes {k->sequences_sorted} so that it holds {k->sequences_length} + * indices. + * Returns 0 on success, -1 on failure ({k->sequences_sorted} is then + * unchanged). + */ +static int reallocate_sequences_sorted_list +( +   struct ZoO_knowledge k [const restrict static 1] +) +{ +   ZoO_index * new_sequences_sorted; + +   /* Fails only if the byte size would not fit in a size_t. */ +   if ((SIZE_MAX / sizeof(ZoO_index)) < (size_t) k->sequences_length) +   { +      ZoO_S_ERROR +      ( +         "Unable to store the size of the sorted sequences list, as it would" +         " overflow size_t variables." +      ); + +      return -1; +   } + +   new_sequences_sorted = +      (ZoO_index *) realloc +      ( +         (void *) k->sequences_sorted, +         ((size_t) k->sequences_length) * sizeof(ZoO_index) +      ); + +   if (new_sequences_sorted == (ZoO_index *) NULL) +   { +      ZoO_S_ERROR +      ( +         "Unable to allocate the memory required for the new sorted sequences" +         " list." +      ); + +      return -1; +   } + +   k->sequences_sorted = new_sequences_sorted; + +   return 0; +} + +/* Pre: (=< ZoO_INDEX_MAX SIZE_MAX) */ +static ZoO_index * copy_sequence +( +   const ZoO_index base [const restrict static 1], +   const ZoO_index base_length, +   const ZoO_index markov_order +) +{ +   ZoO_index * result; + +   result = (ZoO_index *) calloc((size_t) base_length, sizeof(ZoO_index)); + +   if (result == (ZoO_index *) NULL) +   { +      ZoO_S_ERROR +      ( +         "Unable to allocate the memory required to store a new sequence." 
+      ); + +      return (ZoO_index *) NULL; +   } + +   memcpy +   ( +      (void *) result, +      (const void *) base, +      (((size_t) base_length) * sizeof(ZoO_index)) +   ); + +   return result; +} +/* + * Stores a private copy of {sequence} in {k} under the id {sequence_id} and + * registers it at rank {sorted_sequence_id} of the sorted list. + * Returns 0 on success. Returns -1 on failure, in which case the copy is + * released and {k->sequences_length} is restored. + */ +static int add_sequence +( +   struct ZoO_knowledge k [const restrict static 1], +   const ZoO_index sequence [const restrict static 1], +   const ZoO_index sequence_length, +   const ZoO_index markov_order, /* Pre (> markov_order 1) */ +   const ZoO_index sequence_id, +   const ZoO_index sorted_sequence_id +) +{ +   ZoO_index * stored_sequence; + +   if (k->sequences_length == ZoO_INDEX_MAX) +   { +      ZoO_S_ERROR +      ( +         "Unable to add sequence: the variable that stores the number of known " +         "sequences would overflow." +      ); + +      return -1; +   } + +   stored_sequence = copy_sequence(sequence, sequence_length, markov_order); + +   if (stored_sequence == (ZoO_index *) NULL) +   { +      return -1; +   } + +   k->sequences_length += 1; + +   /* Grow both lists before touching either, so a failure changes nothing. */ +   if +   ( +      (reallocate_sequences_list(k) < 0) +      || (reallocate_sequences_sorted_list(k) < 0) +   ) +   { +      k->sequences_length -= 1; + +      free((void *) stored_sequence); + +      return -1; +   } + +   k->sequences[sequence_id] = stored_sequence; + +   set_nth_sequence(k, sorted_sequence_id, sequence_id); + +   return 0; +} + +/******************************************************************************/ +/** SEARCH ********************************************************************/ +/******************************************************************************/ + +static int find_sequence +( +   const struct ZoO_knowledge k [const static 1], +   const ZoO_index sequence [const restrict static 1], +   const ZoO_index sequence_length, +   const ZoO_index markov_order, /* Pre: (> 1) */ +   ZoO_index sequence_id [const restrict static 1] +) +{ +   /* This is a binary search */ +   int cmp; +   ZoO_index i, current_min, current_max; +   const ZoO_index 
markov_sequence_length = (markov_order - 1); + +   /* Handles the case where the list is empty ********************************/ +   current_max = k->sequences_length; + +   if (current_max == 0) +   { +      *sequence_id = 0; + +      return -1; +   } +   /***************************************************************************/ + +   current_min = 0; +   current_max -= 1; + +   /* Invariant: (=< current_min current_max) at the start of each pass. */ +   for (;;) +   { +      i = (current_min + ((current_max - current_min) / 2)); + +      /* +       * NOTE(review): the stored sequence is the first operand here, whereas +       * ZoO_knowledge_find_word_id passes the searched word first. Confirm +       * that the sign convention of ZoO_sequence_cmp matches the branch +       * directions below. +       */ +      cmp = +         ZoO_sequence_cmp +         ( +            k->sequences[k->sequences_sorted[i]], +            markov_sequence_length, +            sequence, +            sequence_length +         ); + +      if (cmp > 0) +      { +         current_min = (i + 1); + +         if (current_min > current_max) +         { +            *sequence_id = current_min; + +            return -1; +         } +      } +      else if (cmp < 0) +      { +         /* +          * Nothing is left below {i}: {i} is the insertion rank. +          * Testing this before moving {current_max} keeps +          * (current_max - current_min) from wrapping around should +          * ZoO_index be an unsigned type. +          */ +         if (i == current_min) +         { +            *sequence_id = i; + +            return -1; +         } + +         current_max = (i - 1); +      } +      else +      { +         *sequence_id = k->sequences_sorted[i]; + +         return 0; +      } +   } +} + +/******************************************************************************/ +/** EXPORTED ******************************************************************/ +/******************************************************************************/ + +int ZoO_knowledge_learn_markov_sequence +( +   struct ZoO_knowledge k [const restrict static 1], +   const ZoO_index sequence [const restrict static 1], +   const ZoO_index sequence_length, +   const ZoO_index markov_order, /* Pre (> markov_order 1) */ +   ZoO_index sequence_id [const restrict static 1] +) +{ +   ZoO_index sorted_id; + +   if +   ( +      find_sequence +      ( +         k, +         sequence, +         sequence_length, +         markov_order, +         sequence_id +      ) == 0 +   ) +   { +      return 0; +   } + +   
sorted_id = *sequence_id; +   *sequence_id = k->sequences_length; +/* {sorted_id} keeps the rank reported by find_sequence; {*sequence_id} is now the id of the new entry. */ +   return +      add_sequence +      ( +         k, +         sequence, +         sequence_length, +         markov_order, +         *sequence_id, +         sorted_id +      ); +} diff --git a/src/knowledge/knowledge_learn_word.c b/src/knowledge/knowledge_learn_word.c new file mode 100644 index 0000000..f55ac5b --- /dev/null +++ b/src/knowledge/knowledge_learn_word.c @@ -0,0 +1,276 @@ +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../cli/cli.h" + +#include "knowledge.h" + +/******************************************************************************/ +/** INITIALIZING STRUCTURES ***************************************************/ +/******************************************************************************/ +/* Puts {c} in its empty state: every array pointer is NULL and the reference count is zero. */ +static void initialize_sequence_collection +( +   struct ZoO_knowledge_sequence_collection c [const restrict static 1] +) +{ +   c->sequences_ref = (ZoO_index *) NULL; +   c->sequences_ref_length = 0; +   c->sequences_ref_sorted = (ZoO_index *) NULL; +   c->occurrences = (ZoO_index *) NULL; +   c->targets = (ZoO_index **) NULL; +   c->targets_length = (ZoO_index *) NULL; +   c->targets_occurrences = (ZoO_index **) NULL; +} +/* Puts {w} in its empty state: no string, zero size, zero occurrences, and empty {followed} and {preceded} collections. */ +static void initialize_word +( +   struct ZoO_knowledge_word w [const restrict static 1] +) +{ +   w->word = (const ZoO_char *) NULL; +   w->word_size = 0; +   w->occurrences = 0; + +   initialize_sequence_collection(&(w->followed)); +   initialize_sequence_collection(&(w->preceded)); +} + +/******************************************************************************/ +/** ALLOCATING MEMORY *********************************************************/ +/******************************************************************************/ +static ZoO_char * copy_word +( +   const ZoO_char original [const restrict static 1], +   const ZoO_index original_length +) +{ +   ZoO_char * result; + +   result = +      (ZoO_char 
*) +      calloc +      ( +         /* Widen before adding one, so the terminator slot cannot wrap. */ +         (((size_t) original_length) + 1), +         sizeof(ZoO_char) +      ); + +   if (result == (ZoO_char *) NULL) +   { +      ZoO_S_ERROR("Unable to allocate memory to store new word."); + +      return (ZoO_char *) NULL; +   } + +   memcpy +   ( +      (void *) result, +      (const void *) original, +      (((size_t) original_length) * sizeof(ZoO_char)) +   ); + +   result[original_length] = '\0'; + +   /* Hand the freshly allocated, NUL-terminated copy to the caller. */ +   return result; +} +/* + * Resizes {k->words} so that it holds {k->words_length} entries. + * Returns 0 on success, -1 on failure ({k->words} is then unchanged). + */ +static int reallocate_words_list +( +   struct ZoO_knowledge k [const restrict static 1] +) +{ +   struct ZoO_knowledge_word * new_words; + +   /* Fails only if the byte size would not fit in a size_t. */ +   if +   ( +      (SIZE_MAX / sizeof(struct ZoO_knowledge_word)) < (size_t) k->words_length +   ) +   { +      ZoO_S_ERROR +      ( +         "Unable to store the size of the words list, as it would overflow" +         " size_t variables." +      ); + +      return -1; +   } + +   new_words = +      (struct ZoO_knowledge_word *) realloc +      ( +         (void *) k->words, +         (((size_t) k->words_length) * sizeof(struct ZoO_knowledge_word)) +      ); + +   if (new_words == (struct ZoO_knowledge_word *) NULL) +   { +      ZoO_S_ERROR +      ( +         "Unable to allocate the memory required for the new words list." +      ); + +      return -1; +   } + +   k->words = new_words; + +   return 0; +} + +static int reallocate_words_sorted_list +( +   struct ZoO_knowledge k [const restrict static 1] +) +{ +   ZoO_index * new_words_sorted; + +   /* +    * This has already been tested previously for a struct ZoO_knowledge_word, +    * whose size is bigger than a ZoO_index. +    * */ +   /* +   if ((SIZE_MAX / sizeof(ZoO_index)) > (size_t) k->words_length) +   { +      ZoO_S_ERROR +      ( +         "Unable to store the size of the sorted words list, as it would" +         " overflow size_t variables." 
+      ); + +      return -1; +   } +   */ + +   new_words_sorted = +      (ZoO_index *) realloc +      ( +         (void *) k->words_sorted, +         (((size_t) k->words_length) * sizeof(ZoO_index)) +      ); + +   if (new_words_sorted == (ZoO_index *) NULL) +   { +      ZoO_S_ERROR +      ( +         "Unable to allocate the memory required for the new sorted words list." +      ); + +      return -1; +   } + +   k->words_sorted = new_words_sorted; + +   return 0; +} +/* + * Inserts {word_id} at rank {sorted_word_id} of {k->words_sorted}, shifting + * the entries from that rank onwards one slot to the right. + * Pre: {k->words_sorted} has room for {k->words_length} entries. + */ +static void set_nth_word +( +   struct ZoO_knowledge k [const restrict static 1], +   const ZoO_index sorted_word_id, +   const ZoO_index word_id +) +{ +   /* Safe: (>= k->words_length 1) */ +   if (sorted_word_id < (k->words_length - 1)) +   { +      memmove +      ( +         /* Safe: (=< (+ sorted_word_id 1) k->words_length) */ +         (void *) (k->words_sorted + (sorted_word_id + 1)), +         (const void *) (k->words_sorted + sorted_word_id), +         /* memmove counts bytes, not elements. */ +         ( +            ((size_t) ((k->words_length - 1) - sorted_word_id)) +            * sizeof(ZoO_index) +         ) +      ); +   } + +   k->words_sorted[sorted_word_id] = word_id; +} + +static int add_word +( +   struct ZoO_knowledge k [const restrict static 1], +   const ZoO_char word [const restrict static 1], +   const ZoO_index word_length, +   const ZoO_index word_id, +   const ZoO_index sorted_word_id +) +{ +   ZoO_char * stored_word; + +   if (k->words_length == ZoO_INDEX_MAX) +   { +      ZoO_S_ERROR +      ( +         "Unable to add word: the variable that stores the number of known " +         "words would overflow." 
+      ); + +      return -1; +   } + +   stored_word = copy_word(word, word_length); + +   if (stored_word == (ZoO_char *) NULL) +   { +      return -1; +   } + +   k->words_length += 1; + +   /* Grow both lists before touching either, so a failure changes nothing. */ +   if +   ( +      (reallocate_words_list(k) < 0) +      || (reallocate_words_sorted_list(k) < 0) +   ) +   { +      k->words_length -= 1; + +      /* The copy was never stored in {k}: release it here. */ +      free((void *) stored_word); + +      return -1; +   } + +   initialize_word(k->words + word_id); + +   k->words[word_id].word = stored_word; +   k->words[word_id].word_size = ((word_length + 1) * sizeof(ZoO_char)); + +   set_nth_word(k, sorted_word_id, word_id); + +   /* The word is now stored and indexed. */ +   return 0; +} + +/******************************************************************************/ +/** EXPORTED ******************************************************************/ +/******************************************************************************/ + +int ZoO_knowledge_learn_word +( +   struct ZoO_knowledge k [const restrict static 1], +   const ZoO_char word [const restrict static 1], +   const ZoO_index word_length, +   ZoO_index word_id [const restrict static 1] +) +{ +   ZoO_index sorted_id; + +   /* +    * NOTE(review): the size passed here excludes the terminator, whereas the +    * stored {word_size} includes it. Confirm which one ZoO_word_cmp expects. +    */ +   if +   ( +      ZoO_knowledge_find_word_id +      ( +         k, +         word, +         (word_length * sizeof(ZoO_char)), +         word_id +      ) == 0 +   ) +   { +      return 0; +   } + +   /* Unknown word: {*word_id} held its sorted rank, and becomes its new id. */ +   sorted_id = *word_id; +   *word_id = k->words_length; + +   return add_word(k, word, word_length, *word_id, sorted_id); +} diff --git a/src/core/knowledge_search.c b/src/knowledge/knowledge_search.c index d0c61ef..a48585b 100644 --- a/src/core/knowledge_search.c +++ b/src/knowledge/knowledge_search.c @@ -4,7 +4,7 @@  #include "../core/index.h"  #include "../core/sequence.h" -#include "../io/error.h" +#include "../cli/cli.h"  #include "knowledge.h" @@ -13,6 +13,7 @@ int ZoO_knowledge_find_word_id  (     const struct ZoO_knowledge k [const restrict static 1],     const ZoO_char word [const restrict static 1], +   const size_t word_size,     ZoO_index result 
[const restrict static 1]  )  { @@ -39,7 +40,7 @@ int ZoO_knowledge_find_word_id     {        i = (current_min + ((current_max - current_min) / 2)); -      cmp = ZoO_word_cmp(word, k->words[k->words_sorted[i]].word); +      cmp = ZoO_word_cmp(word, word_size, k->words[k->words_sorted[i]].word);        if (cmp > 0)        { @@ -65,7 +66,7 @@ int ZoO_knowledge_find_word_id        }        else        { -         *result = i; +         *result = k->words_sorted[i];           return 0;        } diff --git a/src/core/knowledge_types.h b/src/knowledge/knowledge_types.h index acd239f..7eafc8b 100644 --- a/src/core/knowledge_types.h +++ b/src/knowledge/knowledge_types.h @@ -1,5 +1,5 @@ -#ifndef _ZoO_CORE_KNOWLEDGE_TYPES_H_ -#define _ZoO_CORE_KNOWLEDGE_TYPES_H_ +#ifndef _ZoO_KNOWLEDGE_KNOWLEDGE_TYPES_H_ +#define _ZoO_KNOWLEDGE_KNOWLEDGE_TYPES_H_  #include "../core/index_types.h"  #include "../core/char_types.h" @@ -32,6 +32,7 @@ struct ZoO_knowledge     ZoO_index ** sequences;     ZoO_index sequences_length;     ZoO_index * sequences_sorted; +   ZoO_index sequences_length;  };  #endif diff --git a/src/core/main.c b/src/main.c index bb4ae23..bb4ae23 100644 --- a/src/core/main.c +++ b/src/main.c diff --git a/src/pervasive.h b/src/pervasive.h index b830326..c7c53a2 100644 --- a/src/pervasive.h +++ b/src/pervasive.h @@ -3,6 +3,12 @@  #include <limits.h> +#define ZoO_DEBUG_ALL 1 + +#ifndef ZoO_DEBUG_ALL +   #define ZoO_DEBUG_ALL 0 +#endif +  #ifndef ZoO_NETWORK_TIMEOUT     #define ZoO_NETWORK_TIMEOUT            200  #endif @@ -11,34 +17,6 @@     #define ZoO_MAX_REPLY_WORDS            64  #endif -#ifndef ZoO_DEFAULT_DATA_FILENAME -   #define ZoO_DEFAULT_DATA_FILENAME      "./memory.txt" -#endif - -#ifndef ZoO_DEFAULT_IRC_SERVER_ADDR -   #define ZoO_DEFAULT_IRC_SERVER_ADDR    "irc.foonetic.net" -#endif - -#ifndef ZoO_DEFAULT_IRC_SERVER_PORT -   #define ZoO_DEFAULT_IRC_SERVER_PORT    "6667" -#endif - -#ifndef ZoO_DEFAULT_IRC_SERVER_CHANNEL -   #define ZoO_DEFAULT_IRC_SERVER_CHANNEL 
"#theborghivemind" -#endif - -#ifndef ZoO_DEFAULT_IRC_USERNAME -   #define ZoO_DEFAULT_IRC_USERNAME       "zeroofone" -#endif - -#ifndef ZoO_DEFAULT_IRC_REALNAME -   #define ZoO_DEFAULT_IRC_REALNAME       "Zero of One (bot)" -#endif - -#ifndef ZoO_DEFAULT_REPLY_RATE -   #define ZoO_DEFAULT_REPLY_RATE         8 -#endif -  #define ZoO__TO_STRING(x) #x  #define ZoO_TO_STRING(x) ZoO__TO_STRING(x)  #define ZoO_ISOLATE(a) do {a} while (0) | 


