| summaryrefslogtreecommitdiff | 
diff options
| -rw-r--r-- | CMakeLists.txt | 11 | ||||
| -rw-r--r-- | src/CMakeLists.txt | 5 | ||||
| -rw-r--r-- | src/core/CMakeLists.txt | 10 | ||||
| -rw-r--r-- | src/core/assimilate.c | 232 | ||||
| -rw-r--r-- | src/core/create_sentences.c | 486 | ||||
| -rw-r--r-- | src/core/knowledge.c | 447 | ||||
| -rw-r--r-- | src/core/knowledge.h | 42 | ||||
| -rw-r--r-- | src/core/knowledge_types.h | 46 | ||||
| -rw-r--r-- | src/core/main.c | 296 | ||||
| -rw-r--r-- | src/core/state_types.h | 16 | ||||
| -rw-r--r-- | src/io/CMakeLists.txt | 8 | ||||
| -rw-r--r-- | src/io/data_input.c | 98 | ||||
| -rw-r--r-- | src/io/data_input.h | 21 | ||||
| -rw-r--r-- | src/io/data_input_types.h | 16 | ||||
| -rw-r--r-- | src/io/error.h | 146 | ||||
| -rw-r--r-- | src/io/network.c | 483 | ||||
| -rw-r--r-- | src/io/network.h | 27 | ||||
| -rw-r--r-- | src/io/network_types.h | 26 | ||||
| -rw-r--r-- | src/io/parameters.c | 354 | ||||
| -rw-r--r-- | src/io/parameters.h | 13 | ||||
| -rw-r--r-- | src/io/parameters_types.h | 20 | ||||
| -rw-r--r-- | src/pervasive.h | 59 | ||||
| -rw-r--r-- | src/tool/CMakeLists.txt | 7 | ||||
| -rw-r--r-- | src/tool/strings.c | 280 | ||||
| -rw-r--r-- | src/tool/strings.h | 19 | ||||
| -rw-r--r-- | src/tool/strings_types.h | 15 | 
26 files changed, 3183 insertions, 0 deletions
| diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..ba70690 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 2.8 FATAL_ERROR) + +project("Zero of One") + +include(FindPkgConfig) + +add_subdirectory(src) +set(CMAKE_C_FLAGS "-D_POSIX_SOURCE -std=c99 -O2") +# ${SRC_FILES} is recursively defined in the subdirectories. +# Each subdirectory only adds the source files that are present at its level. +add_executable(zero_of_one ${SRC_FILES}) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..76a73ed --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,5 @@ +add_subdirectory(core) +add_subdirectory(io) +add_subdirectory(tool) + +set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt new file mode 100644 index 0000000..2722355 --- /dev/null +++ b/src/core/CMakeLists.txt @@ -0,0 +1,10 @@ +set( +   SRC_FILES ${SRC_FILES} +   ${CMAKE_CURRENT_SOURCE_DIR}/main.c +   ${CMAKE_CURRENT_SOURCE_DIR}/knowledge.c +   ${CMAKE_CURRENT_SOURCE_DIR}/assimilate.c +   ${CMAKE_CURRENT_SOURCE_DIR}/create_sentences.c +) + +set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) + diff --git a/src/core/assimilate.c b/src/core/assimilate.c new file mode 100644 index 0000000..eb6aa17 --- /dev/null +++ b/src/core/assimilate.c @@ -0,0 +1,232 @@ +#include <stdlib.h> +#include <string.h> + +#include "../io/error.h" + +#include "knowledge.h" + +static int link_to +( +   ZoO_index links_count [const restrict static 1], +   ZoO_index * links_occurrences [const restrict static 1], +   ZoO_index * links [const restrict static 1], +   ZoO_index const target +) +{ +   ZoO_index i, * new_p; + +   for (i = 0; i < *links_count; ++i) +   { +      if ((*links)[i] == target) +      { +         if ((*links_occurrences)[i] == ZoO_INDEX_MAX) +         { +            ZoO_S_WARNING +            ( +               "Maximum link occurrences count has been reached." +            ); + +            return -1; +         } + +         (*links_occurrences)[i] += 1; + +         return 0; +      } +   } + +   if (*links_count == ZoO_INDEX_MAX) +   { +      ZoO_S_WARNING("Maximum links count has been reached."); + +      return -1; +   } + +   new_p = +      (ZoO_index *) realloc +      ( +         *links_occurrences, +         ( +            ( +               /* Safe: *links_count < ZoO_INDEX_MAX */ +               (size_t) (*links_count + 1) +            ) +            * sizeof(ZoO_index) +         ) +      ); + +   if (new_p == (ZoO_index *) NULL) +   { +      ZoO_S_ERROR("Could not reallocate a link occurrences list."); + +      return -1; +   } + +   new_p[*links_count] = 1; + +   *links_occurrences = new_p; + +   new_p = +      (ZoO_index *) realloc +      ( +         *links, +         ( +            ( +               /* Safe: *links_count < ZoO_INDEX_MAX */ +               (size_t) (*links_count + 1) +            ) * sizeof(ZoO_index) +         ) +      ); + +   if (new_p == (ZoO_index *) NULL) +   { +      ZoO_S_ERROR("Could not reallocate a link list."); + +      return -1; +   } + +   new_p[*links_count] = target; + +   *links = new_p; + +   *links_count += 1; + +   return 0; +} + +static int link_words +( +   struct ZoO_knowledge k [const restrict static 1], +   ZoO_index const a, +   ZoO_index const b +) +{ +   int error; + +   error = +      link_to +      ( +         &(k->words[a].forward_links_count), +         &(k->words[a].forward_links_occurrences), +         &(k->words[a].forward_links), +         b +      ); + +   error = +      ( +         link_to +         ( +            &(k->words[b].backward_links_count), +            &(k->words[b].backward_links_occurrences), +            &(k->words[b].backward_links), +            a +         ) +         | error +      ); + +   return error; +} + +int ZoO_knowledge_assimilate +( +   struct ZoO_knowledge k [const static 1], +   struct ZoO_strings string [const restrict static 1], +   ZoO_index const aliases_count, +   const char * restrict aliases [const restrict static aliases_count] +) +{ +   int error; +   ZoO_index curr_word, next_word; +   ZoO_index curr_word_id, next_word_id; + +   curr_word = 0; + +   if (string->words_count == 0) +   { +      return 0; +   } + +   for (curr_word = 0; curr_word < aliases_count; ++curr_word) +   { +      if (ZoO_IS_PREFIX(aliases[curr_word], string->words[0])) +      { +         return 0; +      } +   } + +   curr_word = 0; + +   if (ZoO_knowledge_learn(k, string->words[curr_word], &curr_word_id) < 0) +   { +      return -1; +   } + +   if (link_words(k, ZoO_WORD_START_OF_LINE, curr_word_id) < 0) +   { +      error = -1; + +      ZoO_WARNING +      ( +         "Could not indicate that '" +         ZoO_CHAR_STRING_SYMBOL +         "' was the first word of the sentence.", +         string->words[0] +      ); +   } + +   next_word = 1; + +   error = 0; + +   while (next_word < string->words_count) +   { +      /* prevents words [restrict], k [restrict] */ +      if (ZoO_knowledge_learn(k, string->words[next_word], &next_word_id) < 0) +      { +         return -1; +      } + +      if (link_words(k, curr_word_id, next_word_id) < 0) +      { +         error = -1; + +         ZoO_WARNING +         ( +            "Could not add a link between words '" +            ZoO_CHAR_STRING_SYMBOL +            "' and '" +            ZoO_CHAR_STRING_SYMBOL +            "'.", +            string->words[curr_word], +            string->words[next_word] +         ); +      } + +      curr_word = next_word; +      curr_word_id = next_word_id; +      /* +       * Safe: +       *  - next_word < words_count +       *  - words_count =< ZoO_INDEX_MAX +       *  ---- +       *  next_word < ZoO_INDEX_MAX +       */ +      next_word += 1; +   } + +   if (link_words(k, curr_word_id, ZoO_WORD_END_OF_LINE) < 0) +   { +      error = -1; + +      ZoO_WARNING +      ( +         "Could not indicate that '" +         ZoO_CHAR_STRING_SYMBOL +         "' was the last word of the sentence.", +         string->words[curr_word_id] +      ); +   } + +   return error; +} + diff --git a/src/core/create_sentences.c b/src/core/create_sentences.c new file mode 100644 index 0000000..bc410e5 --- /dev/null +++ b/src/core/create_sentences.c @@ -0,0 +1,486 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../io/error.h" + +#include "knowledge.h" + +static ZoO_index pick_an_index +( +   ZoO_index const occurrences, +   const ZoO_index links_occurrences [const restrict static 1], +   const ZoO_index links [const restrict static 1] +) +{ +   ZoO_index result, accumulator, random_number; + +   result = 0; +   accumulator = links_occurrences[0]; +   random_number = (((ZoO_index) rand()) % occurrences); + +   while (accumulator < random_number) +   { + +      /* +       * Should be safe: +       * result overflowing <-> sum('links_occurrences') > 'occurrences' +       * and sum('links_occurrences') == 'occurrences' +       */ +      result += 1; + +      /* +       * Should be safe: +       *  - sum('links_occurrences') == 'occurrences'. +       *  - 'occurrences' is safe. +       *  ---- +       *  'accumulator' is safe. +       */ +      accumulator += links_occurrences[result]; +   } + +   return links[result]; +} + +static unsigned char * extend_left +( +   struct ZoO_knowledge k [const restrict static 1], +   ZoO_index word_id, +   ZoO_char current_sentence [static 1], +   size_t sentence_size [const restrict static 1], +   ZoO_index credits [const static 1] +) +{ +   size_t addition_size; +   struct ZoO_knowledge_word * w; +   ZoO_char * next_sentence; + +   w = (k->words + word_id); + +   if +   ( +      (w->special == ZoO_WORD_STARTS_SENTENCE) +      || (w->occurrences == 0) +   ) +   { +      return current_sentence; +   } + +   /* prevents current_sentence [restrict] */ +   next_sentence = current_sentence; + +   for (;;) +   { +      if (*credits == 0) +      { +         return current_sentence; +      } + +      *credits -= 1; +      word_id = +         pick_an_index +         ( +            w->occurrences, +            w->backward_links_occurrences, +            w->backward_links +         ); + +      w = (k->words + word_id); + +      switch (w->special) +      { +         case ZoO_WORD_HAS_NO_EFFECT: +            /* FIXME: not overflow-safe. */ +            /* word also contains an '\0', which we will replace by a ' ' */ +            addition_size = w->word_size; +            break; + +         case ZoO_WORD_ENDS_SENTENCE: +            ZoO_S_WARNING("END OF LINE should not be prefixable."); +            return current_sentence; + +         case ZoO_WORD_STARTS_SENTENCE: +            return current_sentence; + +         case ZoO_WORD_REMOVES_LEFT_SPACE: +         case ZoO_WORD_REMOVES_RIGHT_SPACE: +            /* word also contains an '\0', which we will remove. */ +            addition_size = w->word_size - 1; +            break; +      } + +      if (*sentence_size > (SIZE_MAX - addition_size)) +      { +         ZoO_S_WARNING +         ( +            "Sentence construction aborted to avoid size_t overflow." +         ); + +         return current_sentence; +      } + +      next_sentence = +         (ZoO_char *) calloc +         ( +            /* overflow-safe */ +            (*sentence_size + addition_size), +            sizeof(ZoO_char) +         ); + +      if (next_sentence == (ZoO_char *) NULL) +      { +         ZoO_S_ERROR("Could not allocate memory to store new sentence."); + +         return current_sentence; +      } + +      /* overflow-safe */ +      *sentence_size = (*sentence_size + addition_size); + +      switch (w->special) +      { +         case ZoO_WORD_HAS_NO_EFFECT: +            snprintf +            ( +               next_sentence, +               *sentence_size, +               " " ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL, +               w->word, +               current_sentence +            ); +            break; + +         case ZoO_WORD_REMOVES_LEFT_SPACE: +            snprintf +            ( +               next_sentence, +               *sentence_size, +               ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL, +               w->word, +               current_sentence +            ); +            break; + +         case ZoO_WORD_REMOVES_RIGHT_SPACE: +            snprintf +            ( +               next_sentence, +               *sentence_size, +               ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL, +               w->word, +               /* Safe: strlen(current_sentence) >= 2 */ +               (current_sentence + 1) +            ); +            break; + +         default: +            /* TODO: PROGRAM LOGIC ERROR */ +            break; +      } + +      free((void *) current_sentence); + +      /* prevents current_sentence [const] */ +      current_sentence = next_sentence; +   } +} + +static unsigned char * extend_right +( +   struct ZoO_knowledge k [const restrict static 1], +   ZoO_index word_id, +   ZoO_char current_sentence [static 1], +   size_t sentence_size [const restrict static 1], +   ZoO_index credits [const static 1] +) +{ +   size_t addition_size; +   struct ZoO_knowledge_word * w; +   ZoO_char * next_sentence; + +   w = (k->words + word_id); + +   if +   ( +      (w->special == ZoO_WORD_ENDS_SENTENCE) +      || (w->occurrences == 0) +   ) +   { +      return current_sentence; +   } + +   /* prevents current_sentence [restrict] */ +   next_sentence = current_sentence; + +   for (;;) +   { +      if (*credits == 0) +      { +         return current_sentence; +      } + +      *credits -= 1; + +      word_id = +         pick_an_index +         ( +            w->occurrences, +            w->forward_links_occurrences, +            w->forward_links +         ); + +      w = (k->words + word_id); + +      switch (w->special) +      { +         case ZoO_WORD_HAS_NO_EFFECT: +            /* FIXME: Assumed to be overflow-safe. */ +            /* word also contains an '\0', which we will replace by a ' '. */ +            addition_size = w->word_size; +            break; + +         case ZoO_WORD_ENDS_SENTENCE: +            return current_sentence; + +         case ZoO_WORD_STARTS_SENTENCE: +            ZoO_S_WARNING("START OF LINE should not be suffixable."); +            return current_sentence; + +         case ZoO_WORD_REMOVES_LEFT_SPACE: +         case ZoO_WORD_REMOVES_RIGHT_SPACE: +            /* word also contains an '\0', which we will remove. */ +            addition_size = w->word_size - 1; +            break; +      } + +      if (*sentence_size > (SIZE_MAX - addition_size)) +      { +         ZoO_S_WARNING +         ( +            "Sentence construction aborted to avoid size_t overflow." +         ); + +         return current_sentence; +      } + +      next_sentence = +         (ZoO_char *) calloc +         ( +            /* overflow-safe */ +            (*sentence_size + addition_size), +            sizeof(ZoO_char) +         ); + +      if (next_sentence == (ZoO_char *) NULL) +      { +         ZoO_S_ERROR("Could not allocate memory to store new sentence."); + +         return current_sentence; +      } + +      /* overflow-safe */ +      *sentence_size = (*sentence_size + addition_size); + +      switch (w->special) +      { +         case ZoO_WORD_REMOVES_LEFT_SPACE: +            printf +            ( +               "current sentence:'%s', pointing at '%c'.\n", +               current_sentence, +               current_sentence[*sentence_size - addition_size - 2] +            ); +            current_sentence[*sentence_size - addition_size - 2] = '\0'; + +         case ZoO_WORD_HAS_NO_EFFECT: +            snprintf +            ( +               next_sentence, +               *sentence_size, +               ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL " ", +               current_sentence, +               w->word +            ); +            break; + +         case ZoO_WORD_REMOVES_RIGHT_SPACE: +            snprintf +            ( +               next_sentence, +               *sentence_size, +               ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL, +               current_sentence, +               w->word +            ); +            break; + +         default: +            /* TODO: PROGRAM LOGIC ERROR */ +            break; +      } + +      free((void *) current_sentence); + +      /* prevents current_sentence [const] */ +      current_sentence = next_sentence; +   } +} + +int ZoO_knowledge_extend +( +   struct ZoO_knowledge k [const static 1], +   const struct ZoO_strings string [const static 1], +   int const ignore_first_word, +   ZoO_char * result [const static 1] +) +{ +   int word_found; +   size_t sentence_size; +   ZoO_index i, word_id, word_min_score, word_min_id, credits; + +   word_found = 0; +   credits = ZoO_MAX_REPLY_WORDS; + +   if (ignore_first_word) +   { +      i = 1; +   } +   else +   { +      i = 0; +   } + +   for (; i < string->words_count; ++i) +   { +      /* prevents k [restrict] */ +      if (ZoO_knowledge_find(k, string->words[i], &word_min_id) == 0) +      { +         word_found = 1; +         word_min_score = k->words[word_min_id].occurrences; + +         break; +      } +   } + +   if (word_found == 0) +   { +      word_min_id = (rand() % k->words_count); +      word_min_score = k->words[word_min_id].occurrences; +   } + +   for (; i < string->words_count; ++i) +   { +      if +      ( +         (ZoO_knowledge_find(k, string->words[i], &word_id) == 0) +         && (k->words[word_id].occurrences < word_min_score) +      ) +      { +         word_min_score = k->words[word_id].occurrences; +         word_min_id = word_id; +      } +   } + +   /* 3: 2 spaces + '\0' */ +   /* FIXME: not overflow-safe */ +   switch (k->words[word_min_id].special) +   { +      case ZoO_WORD_REMOVES_LEFT_SPACE: +      case ZoO_WORD_REMOVES_RIGHT_SPACE: +         /* word + ' ' + '\0' */ +         sentence_size = (strlen(k->words[word_min_id].word) + 2); +         break; + +      case ZoO_WORD_HAS_NO_EFFECT: +         /* word + ' ' * 2 + '\0' */ +         sentence_size = (strlen(k->words[word_min_id].word) + 3); +         break; + +      default: +         ZoO_WARNING +         ( +            "'%s' was unexpectedly selected as pillar.", +            k->words[word_min_id].word +         ); +         /* word + '[' + ']' + ' ' * 2 + '\0' */ +         sentence_size = (strlen(k->words[word_min_id].word) + 5); +         break; +   } + +   *result = (ZoO_char *) calloc(sentence_size, sizeof(ZoO_char)); + +   if (*result == (ZoO_char *) NULL) +   { +      ZoO_S_ERROR("Could not allocate memory to start sentence."); + +      return -2; +   } + +   switch (k->words[word_min_id].special) +   { +      case ZoO_WORD_REMOVES_LEFT_SPACE: +         snprintf +         ( +            *result, +            sentence_size, +            ZoO_CHAR_STRING_SYMBOL " ", +            k->words[word_min_id].word +         ); +         break; + +      case ZoO_WORD_REMOVES_RIGHT_SPACE: +         snprintf +         ( +            *result, +            sentence_size, +            " " ZoO_CHAR_STRING_SYMBOL, +            k->words[word_min_id].word +         ); +         break; + +      case ZoO_WORD_HAS_NO_EFFECT: +         snprintf +         ( +            *result, +            sentence_size, +            " " ZoO_CHAR_STRING_SYMBOL " ", +            k->words[word_min_id].word +         ); +         break; + +      default: +         snprintf +         ( +            *result, +            sentence_size, +            " [" ZoO_CHAR_STRING_SYMBOL "] ", +            k->words[word_min_id].word +         ); +         break; +   } + +   if ((word_min_score == 0) || (credits == 0)) +   { +      return 0; +   } + +   --credits; + +   /* prevents result [restrict] */ +   *result = extend_left(k, word_min_id, *result, &sentence_size, &credits); + +   if (*result == (ZoO_char *) NULL) +   { +      return -2; +   } + +   *result = extend_right(k, word_min_id, *result, &sentence_size, &credits); + +   if (*result == (ZoO_char *) NULL) +   { +      return -2; +   } + +   return 0; +} diff --git a/src/core/knowledge.c b/src/core/knowledge.c new file mode 100644 index 0000000..31ccb97 --- /dev/null +++ b/src/core/knowledge.c @@ -0,0 +1,447 @@ +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../io/error.h" + +#include "knowledge.h" + +/* XXX: are we as close to immutable as we want to be? */ +unsigned int const ZoO_knowledge_punctuation_chars_count = 7; +const ZoO_char const ZoO_knowledge_punctuation_chars[7] = +   { +      '!', +      ',', +      '.', +      ':', +      ';', +      '?', +      '~' +   }; + +/* XXX: are we as close to immutable as we want to be? */ +unsigned int const ZoO_knowledge_forbidden_chars_count = 8; +const ZoO_char const ZoO_knowledge_forbidden_chars[8]= +   { +      '(', +      ')', +      '[', +      ']', +      '{', +      '}', +      '<', +      '>' +   }; + +int ZoO_knowledge_find +( +   const struct ZoO_knowledge k [const restrict static 1], +   const ZoO_char word [const restrict static 1], +   ZoO_index result [const restrict static 1] +) +{ +   int cmp; +   ZoO_index i, current_min, current_max; + +   /* This is a binary search. */ + +   if (k->words_count < 1) +   { +      *result = 0; + +      return -1; +   } + +   current_min = 0; + +   /* overflow-safe: k->words_count >= 1 */ +   current_max = (k->words_count - 1); + +   for (;;) +   { +      /* FIXME: overflow-safe? */ +      i = ((current_min + current_max) / 2); + +      if (i == k->words_count) +      { +         *result = k->words_count; + +         return -1; +      } + +      cmp = +         /* XXX: Assumed to be compatible with ZoO_char */ +         strcmp +         ( +            (char *) word, +            (const char *) k->words[k->sorted_indices[i]].word +         ); + +      if (cmp > 0) +      { +         if ((current_min > current_max)) +         { +            *result = (i + 1); + +            return -1; +         } + +         /* FIXME: overflow-safe? */ +         current_min = (i + 1); +      } +      else if (cmp < 0) +      { +         if ((current_min > current_max) || (i == 0)) +         { +            *result = i; + +            return -1; +         } + +         /* overflow-safe */ +         current_max = (i - 1); +      } +      else +      { +         *result = k->sorted_indices[i]; + +         return 0; +      } +   } +} + +static void word_init (struct ZoO_knowledge_word w [const restrict static 1]) +{ +   w->word_size = 0; +   w->word = (ZoO_char *) NULL; +   w->special = ZoO_WORD_HAS_NO_EFFECT; +   w->occurrences = 1; +   w->forward_links_count  = 0; +   w->backward_links_count = 0; +   w->forward_links_occurrences  = (ZoO_index *) NULL; +   w->backward_links_occurrences = (ZoO_index *) NULL; +   w->forward_links  = (ZoO_index *) NULL; +   w->backward_links = (ZoO_index *) NULL; +} + +static int add_punctuation_nodes +( +   struct ZoO_knowledge k [const static 1] +) +{ +   int error; +   char w[2]; +   ZoO_index i, id; + +   if (ZoO_knowledge_learn(k, "START OF LINE", &id) < 0) +   { +      ZoO_S_FATAL("Could not add 'START OF LINE' to knowledge."); + +      return -2; +   } + +   k->words[id].special = ZoO_WORD_STARTS_SENTENCE; +   k->words[id].occurrences = 0; + +   if (ZoO_knowledge_learn(k, "END OF LINE", &id) < 0) +   { +      ZoO_S_FATAL("Could not add 'END OF LINE' to knowledge."); + +      return -2; +   } + +   k->words[id].special = ZoO_WORD_ENDS_SENTENCE; +   k->words[id].occurrences = 0; + +   w[1] = '\0'; + +   error = 0; + +   for (i = 0; i < ZoO_knowledge_punctuation_chars_count; ++i) +   { +      w[0] = ZoO_knowledge_punctuation_chars[i]; + +      if (ZoO_knowledge_learn(k, w, &id) < 0) +      { +         ZoO_WARNING("Could not add '%s' to knowledge.", w); + +         error = -1; +      } +      else +      { +         k->words[id].special = ZoO_WORD_REMOVES_LEFT_SPACE; +         k->words[id].occurrences = 0; +      } +   } + +   return error; +} + +int ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1]) +{ +   k->words_count = 0; +   k->words = (struct ZoO_knowledge_word *) NULL; +   k->sorted_indices = (ZoO_index *) NULL; + +   if (add_punctuation_nodes(k) < -1) +   { +      ZoO_knowledge_finalize(k); + +      return -1; +   } + +   return 0; +} + +static void finalize_word +( +   struct ZoO_knowledge_word w [const restrict static 1] +) +{ +   if (w->word != (ZoO_char *) NULL) +   { +      free((void *) w->word); + +      w->word = (ZoO_char *) NULL; +   } + +   if (w->forward_links_occurrences != (ZoO_index *) NULL) +   { +      free((void *) w->forward_links_occurrences); + +      w->forward_links_occurrences = (ZoO_index *) NULL; +   } + +   if (w->backward_links_occurrences != (ZoO_index *) NULL) +   { +      free((void *) w->backward_links_occurrences); + +      w->backward_links_occurrences = (ZoO_index *) NULL; +   } + +   if (w->forward_links != (ZoO_index *) NULL) +   { +      free((void *) w->forward_links); + +      w->forward_links = (ZoO_index *) NULL; +   } + +   if (w->backward_links != (ZoO_index *) NULL) +   { +      free((void *) w->backward_links); + +      w->backward_links = (ZoO_index *) NULL; +   } + +   w->forward_links_count  = 0; +   w->backward_links_count = 0; +} + +void ZoO_knowledge_finalize (struct ZoO_knowledge k [const restrict static 1]) +{ +   ZoO_index i; + +   for (i = 0; i < k->words_count; ++i) +   { +      /* prevents k [restrict] */ +      finalize_word(k->words + i); +   } + +   k->words_count = 0; + +   if (k->words != (struct ZoO_knowledge_word *) NULL) +   { +      free((void *) k->words); + +      k->words = (struct ZoO_knowledge_word *) NULL; +   } + +   if (k->sorted_indices != (ZoO_index *) NULL) +   { +      free((void *) k->sorted_indices); + +      k->sorted_indices = (ZoO_index *) NULL; +   } +} + +int ZoO_knowledge_learn +( +   struct ZoO_knowledge k [const static 1], +   const ZoO_char word [const restrict static 1], +   ZoO_index result [const restrict static 1] +) +{ +   struct ZoO_knowledge_word * new_wordlist; +   ZoO_index * new_sorted_indices; +   ZoO_index temp; + +   /* prevents k [restrict] */ +   if (ZoO_knowledge_find(k, word, result) == 0) +   { +      if (k->words[*result].occurrences == ZoO_INDEX_MAX) +      { +         ZoO_WARNING +         ( +            "Maximum number of occurrences has been reached for word '" +            ZoO_CHAR_STRING_SYMBOL +            "'.", +            word +         ); + +         return -1; +      } + +      /* overflow-safe */ +      k->words[*result].occurrences += 1; + +      return 0; +   } + +   if (k->words_count == ZoO_INDEX_MAX) +   { +      ZoO_S_WARNING("Maximum number of words has been reached."); + +      return -1; +   } + +   new_wordlist = +      (struct ZoO_knowledge_word *) realloc +      ( +         (void *) k->words, +         ( +            ( +               /* overflow-safe: k->words_count < ZoO_INDEX_MAX */ +               (size_t) (k->words_count + 1) +            ) +            * sizeof(struct ZoO_knowledge_word) +         ) +      ); + +   if (new_wordlist == (struct ZoO_knowledge_word *) NULL) +   { +      ZoO_ERROR +      ( +         "Could not learn the word '%s': unable to realloc the word list.", +         word +      ); + +      return -1; +   } + +   k->words = new_wordlist; + +   new_sorted_indices = +      (ZoO_index *) realloc +      ( +         (void *) k->sorted_indices, +         ( +            ( +               /* overflow-safe: k->words_count < ZoO_INDEX_MAX */ +               (size_t) (k->words_count + 1) +            ) +            * sizeof(ZoO_index) +         ) +      ); + +   if (new_sorted_indices == (ZoO_index *) NULL) +   { +      ZoO_ERROR +      ( +         "Could not learn the word '" +         ZoO_CHAR_STRING_SYMBOL +         "': unable to realloc the index list.", +         word +      ); + +      return -1; +   } + +   k->sorted_indices = new_sorted_indices; + +   /* We can only move indices right of *result if they exist. */ +   if (*result != k->words_count) +   { +      /* TODO: check if correct. */ +      memmove +      ( +         /* +          * overflow-safe: +          *  - k->words_count < ZoO_INDEX_MAX +          *  - (k->sorted_indices + *result + 1) =< k->words_count +          */ +         (void *) (k->sorted_indices + *result + 1), +         /* overflow-safe: see above */ +         (const void *) (k->sorted_indices + *result), +         ( +            ( +               /* overflow-safe: *result < k->words_count */ +               (size_t) (k->words_count - *result) +            ) +            * sizeof(ZoO_index) +         ) +      ); +   } + +   temp = *result; + +   k->sorted_indices[*result] = k->words_count; + +   *result = k->words_count; + +   word_init(k->words + *result); + +   /* XXX: strlen assumed to work with ZoO_char. */ +   k->words[*result].word_size = strlen(word); + +   if (k->words[*result].word_size == SIZE_MAX) +   { +      ZoO_S_WARNING +      ( +         "Could not learn word that had a size too big to store in a '\\0' " +         "terminated string. Chances are, this is but a symptom of the real " +         "problem." +      ); + +      return -1; +   } + +   /* We also need '\0' */ +   k->words[*result].word_size += 1; + +   k->words[*result].word = +      (ZoO_char *) calloc +      ( +         k->words[*result].word_size, +         sizeof(ZoO_char) +      ); + +   if (k->words[*result].word == (ZoO_char *) NULL) +   { +      ZoO_S_ERROR +      ( +         "Could not learn word due to being unable to allocate the memory to " +         "store it." +      ); + +      k->words[*result].word_size = 0; + +      return -1; +   } + +   memcpy(k->words[*result].word, word, k->words[*result].word_size); + +   /* Safe: k->words_count < ZoO_INDEX_MAX */ +   k->words_count += 1; + +   ZoO_DEBUG +   ( +      ZoO_DEBUG_LEARNING, +      "Learned word {'%s', id: %u, rank: %u}", +      word, +      *result, +      temp +   ); + +   return 0; +} + diff --git a/src/core/knowledge.h b/src/core/knowledge.h new file mode 100644 index 0000000..f20cb16 --- /dev/null +++ b/src/core/knowledge.h @@ -0,0 +1,42 @@ +#ifndef _ZoO_CORE_KNOWLEDGE_H_ +#define _ZoO_CORE_KNOWLEDGE_H_ + +#include "../tool/strings_types.h" + +#include "knowledge_types.h" + +int ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1]); + +void ZoO_knowledge_finalize (struct ZoO_knowledge k [const static 1]); + +int ZoO_knowledge_find +( +   const struct ZoO_knowledge k [const restrict static 1], +   const ZoO_char word [const restrict static 1], +   ZoO_index result [const restrict static 1] +); + +int ZoO_knowledge_learn +( +   struct ZoO_knowledge k [const static 1], +   const ZoO_char word [const restrict static 1], +   ZoO_index result [const restrict static 1] +); + +int ZoO_knowledge_assimilate +( +   struct ZoO_knowledge k [const static 1], +   struct ZoO_strings string [const restrict static 1], +   ZoO_index const aliases_count, +   const char * restrict aliases [const restrict static aliases_count] +); + +int ZoO_knowledge_extend +( +   struct ZoO_knowledge k [const static 1], +   const struct ZoO_strings string [const static 1], +   int const ignore_first_word, +   ZoO_char * result [const static 1] +); + +#endif diff --git a/src/core/knowledge_types.h b/src/core/knowledge_types.h new file mode 100644 index 0000000..f2e8161 --- /dev/null +++ b/src/core/knowledge_types.h @@ -0,0 +1,46 @@ +#ifndef _ZoO_CORE_KNOWLEDGE_TYPES_H_ +#define _ZoO_CORE_KNOWLEDGE_TYPES_H_ + +#include "../pervasive.h" + +#define ZoO_WORD_START_OF_LINE 0 +#define ZoO_WORD_END_OF_LINE   1 + +/* XXX: are we as close to immutable as we want to be? */ +extern unsigned int const ZoO_knowledge_punctuation_chars_count; +extern const ZoO_char const ZoO_knowledge_punctuation_chars[7]; +extern unsigned int const ZoO_knowledge_forbidden_chars_count; +extern const ZoO_char const ZoO_knowledge_forbidden_chars[8]; + + +enum ZoO_knowledge_special_effect +{ +   ZoO_WORD_HAS_NO_EFFECT, +   ZoO_WORD_ENDS_SENTENCE, +   ZoO_WORD_STARTS_SENTENCE, +   ZoO_WORD_REMOVES_LEFT_SPACE, +   ZoO_WORD_REMOVES_RIGHT_SPACE +}; + +struct ZoO_knowledge_word +{ +   size_t word_size; +   ZoO_char * word; +   enum ZoO_knowledge_special_effect special; +   ZoO_index occurrences; +   ZoO_index forward_links_count; +   ZoO_index backward_links_count; +   ZoO_index * forward_links_occurrences; +   ZoO_index * backward_links_occurrences; +   ZoO_index * forward_links; +   ZoO_index * backward_links; +}; + +struct ZoO_knowledge +{ +   ZoO_index words_count; +   ZoO_index * sorted_indices; +   struct ZoO_knowledge_word * words; +}; + +#endif diff --git a/src/core/main.c b/src/core/main.c new file mode 100644 index 0000000..34233d6 --- /dev/null +++ b/src/core/main.c @@ -0,0 +1,296 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <time.h> +#include <signal.h> + +#include "../tool/strings.h" + +#include "../io/error.h" +#include "../io/parameters.h" +#include "../io/data_input.h" +#include "../io/network.h" + + +#include "knowledge.h" + +#include "state_types.h" + +static int run = 1; + +static void request_termination (int const signo) +{ +   if ((signo == SIGINT) || (signo == SIGTERM)) +   { +      run = 0; +   } +} + +static int initialize +( +   struct ZoO_state s [const static 1], +   int const argc, +   const char * argv [const static argc] +) +{ +   ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Zero of One is initializing..."); + +   srand(time(NULL)); + +   /* prevents s [restrict] */ +   if (ZoO_knowledge_initialize(&(s->knowledge)) < 0) +   { +      return -1; +   } + +   if (ZoO_parameters_initialize(&(s->param), argc, argv) < 0) +   { +      ZoO_knowledge_finalize(&(s->knowledge)); + +      return -1; +   } + +   return 0; +} + +static int load_data_file (struct ZoO_state s [const static 1]) +{ +   struct ZoO_data_input input; +   char * result; + +   if (ZoO_data_input_open(&input, s->param.data_filename) < 0) +   { +      return -1; +   } + +   while +   ( +      ZoO_data_input_read_line +      ( +         &input, +         ZoO_knowledge_punctuation_chars_count, +         ZoO_knowledge_punctuation_chars +      ) == 0 +   ) +   { +      (void) ZoO_knowledge_assimilate +      ( +         &(s->knowledge), +         &(input.string), +         s->param.aliases_count, +         s->param.aliases +      ); +   } + +   ZoO_data_input_close(&input); + +   return 0; +} + +static int finalize (struct ZoO_state s [const static 1]) +{ +   int error; + +   ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Zero of One is finalizing..."); + +   error = 0; + +   /* prevents s [restrict] */ +   ZoO_knowledge_finalize(&(s->knowledge)); + +   return error; +} + +static int network_connect (struct ZoO_state s [const static 1]) +{ +   return +      ZoO_network_connect +      ( +         &(s->network), +         s->param.irc_server_addr, +         s->param.irc_server_port, +         s->param.irc_server_channel, +         s->param.irc_username, +         s->param.irc_realname, +         s->param.aliases[0] +      ); +} + +static int should_reply +( +   struct ZoO_parameters param [const restrict static 1], +   struct ZoO_strings string [const restrict static 1], +   int should_learn [const restrict static 1] +) +{ +   ZoO_index i, j; + +   for (i = 0; i < param->aliases_count; ++i) +   { +      if (ZoO_IS_PREFIX(param->aliases[i], string->words[0])) +      { +         *should_learn = 0; + +         return 1; +      } + +      for (j = 1; j < string->words_count; ++j) +      { +         if (ZoO_IS_PREFIX(param->aliases[i], string->words[j])) +         { +            *should_learn = 1; + +            return 1; +         } +      } +   } + +   *should_learn = 1; + +   return (param->reply_rate >= (rand() % 100)); +} + +static void handle_message +( +   struct ZoO_state s [const static 1], +   struct ZoO_strings string [const restrict static 1], +   ssize_t const msg_offset, +   ssize_t const msg_size +) +{ +   ZoO_char * line; +   int reply, learn; + +   if +   ( +      ZoO_strings_parse +      ( +         string, +         (size_t) msg_size, +         (s->network.msg + msg_offset), +         ZoO_knowledge_punctuation_chars_count, +         ZoO_knowledge_punctuation_chars +      ) < 0 +   ) +   { +      ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Could not dissect msg."); + +      return; +   } + +   if (string->words_count == 0) +   { +      return; +   } + +   reply = should_reply(&(s->param), string, &learn); + +   if +   ( +      reply +      && +      ( +         ZoO_knowledge_extend +         ( +            &(s->knowledge), +            string, +            !learn, +            &line +         ) == 0 +      ) +   ) +   { +      if (line[0] == ' ') +      { +         strcpy((s->network.msg), (line + 1)); +      } +      else +      { +         strcpy((s->network.msg), line); +      } + +      free((void *) line); + +      ZoO_network_send(&(s->network)); +   } + +   if (learn) +   { +      (void) ZoO_knowledge_assimilate +      ( +         &(s->knowledge), +         string, +         s->param.aliases_count, +         s->param.aliases +      ); +   } +} + +static int main_loop  (struct ZoO_state s [const static 1]) +{ +   struct ZoO_strings string; +   ssize_t msg_offset, msg_size; + +   msg_offset = 0; +   msg_size = 0; + +   ZoO_strings_initialize(&string); + +   while (run) +   { +      if (ZoO_network_receive(&(s->network), &msg_offset, &msg_size) == 0) +      { +         handle_message(s, &string, msg_offset, msg_size); +      } +   } + +   ZoO_strings_finalize(&string); + +   ZoO_network_disconnect(&(s->network)); + +   return 0; +} + +int main (int const argc, const char * argv [const static argc]) +{ +   struct ZoO_state s; + +   if (initialize(&s, argc, argv) < 0) +   { +      return -1; +   } + +   if (load_data_file(&s) < 0) +   { +      goto CRASH; +   } + +   if (network_connect(&s) < 0) +   { +      goto CRASH; +   } + +   if (main_loop(&s) < 0) +   { +      goto CRASH; +   } + +   (void) finalize(&s); + +   ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Zero of One terminated normally."); + +   return 0; + +   CRASH: +   { +      (void) finalize(&s); + +      ZoO_S_DEBUG +      ( +         ZoO_DEBUG_PROGRAM_FLOW, +         "Zero of One terminated by crashing." +      ); + +      return -1; +   } +} diff --git a/src/core/state_types.h b/src/core/state_types.h new file mode 100644 index 0000000..89c814e --- /dev/null +++ b/src/core/state_types.h @@ -0,0 +1,16 @@ +#ifndef _ZoO_CORE_STATE_TYPES_H_ +#define _ZoO_CORE_STATE_TYPES_H_ + +#include "../io/parameters_types.h" +#include "../io/network_types.h" + +#include "knowledge_types.h" + +struct ZoO_state +{ +   struct ZoO_parameters param; +   struct ZoO_knowledge knowledge; +   struct ZoO_network network; +}; + +#endif diff --git a/src/io/CMakeLists.txt b/src/io/CMakeLists.txt new file mode 100644 index 0000000..a13154b --- /dev/null +++ b/src/io/CMakeLists.txt @@ -0,0 +1,8 @@ +set( +   SRC_FILES ${SRC_FILES} +   ${CMAKE_CURRENT_SOURCE_DIR}/parameters.c +   ${CMAKE_CURRENT_SOURCE_DIR}/network.c +   ${CMAKE_CURRENT_SOURCE_DIR}/data_input.c +) +set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) + diff --git a/src/io/data_input.c b/src/io/data_input.c new file mode 100644 index 0000000..e31d33b --- /dev/null +++ b/src/io/data_input.c @@ -0,0 +1,98 @@ +#define _POSIX_C_SOURCE 200809L +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "error.h" + +#include "data_input.h" + +int ZoO_data_input_open +( +   struct ZoO_data_input di [const static 1], +   const char filename [const restrict static 1] +) +{ +   /* prevents di [restrict] */ +   ZoO_strings_initialize(&(di->string)); + +   di->file = fopen(filename, "r"); + +   if (di->file == (FILE *) NULL) +   { +      ZoO_ERROR +      ( +         "Could not open file '%s' in readonly mode.", +         filename +      ); + +      return -1; +   } + +   return 0; +} + +int ZoO_data_input_read_line +( +   struct ZoO_data_input di [const static 1], +   ZoO_index const punctuations_count, +   const ZoO_char punctuations [const restrict static punctuations_count] +) +{ +   size_t line_size, i, w_start; +   ZoO_char * line; + +   /* prevents di [restrict] */ +   ZoO_strings_finalize(&(di->string)); + +   line = (ZoO_char *) NULL; +   line_size = 0; + +   /* XXX: assumed compatible with ZoO_char */ + +   if (getline(&line, &line_size, di->file) < 1) +   { +      free((void *) line); + +      return -1; +   } + +   line_size = strlen(line); +   line[line_size - 1] = '\0'; + +   --line_size; /* removed '\n' */ + +   if +   ( +      ZoO_strings_parse +      ( +         &(di->string), +         line_size, +         line, +         punctuations_count, +         punctuations +      ) < 0 +   ) +   { +      free((void *) line); + +      return -1; +   } + +   free((void *) line); + +   return 0; +} + +void ZoO_data_input_close (struct ZoO_data_input di [const static 1]) +{ +   if (di->file != (FILE *) NULL) +   { +      fclose(di->file); + +      di->file = (FILE *) NULL; +   } + +   /* prevents di [restrict] */ +   ZoO_strings_finalize(&(di->string)); +} diff --git a/src/io/data_input.h b/src/io/data_input.h new file mode 100644 index 0000000..a2f004b --- /dev/null +++ b/src/io/data_input.h @@ -0,0 +1,21 @@ +#ifndef _ZoO_IO_DATA_INPUT_H_ +#define _ZoO_IO_DATA_INPUT_H_ + +#include "data_input_types.h" + +int ZoO_data_input_open +( +   struct ZoO_data_input di [const static 1], +   const char filename [const restrict static 1] +); + +int ZoO_data_input_read_line +( +   struct ZoO_data_input di [const static 1], +   ZoO_index const punctuations_count, +   const ZoO_char punctuations [const restrict static punctuations_count] +); + +void ZoO_data_input_close (struct ZoO_data_input di [const static 1]); + +#endif diff --git a/src/io/data_input_types.h b/src/io/data_input_types.h new file mode 100644 index 0000000..bd2709b --- /dev/null +++ b/src/io/data_input_types.h @@ -0,0 +1,16 @@ +#ifndef _ZoO_IO_DATA_INPUT_TYPES_H_ +#define _ZoO_IO_DATA_INPUT_TYPES_H_ + +#include <stdio.h> + +#include "../pervasive.h" + +#include "../tool/strings.h" + +struct ZoO_data_input +{ +   FILE * restrict file; +   struct ZoO_strings string; +}; + +#endif diff --git a/src/io/error.h b/src/io/error.h new file mode 100644 index 0000000..e4267a0 --- /dev/null +++ b/src/io/error.h @@ -0,0 +1,146 @@ +#ifndef _ZoO_IO_ERROR_H_ +#define _ZoO_IO_ERROR_H_ + +#include <stdio.h> + +#include "../pervasive.h" + +#define ZoO_DEBUG_ALL 1 + +#ifndef ZoO_DEBUG_ALL +   #define ZoO_DEBUG_ALL 0 +#endif + +#ifndef ZoO_DEBUG_PROGRAM_FLOW +   #define ZoO_DEBUG_PROGRAM_FLOW   (0 || ZoO_DEBUG_ALL) +#endif + +#ifndef ZoO_DEBUG_CONFIG +   #define ZoO_DEBUG_CONFIG         (0 || ZoO_DEBUG_ALL) +#endif + +#ifndef ZoO_DEBUG_LEARNING +   #define ZoO_DEBUG_LEARNING       (0 || ZoO_DEBUG_ALL) +#endif + +#ifndef ZoO_DEBUG_NETWORK +   #define ZoO_DEBUG_NETWORK        (0 || ZoO_DEBUG_ALL) +#endif + +#define ZoO_ENABLE_WARNINGS_OUTPUT              1 +#define ZoO_ENABLE_RUNTIME_ERRORS_OUTPUT        1 +#define ZoO_ENABLE_PROGRAMMING_ERRORS_OUTPUT    1 +#define ZoO_ENABLE_FATAL_ERROR_OUTPUT           1 + +#ifdef ZoO_ENABLE_ERROR_LOCATION +   #define ZoO_LOCATION "[" __FILE__ "][" ZoO_TO_STRING(__LINE__) "]" +#else +   #define ZoO_LOCATION "" +#endif + +#define ZoO_PRINT_STDERR(symbol, str, ...)\ +   fprintf(stderr, "[" symbol "]" ZoO_LOCATION " " str "\n", __VA_ARGS__); + +/* + * Given that we use preprocessor contants as flags, we can expect the compilers + * to remove the test condition for disabled flags. No need to be shy about + * allowing many debug options. + */ + +#define ZoO_DEBUG(flag, str, ...)\ +   ZoO_ISOLATE\ +   (\ +      if (flag)\ +      {\ +         ZoO_PRINT_STDERR("D", str, __VA_ARGS__);\ +      }\ +   ) + + +#define ZoO_WARNING(str, ...)\ +   ZoO_ISOLATE\ +   (\ +      if (ZoO_ENABLE_WARNINGS_OUTPUT)\ +      {\ +         ZoO_PRINT_STDERR("W", str, __VA_ARGS__);\ +      }\ +   ) + +#define ZoO_ERROR(str, ...)\ +   ZoO_ISOLATE\ +   (\ +      if (ZoO_ENABLE_RUNTIME_ERRORS_OUTPUT)\ +      {\ +         ZoO_PRINT_STDERR("E", str, __VA_ARGS__);\ +      }\ +   ) + +#define ZoO_PROG_ERROR(str, ...)\ +   ZoO_ISOLATE\ +   (\ +      if (ZoO_ENABLE_PROGRAMMING_ERRORS_OUTPUT)\ +      {\ +         ZoO_PRINT_STDERR("P", str, __VA_ARGS__);\ +      }\ +   ) + +#define ZoO_FATAL(str, ...)\ +   ZoO_ISOLATE\ +   (\ +     if (ZoO_ENABLE_FATAL_ERROR_OUTPUT)\ +      {\ +         ZoO_PRINT_STDERR("F", str, __VA_ARGS__);\ +      }\ +   ) + +/* For outputs without dynamic content (static). ******************************/ + +#define ZoO_PRINT_S_STDERR(symbol, str)\ +   fprintf(stderr, "[" symbol "]" ZoO_LOCATION " " str "\n"); + +#define ZoO_S_DEBUG(flag, str)\ +   ZoO_ISOLATE\ +   (\ +      if (flag)\ +      {\ +         ZoO_PRINT_S_STDERR("D", str);\ +      }\ +   ) + +#define ZoO_S_WARNING(str)\ +   ZoO_ISOLATE\ +   (\ +      if (ZoO_ENABLE_WARNINGS_OUTPUT)\ +      {\ +         ZoO_PRINT_S_STDERR("W", str);\ +      }\ +   ) + +#define ZoO_S_ERROR(str)\ +   ZoO_ISOLATE\ +   (\ +      if (ZoO_ENABLE_RUNTIME_ERRORS_OUTPUT)\ +      {\ +         ZoO_PRINT_S_STDERR("E", str);\ +      }\ +   ) + +#define ZoO_S_PROG_ERROR(str)\ +   ZoO_ISOLATE\ +   (\ +      if (ZoO_ENABLE_PROGRAMMING_ERRORS_OUTPUT)\ +      {\ +         ZoO_PRINT_S_STDERR("P", str);\ +      }\ +   ) + +#define ZoO_S_FATAL(str)\ +   ZoO_ISOLATE\ +   (\ +     if (ZoO_ENABLE_FATAL_ERROR_OUTPUT)\ +      {\ +         ZoO_PRINT_S_STDERR("F", str);\ +      }\ +   ) + +#endif diff --git a/src/io/network.c b/src/io/network.c new file mode 100644 index 0000000..c8d05a2 --- /dev/null +++ b/src/io/network.c @@ -0,0 +1,483 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> + +/* "POSIX.1  does not require the inclusion of <sys/types.h>" */ +/* - man page for setsockopt */ +/* #include <sys/types.h> */ +#include <sys/socket.h> +#include <sys/time.h> + +#include "error.h" + +#include "network.h" + +static int reconnect (struct ZoO_network net [const restrict static 1]) +{ +   struct timeval timeout; +   int old_errno = errno; + +   errno = 0; +   timeout.tv_sec = ZoO_NETWORK_TIMEOUT; +   timeout.tv_usec = 0; + +   if (net->connection != -1) +   { +      close(net->connection); +   } + +   net->connection = +      socket +      ( +         net->addrinfo->ai_family, +         net->addrinfo->ai_socktype, +         net->addrinfo->ai_protocol +      ); + +   if (net->connection == -1) +   { +      ZoO_FATAL +      ( +         "Could not create socket: %s.", +         strerror(errno) +      ); + +      errno = old_errno; + +      return -1; +   } + +   errno = 0; + +   if +   ( +      ( +         setsockopt +         ( +            net->connection, +            SOL_SOCKET, +            SO_RCVTIMEO, +            (const void *) &timeout, +            (socklen_t) sizeof(struct timeval) +         ) < 0 +      ) +      || +      ( +         setsockopt +         ( +            net->connection, +            SOL_SOCKET, +            SO_SNDTIMEO, +            (const void *) &timeout, +            (socklen_t) sizeof(struct timeval) +         ) < 0 +      ) +   ) +   { +      ZoO_ERROR("Could not set timeout on network socket: %s", strerror(errno)); + +      errno = old_errno; + +      return -1; +   } + +   errno = old_errno; + +   ZoO_S_DEBUG(ZoO_DEBUG_NETWORK, "(Re)connecting to network..."); + +   if +   ( +      connect +      ( +         net->connection, +         net->addrinfo->ai_addr, +         net->addrinfo->ai_addrlen +      ) != 0 +   ) +   { +      ZoO_ERROR +      ( +         "Unable to connect to the network: %s", +         strerror(errno) +      ); + +      errno = old_errno; + +      return -1; +   } + +   errno = old_errno; + +   snprintf +   ( +      net->msg, +      512, +      "USER %s 8 * :%s\r\n", +      net->user, +      net->name +   ); + +   errno = 0; + +   if (write(net->connection, net->msg, strlen(net->msg)) < 1) +   { +      ZoO_ERROR +      ( +         "Unable to write to the network: %s", +         strerror(errno) +      ); + +      errno = old_errno; + +      return -1; +   } + +   snprintf +   ( +      net->msg, +      512, +      "NICK %s\r\n", +      net->nick +   ); + +   errno = 0; + +   if (write(net->connection, net->msg, strlen(net->msg)) < 1) +   { +      ZoO_ERROR +      ( +         "Unable to write to the network: %s", +         strerror(errno) +      ); + +      errno = old_errno; + +      return -1; +   } + +   errno = old_errno; + +   net->buffer_remaining = 0; +   net->buffer_index = 0; +   ZoO_S_DEBUG(ZoO_DEBUG_NETWORK, "(Re)connected."); + +   return 0; +} + +int ZoO_network_connect +( +   struct ZoO_network net [const static 1], +   const char host [const restrict static 1], +   const char port [const restrict static 1], +   const char channel [const restrict static 1], +   const char user [const restrict static 1], +   const char name [const restrict static 1], +   const char nick [const restrict static 1] +) +{ +   int error; +   struct addrinfo hints; +   const int old_errno = errno; + +   net->connection = -1; +   net->channel = channel; +   net->user = user; +   net->name = name; +   net->nick = nick; +   net->buffer_index = 0; +   net->buffer_remaining = 0; + +   memset(&hints, 0, sizeof(struct addrinfo)); +   memset(net->msg, 0, (sizeof(ZoO_char) * 513)); + +   hints.ai_family = AF_INET; +   hints.ai_socktype = SOCK_STREAM; + +   errno = 0; + +   error = getaddrinfo(host, port, &hints, &(net->addrinfo)); + +   if (error != 0) +   { +      if (error == EAI_SYSTEM) +      { +         ZoO_ERROR +         ( +            "Could not retrieve server information: %s.", +            strerror(errno) +         ); +      } +      else +      { +         ZoO_FATAL +         ( +            "Could not retrieve server information: %s.", +            gai_strerror(error) +         ); +      } + +      errno = old_errno; + +      return -1; +   } + +   errno = 0; + + +   reconnect(net); + +   return 0; +} + +int ZoO_network_receive +( +   struct ZoO_network net [const restrict static 1], +   size_t msg_offset [const restrict static 1], +   size_t msg_size [const restrict static 1] +) +{ +   int old_errno; +   ssize_t in_count, in_index, msg_index, cmd; + +   old_errno = errno; + +   for (;;) +   { +      msg_index = 0; + +      errno = 0; + +      while +      ( +         ( +            (in_count = +               read( +                  net->connection, +                  (net->buffer + net->buffer_index), +                  (512 - net->buffer_index) +               ) +            ) > 0 +         ) +      ) +      { +         net->buffer_remaining += in_count; + +         for +         ( +            in_index = 0; +            in_index < net->buffer_remaining; +            ++in_index +         ) +         { +            net->msg[msg_index] = net->buffer[net->buffer_index + in_index]; + +            if +            ( +               (msg_index == 511) +               || +               ( +                  (msg_index > 0) +                  && (net->msg[msg_index - 1] == '\r') +                  && (net->msg[msg_index] == '\n') +               ) +            ) +            { +               net->msg[msg_index + 1] = '\0'; + + +               if (net->buffer_index != net->buffer_remaining) +               { +                  memmove +                  ( +                     net->buffer, +                     (net->buffer + net->buffer_index), +                     (size_t) net->buffer_remaining +                  ); + +                  net->buffer_index = 0; +               } + +               net->buffer_remaining -= (in_index + 1); + +               errno = old_errno; + +               goto READ_MSG; +            } + +            ++msg_index; +         } + +         net->buffer_remaining = 0; +         net->buffer_index = 0; + +         errno = 0; +      } + +      ZoO_ERROR +      ( +         "Something went wrong while trying to read from the network: %s.", +         strerror(errno) +      ); + +      errno = old_errno; + +      if (reconnect(net) < 0) +      { +         return -1; +      } + +      continue; + +      READ_MSG: + +      ZoO_DEBUG(ZoO_DEBUG_NETWORK, "[NET->in] %s\n", net->msg); + +      /* XXX: doesn't that prevent net [restrict]? */ +      if (ZoO_IS_PREFIX("PING", net->msg)) +      { +         errno = 0; + +         net->msg[1] = 'O'; + +         if (write(net->connection, net->msg, strlen(net->msg)) < 1) +         { +            ZoO_ERROR("Could not reply to PING request: %s", strerror(errno)); + +            errno = old_errno; + +            if (reconnect(net) < 0) +            { +               return -1; +            } + +            continue; +         } + +         ZoO_DEBUG(ZoO_DEBUG_NETWORK, "[NET->out] %s\n", net->msg); + +         errno = old_errno; +      } +      else if (net->msg[0] == ':') +      { +         cmd = 0; + +         for (in_index = 1; in_index < 512; in_index++) +         { +            if (net->msg[in_index] == ' ') +            { +               cmd = (in_index + 1); + +               break; +            } +         } + +         if (cmd == 0) +         { +            continue; +         } + +         if (ZoO_IS_PREFIX("001", (net->msg + cmd))) +         { +            snprintf +            ( +               net->msg, +               512, +               "JOIN :%s\r\n", +               net->channel +            ); + +            errno = 0; + +            if (write(net->connection, net->msg, strlen(net->msg)) < 1) +            { +               ZoO_ERROR +               ( +                  "Could not send JOIN request: %s", +                  strerror(errno) +               ); + +               errno = old_errno; + +               if (reconnect(net) < 0) +               { +                  return -1; +               } +            } + +            ZoO_DEBUG(ZoO_DEBUG_NETWORK, "[NET->out] %s", net->msg); + +            continue; +         } + +         if (ZoO_IS_PREFIX("PRIVMSG", (net->msg + cmd))) +         { +            for (; in_index < 512; in_index++) +            { +               if (net->msg[in_index] == ':') +               { +                  cmd = (in_index + 1); + +                  break; +               } +            } + +            *msg_offset = cmd; +            *msg_size = (msg_index - *msg_offset - 1); + +            /*net->msg[*msg_size - 1] = '\0'; */ + +            return 0; +         } +      } +   } +} + +int ZoO_network_send (struct ZoO_network net [const restrict static 1]) +{ +   int const old_errno = errno; + +   snprintf +   ( +      net->buffer, +      512, +      "PRIVMSG %s :%s\r\n", +      net->channel, +      net->msg +   ); + +   errno = 0; + +   if (write(net->connection, net->buffer, strlen(net->buffer)) < 1) +   { +      ZoO_ERROR +      ( +         "Could not send PRIVMSG: %s.", +         strerror(errno) +      ); + +      errno = old_errno; + +      if (reconnect(net) < 0) +      { +         return -2; +      } +      else +      { +         return -1; +      } +   } + +   errno = old_errno; + +   ZoO_DEBUG(ZoO_DEBUG_NETWORK, "[NET->out] %s", net->buffer); + +   return 0; +} + +void ZoO_network_disconnect (struct ZoO_network net [const restrict static 1]) +{ +   freeaddrinfo(net->addrinfo); +   close(net->connection); +} + diff --git a/src/io/network.h b/src/io/network.h new file mode 100644 index 0000000..ac7284a --- /dev/null +++ b/src/io/network.h @@ -0,0 +1,27 @@ +#ifndef _ZoO_IO_NETWORK_H_ +#define _ZoO_IO_NETWORK_H_ +#include "network_types.h" + +int ZoO_network_connect +( +   struct ZoO_network net [const static 1], +   const char host [const restrict static 1], +   const char port [const restrict static 1], +   const char channel [const restrict static 1], +   const char user [const restrict static 1], +   const char name [const restrict static 1], +   const char nick [const restrict static 1] +); + +int ZoO_network_receive +( +   struct ZoO_network net [const static 1], +   size_t msg_offset [const restrict static 1], +   size_t msg_size [const restrict static 1] +); + +int ZoO_network_send (struct ZoO_network net [const restrict static 1]); + +void ZoO_network_disconnect (struct ZoO_network net [const restrict static 1]); + +#endif diff --git a/src/io/network_types.h b/src/io/network_types.h new file mode 100644 index 0000000..16c81da --- /dev/null +++ b/src/io/network_types.h @@ -0,0 +1,26 @@ +#ifndef _ZoO_IO_NETWORK_TYPES_H_ +#define _ZoO_IO_NETWORK_TYPES_H_ + +#define POSIX_C_SOURCE + +#include <sys/types.h> +#include <sys/socket.h> +#include <netdb.h> + +#include "../pervasive.h" + +struct ZoO_network +{ +   size_t buffer_index; +   size_t buffer_remaining; +   struct addrinfo * addrinfo; +   ZoO_char buffer [513]; +   ZoO_char msg [513]; +   int connection; +   const char * restrict channel; +   const char * restrict user; +   const char * restrict name; +   const char * restrict nick; +}; + +#endif diff --git a/src/io/parameters.c b/src/io/parameters.c new file mode 100644 index 0000000..0f7d05c --- /dev/null +++ b/src/io/parameters.c @@ -0,0 +1,354 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> + +#include "../pervasive.h" + +#include "error.h" + +#include "parameters.h" + +static void load_default_parameters +( +   struct ZoO_parameters param [const restrict static 1] +) +{ +   param->data_filename       = ZoO_DEFAULT_DATA_FILENAME; + +   param->irc_server_addr     = ZoO_DEFAULT_IRC_SERVER_ADDR; +   param->irc_server_port     = ZoO_DEFAULT_IRC_SERVER_PORT; +   param->irc_server_channel  = ZoO_DEFAULT_IRC_SERVER_CHANNEL; +   param->irc_username        = ZoO_DEFAULT_IRC_USERNAME; +   param->irc_realname        = ZoO_DEFAULT_IRC_REALNAME; + +   param->reply_rate          = ZoO_DEFAULT_REPLY_RATE; + +   param->aliases_count = 0; +   param->aliases = NULL; +} + +static void print_help (const char exec [const restrict static 1]) +{ +   printf +   ( +      "Usage: %s [option_1 option_2 ...] NICKNAME [ALIAS_1 ALIAS_2 ...] \n" +      "NICKNAME is used as the IRC nickname value.\n" +      "If NICKNAME or any ALIAS is found in an event, the program will reply.\n" +      "\nAvailable options:\n" +      "   [--data-filename | -df] FILENAME\n" +      "      Learn content from FILENAME before connecting.\n" +      "      Default: %s.\n" +      "   [--irc-server-addr | -isa] IRC_SERVER_ADDR\n" +      "      Connect to this server address.\n" +      "      Default: %s.\n" +      "   [--irc-server-port | -isp] IRC_SERVER_PORT\n" +      "      Connect to this server port.\n" +      "      Default: %s.\n" +      "   [--irc-server-channel | -isc] IRC_SERVER_CHANNEL\n" +      "      Connect to this server's channel.\n" +      "      Default: %s.\n" +      "   [--irc-username | -iu] USERNAME\n" +      "      Connect using this as 'username' (shown in WHOIS).\n" +      "      Default: %s.\n" +      "   [--irc-realname | -ir] REALNAME\n" +      "      Connect using this as 'realname' (shown in WHOIS).\n" +      "      Default: %s.\n" +      "   [--reply-rate | -rr] REPLY_RATE\n" +      "      Chance to reply to an event (integer, range [0, 100]).\n" +      "      Default: %d.\n", +      exec, +      ZoO_DEFAULT_DATA_FILENAME, +      ZoO_DEFAULT_IRC_SERVER_ADDR, +      ZoO_DEFAULT_IRC_SERVER_PORT, +      ZoO_DEFAULT_IRC_SERVER_CHANNEL, +      ZoO_DEFAULT_IRC_USERNAME, +      ZoO_DEFAULT_IRC_REALNAME, +      ZoO_DEFAULT_REPLY_RATE +   ); +} + +static int parse_string_arg +( +   const char * restrict dest [const restrict static 1], +   int const i, +   const char * restrict argv [const restrict static 1], +   int const argc +) +{ +   if (i == argc) +   { +      ZoO_FATAL +      ( +         "Missing value for parameter '%s'.", +         /* Safe: i > 1 */ +         argv[i - 1] +      ); + +      return -1; +   } + +   *dest = argv[i]; + +   return 0; +} + +static int parse_integer_arg +( +   int dest [const restrict static 1], +   int const i, +   const char * argv [const restrict static 1], +   int const argc, +   int const min_val, +   int const max_val +) +{ +   long int result; +   char * endptr; +   const int old_errno = errno; + +   if (i == argc) +   { +      ZoO_FATAL +      ( +         "Missing value for parameter '%s'.", +         /* Safe: i > 1 */ +         argv[i - 1] +      ); + +      return -1; +   } + +   errno = 0; + +   result = strtol(argv[i], &endptr, 10); + +   if +   ( +      (errno != 0) +      || ((*endptr) == '\n') +      || (result < min_val) +      || (result > max_val) +   ) +   { +      ZoO_FATAL +      ( +         "Invalid or missing value for parameter '%s', accepted range is " +         "[%d, %d] (integer).", +         /* Safe: i > 1 */ +         argv[i - 1], +         min_val, +         max_val +      ); + +      errno = old_errno; + +      return -1; +   } + +   *dest = (int) result; + +   errno = old_errno; + +   return 0; +} + +int ZoO_parameters_initialize +( +   struct ZoO_parameters param [const restrict static 1], +   int const argc, +   const char * argv [const restrict static argc] +) +{ +   int i; + +   load_default_parameters(param); + +   for (i = 1; i < argc; ++i) +   { +      if +      ( +         (strcmp(argv[i], "--data-filename") == 0) +         || (strcmp(argv[i], "-df") == 0) +      ) +      { +         i += 1; + +         if +         ( +            parse_string_arg +            ( +               &(param->data_filename), +               i, +               argv, +               argc +            ) < 0 +         ) +         { +            return -1; +         } +      } +      else if +      ( +         (strcmp(argv[i], "--irc-server-addr") == 0) +         || (strcmp(argv[i], "-isa") == 0) +      ) +      { +         i += 1; + +         if +         ( +            parse_string_arg +            ( +               &(param->irc_server_addr), +               i, +               argv, +               argc +            ) < 0 +         ) +         { +            return -1; +         } +      } +      else if +      ( +         (strcmp(argv[i], "--irc-server-port") == 0) +         || (strcmp(argv[i], "-isp") == 0) +      ) +      { +         i += 1; + +         if +         ( +            parse_string_arg +            ( +               &(param->irc_server_port), +               i, +               argv, +               argc +            ) < 0 +         ) +         { +            return -1; +         } +      } +      else if +      ( +         (strcmp(argv[i], "--irc-server-channel") == 0) +         || (strcmp(argv[i], "-isc") == 0) +      ) +      { +         i += 1; + +         if +         ( +            parse_string_arg +            ( +               &(param->irc_server_channel), +               i, +               argv, +               argc +            ) < 0 +         ) +         { +            return -1; +         } +      } +      else if +      ( +         (strcmp(argv[i], "--irc-username") == 0) +         || (strcmp(argv[i], "-iu") == 0) +      ) +      { +         i += 1; + +         if +         ( +            parse_string_arg +            ( +               &(param->irc_username), +               i, +               argv, +               argc +            ) < 0 +         ) +         { +            return -1; +         } +      } +      else if +      ( +         (strcmp(argv[i], "--irc-realname") == 0) +         || (strcmp(argv[i], "-in") == 0) +      ) +      { +         i += 1; + +         if +         ( +            parse_string_arg +            ( +               &(param->irc_realname), +               i, +               argv, +               argc +            ) < 0 +         ) +         { +            return -1; +         } +      } +      else if +      ( +         (strcmp(argv[i], "--reply-rate") == 0) +         || (strcmp(argv[i], "-rr") == 0) +      ) +      { +         i += 1; + +         if +         ( +            parse_integer_arg +            ( +               &(param->reply_rate), +               i, +               argv, +               argc, +               0, +               100 +            ) < 0 +         ) +         { +            return -1; +         } +      } +      else if +      ( +         (strcmp(argv[i], "--help") == 0) +         || (strcmp(argv[i], "-h") == 0) +      ) +      { +         print_help(argv[0]); + +         return 0; +      } +      else +      { +         break; +      } +   } + +   if (i == argc) +   { +      ZoO_S_FATAL("Missing argument: NICKNAME"); + +      print_help(argv[0]); + +      return -1; +   } + +   param->aliases_count = (argc - i); +   param->aliases = (argv + i); + +   return 1; +} diff --git a/src/io/parameters.h b/src/io/parameters.h new file mode 100644 index 0000000..1011e2b --- /dev/null +++ b/src/io/parameters.h @@ -0,0 +1,13 @@ +#ifndef _ZoO_IO_PARAMETERS_H_ +#define _ZoO_IO_PARAMETERS_H_ + +#include "parameters_types.h" + +int ZoO_parameters_initialize +( +   struct ZoO_parameters param [const static 1], +   int const argc, +   const char * argv [const static argc] +); + +#endif diff --git a/src/io/parameters_types.h b/src/io/parameters_types.h new file mode 100644 index 0000000..6d511d8 --- /dev/null +++ b/src/io/parameters_types.h @@ -0,0 +1,20 @@ +#ifndef _ZoO_IO_PARAMETERS_TYPES_H_ +#define _ZoO_IO_PARAMETERS_TYPES_H_ + +struct ZoO_parameters +{ +   const char * restrict data_filename; + +   const char * restrict irc_server_addr; +   const char * restrict irc_server_port; +   const char * restrict irc_server_channel; +   const char * restrict irc_username; +   const char * restrict irc_realname; + +   int reply_rate; + +   int aliases_count; +   const char * restrict * restrict aliases; +}; + +#endif diff --git a/src/pervasive.h b/src/pervasive.h new file mode 100644 index 0000000..d2b0344 --- /dev/null +++ b/src/pervasive.h @@ -0,0 +1,59 @@ +#ifndef _ZoO_PERVASIVE_H_ +#define _ZoO_PERVASIVE_H_ + +#include <limits.h> + +#ifndef ZoO_NETWORK_TIMEOUT +   #define ZoO_NETWORK_TIMEOUT            200 +#endif + +#ifndef ZoO_MAX_REPLY_WORDS +   #define ZoO_MAX_REPLY_WORDS            64 +#endif + +#ifndef ZoO_DEFAULT_DATA_FILENAME +   #define ZoO_DEFAULT_DATA_FILENAME      "./memory.txt" +#endif + +#ifndef ZoO_DEFAULT_IRC_SERVER_ADDR +   #define ZoO_DEFAULT_IRC_SERVER_ADDR    "irc.foonetic.net" +#endif + +#ifndef ZoO_DEFAULT_IRC_SERVER_PORT +   #define ZoO_DEFAULT_IRC_SERVER_PORT    "6667" +#endif + +#ifndef ZoO_DEFAULT_IRC_SERVER_CHANNEL +   #define ZoO_DEFAULT_IRC_SERVER_CHANNEL "#theborghivemind" +#endif + +#ifndef ZoO_DEFAULT_IRC_USERNAME +   #define ZoO_DEFAULT_IRC_USERNAME       "zeroofone" +#endif + +#ifndef ZoO_DEFAULT_IRC_REALNAME +   #define ZoO_DEFAULT_IRC_REALNAME       "Zero of One (bot)" +#endif + +#ifndef ZoO_DEFAULT_REPLY_RATE +   #define ZoO_DEFAULT_REPLY_RATE         8 +#endif + +typedef unsigned int ZoO_index; +#define ZoO_INDEX_MAX UINT_MAX + +/* ZoO_char = UTF-8 char */ +typedef char ZoO_char; +/* Functions that can handle UTF-8 'char' will use this symbol. */ +#define ZoO_CHAR_STRING_SYMBOL "%s" + +#define ZoO__TO_STRING(x) #x +#define ZoO_TO_STRING(x) ZoO__TO_STRING(x) +#define ZoO_ISOLATE(a) do {a} while (0) + +/* strncmp stops at '\0' and strlen does not count '\0'. */ +#define ZoO_IS_PREFIX(a, b) (strncmp(a, b, strlen(a)) == 0) + +#define ZoO_STRING_EQUALS(a, b) (strcmp(a, b) == 0) + +#endif diff --git a/src/tool/CMakeLists.txt b/src/tool/CMakeLists.txt new file mode 100644 index 0000000..3a1d947 --- /dev/null +++ b/src/tool/CMakeLists.txt @@ -0,0 +1,7 @@ +set( +   SRC_FILES ${SRC_FILES} +   ${CMAKE_CURRENT_SOURCE_DIR}/strings.c +) + +set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) + diff --git a/src/tool/strings.c b/src/tool/strings.c new file mode 100644 index 0000000..fc4434a --- /dev/null +++ b/src/tool/strings.c @@ -0,0 +1,280 @@ +#define _POSIX_C_SOURCE 200809L +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../io/error.h" + +#include "strings.h" + + +void ZoO_strings_initialize (struct ZoO_strings s [const restrict static 1]) +{ +   s->words_count = 0; +   s->words = (ZoO_char **) NULL; +   s->word_sizes = (size_t *) NULL; +} + +void ZoO_strings_finalize (struct ZoO_strings s [const restrict static 1]) +{ +   if (s->words_count != 0) +   { +      ZoO_index i; + +      for (i = 0; i < s->words_count; ++i) +      { +         free((void *) s->words[i]); +      } + +      s->words_count = 0; + +      free((void *) s->words); +      free((void *) s->word_sizes); + +      s->words = (ZoO_char **) NULL; +      s->word_sizes = (size_t *) NULL; +   } +} + +static int add_word +( +   struct ZoO_strings s [const restrict static 1], +   size_t const line_size, +   const ZoO_char line [const restrict static line_size] +) +{ +   size_t * new_s_word_sizes; +   ZoO_char * new_word, ** new_s_words; + +   if (s->words_count == ZoO_INDEX_MAX) +   { +      ZoO_S_WARNING("Data input sentence has too many words."); + +      return -1; +   } + +   /* overflow-safe, as line_size < SIZE_MAX */ +   new_word = (ZoO_char *) calloc((line_size + 1), sizeof(ZoO_char)); + +   if (new_word == (ZoO_char *) NULL) +   { +      ZoO_S_WARNING("Unable to allocate memory to extract new word."); + +      return -1; +   } + +   memcpy((void *) new_word, (const void *) line, line_size); + +   new_word[line_size] = '\0'; + +   new_s_words = +      (ZoO_char **) realloc +      ( +         (void *) s->words, +         /* XXX: (sizeof() * _) assumed overflow-safe. */ +         /* (di->words_count + 1) overflow-safe */ +         (sizeof(ZoO_char *) * (s->words_count + 1)) +      ); + +   if (new_s_words == (ZoO_char **) NULL) +   { +      ZoO_S_WARNING("Unable to reallocate memory to extract new word."); + +      free((void *) new_word); + +      return -1; +   } + +   s->words = new_s_words; + +   new_s_word_sizes = +      (size_t *) realloc +      ( +         (void *) s->word_sizes, +         /* XXX: (sizeof() * _) assumed overflow-safe. */ +         /* (di->words_count + 1) overflow-safe */ +         (sizeof(size_t) * (s->words_count + 1)) +      ); + +   if (new_s_word_sizes == (size_t *) NULL) +   { +      ZoO_S_WARNING("Unable to reallocate memory to extract new word."); + +      free((void *) new_word); + +      return -1; +   } + +   s->word_sizes = new_s_word_sizes; + +   s->words[s->words_count] = new_word; +   s->word_sizes[s->words_count] = (line_size + 1); + +   s->words_count += 1; + +   return 0; +} + +static int parse_word +( +   struct ZoO_strings s [const restrict static 1], +   ZoO_index const punctuations_count, +   const ZoO_char punctuations [const restrict static punctuations_count], +   size_t const line_size, +   ZoO_char line [const static line_size] +) +{ +   ZoO_index j; + +   if (line_size == 0) +   { +      return 0; +   } + +   for (j = 0; j < line_size; ++j) +   { +      switch (line[j]) +      { +         case 'A': +         case 'B': +         case 'C': +         case 'D': +         case 'E': +         case 'F': +         case 'G': +         case 'H': +         case 'I': +         case 'J': +         case 'K': +         case 'L': +         case 'M': +         case 'N': +         case 'O': +         case 'P': +         case 'Q': +         case 'R': +         case 'S': +         case 'T': +         case 'U': +         case 'V': +         case 'W': +         case 'X': +         case 'Y': +         case 'Z': +            line[j] = 'z' - ('Z' - line[j]); +            break; + +         default: +            break; +      } +   } + +   for (j = 0; j < punctuations_count; ++j) +   { +      /* overflow-safe: line_size > 1 */ +      if (line[line_size - 1] == punctuations[j]) +      { +         if (line_size > 1) +         { +            if +            ( +               /* overflow-safe: line_size > 1 */ +               (add_word(s, (line_size - 1), line) < 0) +               /* overflow-safe: line_size > 1 */ +               /* prevents line[restrict] */ +               || (add_word(s, 1, (line + (line_size - 1))) < 0) +            ) +            { +               return -1; +            } + +            return 0; +         } +      } +   } + +   return add_word(s, line_size, line); +} + +int ZoO_strings_parse +( +   struct ZoO_strings s [const restrict static 1], +   size_t const input_size, +   ZoO_char input [const restrict], +   ZoO_index const punctuations_count, +   const ZoO_char punctuations [const restrict static punctuations_count] +) +{ +   size_t i, w_start; + +   ZoO_strings_finalize(s); + +   if (input == NULL) +   { +      return 0; +   } + +   i = 0; + +   /* overflow-safe: input is '\0' terminated. */ +   while (input[i] == ' ') +   { +      ++i; +   } + +   w_start = i; + +   for (; i < input_size; ++i) +   { +      if (input[i] == ' ') +      { +         if +         ( +            parse_word +            ( +               s, +               punctuations_count, +               punctuations, +               /* overflow-safe: w_start < i */ +               (i - w_start), +               (input + w_start) +            ) < 0 +         ) +         { +            ZoO_strings_finalize(s); + +            return -1; +         } + +         ++i; + +         /* safe, as input is terminated by '\0' */ +         while (input[i] == ' ') +         { +            ++i; +         } + +         w_start = i; +      } +   } + +   if +   ( +      parse_word +      ( +         s, +         punctuations_count, +         punctuations, +         /* overflow-safe: w_start < i */ +         (i - w_start), +         (input + w_start) +      ) < 0 +   ) +   { +      ZoO_strings_finalize(s); + +      return -1; +   } + +   return 0; +} diff --git a/src/tool/strings.h b/src/tool/strings.h new file mode 100644 index 0000000..6e6e211 --- /dev/null +++ b/src/tool/strings.h @@ -0,0 +1,19 @@ +#ifndef _ZoO_TOOL_STRINGS_H_ +#define _ZoO_TOOL_STRINGS_H_ + +#include "strings_types.h" + +void ZoO_strings_initialize (struct ZoO_strings s [const restrict static 1]); + +void ZoO_strings_finalize (struct ZoO_strings s [const restrict static 1]); + +int ZoO_strings_parse +( +   struct ZoO_strings s [const static 1], +   size_t const input_size, +   ZoO_char input [const restrict], +   ZoO_index const punctuations_count, +   const ZoO_char punctuations [const restrict static punctuations_count] +); + +#endif diff --git a/src/tool/strings_types.h b/src/tool/strings_types.h new file mode 100644 index 0000000..f74dcc8 --- /dev/null +++ b/src/tool/strings_types.h @@ -0,0 +1,15 @@ +#ifndef _ZoO_TOOL_STRINGS_TYPES_H_ +#define _ZoO_TOOL_STRINGS_TYPES_H_ + +#include <stdio.h> + +#include "../pervasive.h" + +struct ZoO_strings +{ +   ZoO_index words_count; +   ZoO_char * restrict * restrict words; +   size_t * restrict word_sizes; +}; + +#endif | 


