| summaryrefslogtreecommitdiff | 
diff options
Diffstat (limited to 'src/tool/strings.c')
| -rw-r--r-- | src/tool/strings.c | 280 | 
1 files changed, 280 insertions, 0 deletions
diff --git a/src/tool/strings.c b/src/tool/strings.c new file mode 100644 index 0000000..fc4434a --- /dev/null +++ b/src/tool/strings.c @@ -0,0 +1,280 @@ +#define _POSIX_C_SOURCE 200809L +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../io/error.h" + +#include "strings.h" + + +void ZoO_strings_initialize (struct ZoO_strings s [const restrict static 1]) +{ +   s->words_count = 0; +   s->words = (ZoO_char **) NULL; +   s->word_sizes = (size_t *) NULL; +} + +void ZoO_strings_finalize (struct ZoO_strings s [const restrict static 1]) +{ +   if (s->words_count != 0) +   { +      ZoO_index i; + +      for (i = 0; i < s->words_count; ++i) +      { +         free((void *) s->words[i]); +      } + +      s->words_count = 0; + +      free((void *) s->words); +      free((void *) s->word_sizes); + +      s->words = (ZoO_char **) NULL; +      s->word_sizes = (size_t *) NULL; +   } +} + +static int add_word +( +   struct ZoO_strings s [const restrict static 1], +   size_t const line_size, +   const ZoO_char line [const restrict static line_size] +) +{ +   size_t * new_s_word_sizes; +   ZoO_char * new_word, ** new_s_words; + +   if (s->words_count == ZoO_INDEX_MAX) +   { +      ZoO_S_WARNING("Data input sentence has too many words."); + +      return -1; +   } + +   /* overflow-safe, as line_size < SIZE_MAX */ +   new_word = (ZoO_char *) calloc((line_size + 1), sizeof(ZoO_char)); + +   if (new_word == (ZoO_char *) NULL) +   { +      ZoO_S_WARNING("Unable to allocate memory to extract new word."); + +      return -1; +   } + +   memcpy((void *) new_word, (const void *) line, line_size); + +   new_word[line_size] = '\0'; + +   new_s_words = +      (ZoO_char **) realloc +      ( +         (void *) s->words, +         /* XXX: (sizeof() * _) assumed overflow-safe. */ +         /* (di->words_count + 1) overflow-safe */ +         (sizeof(ZoO_char *) * (s->words_count + 1)) +      ); + +   if (new_s_words == (ZoO_char **) NULL) +   { +      ZoO_S_WARNING("Unable to reallocate memory to extract new word."); + +      free((void *) new_word); + +      return -1; +   } + +   s->words = new_s_words; + +   new_s_word_sizes = +      (size_t *) realloc +      ( +         (void *) s->word_sizes, +         /* XXX: (sizeof() * _) assumed overflow-safe. */ +         /* (di->words_count + 1) overflow-safe */ +         (sizeof(size_t) * (s->words_count + 1)) +      ); + +   if (new_s_word_sizes == (size_t *) NULL) +   { +      ZoO_S_WARNING("Unable to reallocate memory to extract new word."); + +      free((void *) new_word); + +      return -1; +   } + +   s->word_sizes = new_s_word_sizes; + +   s->words[s->words_count] = new_word; +   s->word_sizes[s->words_count] = (line_size + 1); + +   s->words_count += 1; + +   return 0; +} + +static int parse_word +( +   struct ZoO_strings s [const restrict static 1], +   ZoO_index const punctuations_count, +   const ZoO_char punctuations [const restrict static punctuations_count], +   size_t const line_size, +   ZoO_char line [const static line_size] +) +{ +   ZoO_index j; + +   if (line_size == 0) +   { +      return 0; +   } + +   for (j = 0; j < line_size; ++j) +   { +      switch (line[j]) +      { +         case 'A': +         case 'B': +         case 'C': +         case 'D': +         case 'E': +         case 'F': +         case 'G': +         case 'H': +         case 'I': +         case 'J': +         case 'K': +         case 'L': +         case 'M': +         case 'N': +         case 'O': +         case 'P': +         case 'Q': +         case 'R': +         case 'S': +         case 'T': +         case 'U': +         case 'V': +         case 'W': +         case 'X': +         case 'Y': +         case 'Z': +            line[j] = 'z' - ('Z' - line[j]); +            break; + +         default: +            break; +      } +   } + +   for (j = 0; j < punctuations_count; ++j) +   { +      /* overflow-safe: line_size > 1 */ +      if (line[line_size - 1] == punctuations[j]) +      { +         if (line_size > 1) +         { +            if +            ( +               /* overflow-safe: line_size > 1 */ +               (add_word(s, (line_size - 1), line) < 0) +               /* overflow-safe: line_size > 1 */ +               /* prevents line[restrict] */ +               || (add_word(s, 1, (line + (line_size - 1))) < 0) +            ) +            { +               return -1; +            } + +            return 0; +         } +      } +   } + +   return add_word(s, line_size, line); +} + +int ZoO_strings_parse +( +   struct ZoO_strings s [const restrict static 1], +   size_t const input_size, +   ZoO_char input [const restrict], +   ZoO_index const punctuations_count, +   const ZoO_char punctuations [const restrict static punctuations_count] +) +{ +   size_t i, w_start; + +   ZoO_strings_finalize(s); + +   if (input == NULL) +   { +      return 0; +   } + +   i = 0; + +   /* overflow-safe: input is '\0' terminated. */ +   while (input[i] == ' ') +   { +      ++i; +   } + +   w_start = i; + +   for (; i < input_size; ++i) +   { +      if (input[i] == ' ') +      { +         if +         ( +            parse_word +            ( +               s, +               punctuations_count, +               punctuations, +               /* overflow-safe: w_start < i */ +               (i - w_start), +               (input + w_start) +            ) < 0 +         ) +         { +            ZoO_strings_finalize(s); + +            return -1; +         } + +         ++i; + +         /* safe, as input is terminated by '\0' */ +         while (input[i] == ' ') +         { +            ++i; +         } + +         w_start = i; +      } +   } + +   if +   ( +      parse_word +      ( +         s, +         punctuations_count, +         punctuations, +         /* overflow-safe: w_start < i */ +         (i - w_start), +         (input + w_start) +      ) < 0 +   ) +   { +      ZoO_strings_finalize(s); + +      return -1; +   } + +   return 0; +}  | 


