summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'src/tool/strings.c')
-rw-r--r--src/tool/strings.c280
1 files changed, 280 insertions, 0 deletions
diff --git a/src/tool/strings.c b/src/tool/strings.c
new file mode 100644
index 0000000..fc4434a
--- /dev/null
+++ b/src/tool/strings.c
@@ -0,0 +1,280 @@
+#define _POSIX_C_SOURCE 200809L
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h> /* defines SIZE_MAX */
+
+#include "../io/error.h"
+
+#include "strings.h"
+
+
+void ZoO_strings_initialize (struct ZoO_strings s [const restrict static 1])
+{
+ s->words_count = 0;
+ s->words = (ZoO_char **) NULL;
+ s->word_sizes = (size_t *) NULL;
+}
+
+void ZoO_strings_finalize (struct ZoO_strings s [const restrict static 1])
+{
+ if (s->words_count != 0)
+ {
+ ZoO_index i;
+
+ for (i = 0; i < s->words_count; ++i)
+ {
+ free((void *) s->words[i]);
+ }
+
+ s->words_count = 0;
+
+ free((void *) s->words);
+ free((void *) s->word_sizes);
+
+ s->words = (ZoO_char **) NULL;
+ s->word_sizes = (size_t *) NULL;
+ }
+}
+
+static int add_word
+(
+ struct ZoO_strings s [const restrict static 1],
+ size_t const line_size,
+ const ZoO_char line [const restrict static line_size]
+)
+{
+ size_t * new_s_word_sizes;
+ ZoO_char * new_word, ** new_s_words;
+
+ if (s->words_count == ZoO_INDEX_MAX)
+ {
+ ZoO_S_WARNING("Data input sentence has too many words.");
+
+ return -1;
+ }
+
+ /* overflow-safe, as line_size < SIZE_MAX */
+ new_word = (ZoO_char *) calloc((line_size + 1), sizeof(ZoO_char));
+
+ if (new_word == (ZoO_char *) NULL)
+ {
+ ZoO_S_WARNING("Unable to allocate memory to extract new word.");
+
+ return -1;
+ }
+
+ memcpy((void *) new_word, (const void *) line, line_size);
+
+ new_word[line_size] = '\0';
+
+ new_s_words =
+ (ZoO_char **) realloc
+ (
+ (void *) s->words,
+ /* XXX: (sizeof() * _) assumed overflow-safe. */
+ /* (di->words_count + 1) overflow-safe */
+ (sizeof(ZoO_char *) * (s->words_count + 1))
+ );
+
+ if (new_s_words == (ZoO_char **) NULL)
+ {
+ ZoO_S_WARNING("Unable to reallocate memory to extract new word.");
+
+ free((void *) new_word);
+
+ return -1;
+ }
+
+ s->words = new_s_words;
+
+ new_s_word_sizes =
+ (size_t *) realloc
+ (
+ (void *) s->word_sizes,
+ /* XXX: (sizeof() * _) assumed overflow-safe. */
+ /* (di->words_count + 1) overflow-safe */
+ (sizeof(size_t) * (s->words_count + 1))
+ );
+
+ if (new_s_word_sizes == (size_t *) NULL)
+ {
+ ZoO_S_WARNING("Unable to reallocate memory to extract new word.");
+
+ free((void *) new_word);
+
+ return -1;
+ }
+
+ s->word_sizes = new_s_word_sizes;
+
+ s->words[s->words_count] = new_word;
+ s->word_sizes[s->words_count] = (line_size + 1);
+
+ s->words_count += 1;
+
+ return 0;
+}
+
+static int parse_word
+(
+ struct ZoO_strings s [const restrict static 1],
+ ZoO_index const punctuations_count,
+ const ZoO_char punctuations [const restrict static punctuations_count],
+ size_t const line_size,
+ ZoO_char line [const static line_size]
+)
+{
+ ZoO_index j;
+
+ if (line_size == 0)
+ {
+ return 0;
+ }
+
+ for (j = 0; j < line_size; ++j)
+ {
+ switch (line[j])
+ {
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
+ case 'G':
+ case 'H':
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case 'Q':
+ case 'R':
+ case 'S':
+ case 'T':
+ case 'U':
+ case 'V':
+ case 'W':
+ case 'X':
+ case 'Y':
+ case 'Z':
+ line[j] = 'z' - ('Z' - line[j]);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ for (j = 0; j < punctuations_count; ++j)
+ {
+ /* overflow-safe: line_size > 1 */
+ if (line[line_size - 1] == punctuations[j])
+ {
+ if (line_size > 1)
+ {
+ if
+ (
+ /* overflow-safe: line_size > 1 */
+ (add_word(s, (line_size - 1), line) < 0)
+ /* overflow-safe: line_size > 1 */
+ /* prevents line[restrict] */
+ || (add_word(s, 1, (line + (line_size - 1))) < 0)
+ )
+ {
+ return -1;
+ }
+
+ return 0;
+ }
+ }
+ }
+
+ return add_word(s, line_size, line);
+}
+
+int ZoO_strings_parse
+(
+ struct ZoO_strings s [const restrict static 1],
+ size_t const input_size,
+ ZoO_char input [const restrict],
+ ZoO_index const punctuations_count,
+ const ZoO_char punctuations [const restrict static punctuations_count]
+)
+{
+ size_t i, w_start;
+
+ ZoO_strings_finalize(s);
+
+ if (input == NULL)
+ {
+ return 0;
+ }
+
+ i = 0;
+
+ /* overflow-safe: input is '\0' terminated. */
+ while (input[i] == ' ')
+ {
+ ++i;
+ }
+
+ w_start = i;
+
+ for (; i < input_size; ++i)
+ {
+ if (input[i] == ' ')
+ {
+ if
+ (
+ parse_word
+ (
+ s,
+ punctuations_count,
+ punctuations,
+ /* overflow-safe: w_start < i */
+ (i - w_start),
+ (input + w_start)
+ ) < 0
+ )
+ {
+ ZoO_strings_finalize(s);
+
+ return -1;
+ }
+
+ ++i;
+
+ /* safe, as input is terminated by '\0' */
+ while (input[i] == ' ')
+ {
+ ++i;
+ }
+
+ w_start = i;
+ }
+ }
+
+ if
+ (
+ parse_word
+ (
+ s,
+ punctuations_count,
+ punctuations,
+ /* overflow-safe: w_start < i */
+ (i - w_start),
+ (input + w_start)
+ ) < 0
+ )
+ {
+ ZoO_strings_finalize(s);
+
+ return -1;
+ }
+
+ return 0;
+}