| summaryrefslogtreecommitdiff |
diff options
| author | Nathanael Sensfelder <SpamShield0@MultiAgentSystems.org> | 2016-05-05 14:59:28 +0200 |
|---|---|---|
| committer | Nathanael Sensfelder <SpamShield0@MultiAgentSystems.org> | 2016-05-05 14:59:28 +0200 |
| commit | 3405b0c1635843cbb81f042364bfcf238d7dc930 (patch) | |
| tree | 39501fec9ec72863c929a45dbc297412bbf90688 | |
| parent | c28bb6d31a122ec983e1e0a0dd1a8bd198098c58 (diff) | |
Adds the current code.
It's been running for close to a month on one of the IRC channels I
frequent and seems to be working fine.
One should be aware that, among other missing features, this version
does not permanently store what the bot learns. Since I am currently
using a file with 431848 lines as its initial knowledge bank, this
particular feature is not a high priority.
Also note that Zero of One converts text to lowercase before reading
it, but does not convert its own aliases. This could potentially be a
cause for surprises when the aliases contain uppercase letters.
| -rw-r--r-- | CMakeLists.txt | 11 | ||||
| -rw-r--r-- | src/CMakeLists.txt | 5 | ||||
| -rw-r--r-- | src/core/CMakeLists.txt | 10 | ||||
| -rw-r--r-- | src/core/assimilate.c | 232 | ||||
| -rw-r--r-- | src/core/create_sentences.c | 486 | ||||
| -rw-r--r-- | src/core/knowledge.c | 447 | ||||
| -rw-r--r-- | src/core/knowledge.h | 42 | ||||
| -rw-r--r-- | src/core/knowledge_types.h | 46 | ||||
| -rw-r--r-- | src/core/main.c | 296 | ||||
| -rw-r--r-- | src/core/state_types.h | 16 | ||||
| -rw-r--r-- | src/io/CMakeLists.txt | 8 | ||||
| -rw-r--r-- | src/io/data_input.c | 98 | ||||
| -rw-r--r-- | src/io/data_input.h | 21 | ||||
| -rw-r--r-- | src/io/data_input_types.h | 16 | ||||
| -rw-r--r-- | src/io/error.h | 146 | ||||
| -rw-r--r-- | src/io/network.c | 483 | ||||
| -rw-r--r-- | src/io/network.h | 27 | ||||
| -rw-r--r-- | src/io/network_types.h | 26 | ||||
| -rw-r--r-- | src/io/parameters.c | 354 | ||||
| -rw-r--r-- | src/io/parameters.h | 13 | ||||
| -rw-r--r-- | src/io/parameters_types.h | 20 | ||||
| -rw-r--r-- | src/pervasive.h | 59 | ||||
| -rw-r--r-- | src/tool/CMakeLists.txt | 7 | ||||
| -rw-r--r-- | src/tool/strings.c | 280 | ||||
| -rw-r--r-- | src/tool/strings.h | 19 | ||||
| -rw-r--r-- | src/tool/strings_types.h | 15 |
26 files changed, 3183 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..ba70690 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 2.8 FATAL_ERROR) + +project("Zero of One") + +include(FindPkgConfig) + +add_subdirectory(src) +set(CMAKE_C_FLAGS "-D_POSIX_SOURCE -std=c99 -O2") +# ${SRC_FILES} is recursively defined in the subdirectories. +# Each subdirectory only adds the source files that are present at its level. +add_executable(zero_of_one ${SRC_FILES}) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..76a73ed --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,5 @@ +add_subdirectory(core) +add_subdirectory(io) +add_subdirectory(tool) + +set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt new file mode 100644 index 0000000..2722355 --- /dev/null +++ b/src/core/CMakeLists.txt @@ -0,0 +1,10 @@ +set( + SRC_FILES ${SRC_FILES} + ${CMAKE_CURRENT_SOURCE_DIR}/main.c + ${CMAKE_CURRENT_SOURCE_DIR}/knowledge.c + ${CMAKE_CURRENT_SOURCE_DIR}/assimilate.c + ${CMAKE_CURRENT_SOURCE_DIR}/create_sentences.c +) + +set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) + diff --git a/src/core/assimilate.c b/src/core/assimilate.c new file mode 100644 index 0000000..eb6aa17 --- /dev/null +++ b/src/core/assimilate.c @@ -0,0 +1,232 @@ +#include <stdlib.h> +#include <string.h> + +#include "../io/error.h" + +#include "knowledge.h" + +static int link_to +( + ZoO_index links_count [const restrict static 1], + ZoO_index * links_occurrences [const restrict static 1], + ZoO_index * links [const restrict static 1], + ZoO_index const target +) +{ + ZoO_index i, * new_p; + + for (i = 0; i < *links_count; ++i) + { + if ((*links)[i] == target) + { + if ((*links_occurrences)[i] == ZoO_INDEX_MAX) + { + ZoO_S_WARNING + ( + "Maximum link occurrences count has been reached." 
+ ); + + return -1; + } + + (*links_occurrences)[i] += 1; + + return 0; + } + } + + if (*links_count == ZoO_INDEX_MAX) + { + ZoO_S_WARNING("Maximum links count has been reached."); + + return -1; + } + + new_p = + (ZoO_index *) realloc + ( + *links_occurrences, + ( + ( + /* Safe: *links_count < ZoO_INDEX_MAX */ + (size_t) (*links_count + 1) + ) + * sizeof(ZoO_index) + ) + ); + + if (new_p == (ZoO_index *) NULL) + { + ZoO_S_ERROR("Could not reallocate a link occurrences list."); + + return -1; + } + + new_p[*links_count] = 1; + + *links_occurrences = new_p; + + new_p = + (ZoO_index *) realloc + ( + *links, + ( + ( + /* Safe: *links_count < ZoO_INDEX_MAX */ + (size_t) (*links_count + 1) + ) * sizeof(ZoO_index) + ) + ); + + if (new_p == (ZoO_index *) NULL) + { + ZoO_S_ERROR("Could not reallocate a link list."); + + return -1; + } + + new_p[*links_count] = target; + + *links = new_p; + + *links_count += 1; + + return 0; +} + +static int link_words +( + struct ZoO_knowledge k [const restrict static 1], + ZoO_index const a, + ZoO_index const b +) +{ + int error; + + error = + link_to + ( + &(k->words[a].forward_links_count), + &(k->words[a].forward_links_occurrences), + &(k->words[a].forward_links), + b + ); + + error = + ( + link_to + ( + &(k->words[b].backward_links_count), + &(k->words[b].backward_links_occurrences), + &(k->words[b].backward_links), + a + ) + | error + ); + + return error; +} + +int ZoO_knowledge_assimilate +( + struct ZoO_knowledge k [const static 1], + struct ZoO_strings string [const restrict static 1], + ZoO_index const aliases_count, + const char * restrict aliases [const restrict static aliases_count] +) +{ + int error; + ZoO_index curr_word, next_word; + ZoO_index curr_word_id, next_word_id; + + curr_word = 0; + + if (string->words_count == 0) + { + return 0; + } + + for (curr_word = 0; curr_word < aliases_count; ++curr_word) + { + if (ZoO_IS_PREFIX(aliases[curr_word], string->words[0])) + { + return 0; + } + } + + curr_word = 0; + + if 
(ZoO_knowledge_learn(k, string->words[curr_word], &curr_word_id) < 0) + { + return -1; + } + + if (link_words(k, ZoO_WORD_START_OF_LINE, curr_word_id) < 0) + { + error = -1; + + ZoO_WARNING + ( + "Could not indicate that '" + ZoO_CHAR_STRING_SYMBOL + "' was the first word of the sentence.", + string->words[0] + ); + } + + next_word = 1; + + error = 0; + + while (next_word < string->words_count) + { + /* prevents words [restrict], k [restrict] */ + if (ZoO_knowledge_learn(k, string->words[next_word], &next_word_id) < 0) + { + return -1; + } + + if (link_words(k, curr_word_id, next_word_id) < 0) + { + error = -1; + + ZoO_WARNING + ( + "Could not add a link between words '" + ZoO_CHAR_STRING_SYMBOL + "' and '" + ZoO_CHAR_STRING_SYMBOL + "'.", + string->words[curr_word], + string->words[next_word] + ); + } + + curr_word = next_word; + curr_word_id = next_word_id; + /* + * Safe: + * - next_word < words_count + * - words_count =< ZoO_INDEX_MAX + * ---- + * next_word < ZoO_INDEX_MAX + */ + next_word += 1; + } + + if (link_words(k, curr_word_id, ZoO_WORD_END_OF_LINE) < 0) + { + error = -1; + + ZoO_WARNING + ( + "Could not indicate that '" + ZoO_CHAR_STRING_SYMBOL + "' was the last word of the sentence.", + string->words[curr_word_id] + ); + } + + return error; +} + diff --git a/src/core/create_sentences.c b/src/core/create_sentences.c new file mode 100644 index 0000000..bc410e5 --- /dev/null +++ b/src/core/create_sentences.c @@ -0,0 +1,486 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../io/error.h" + +#include "knowledge.h" + +static ZoO_index pick_an_index +( + ZoO_index const occurrences, + const ZoO_index links_occurrences [const restrict static 1], + const ZoO_index links [const restrict static 1] +) +{ + ZoO_index result, accumulator, random_number; + + result = 0; + accumulator = links_occurrences[0]; + random_number = (((ZoO_index) rand()) % occurrences); + + while (accumulator < 
random_number) + { + + /* + * Should be safe: + * result overflowing <-> sum('links_occurrences') > 'occurrences' + * and sum('links_occurrences') == 'occurrences' + */ + result += 1; + + /* + * Should be safe: + * - sum('links_occurrences') == 'occurrences'. + * - 'occurrences' is safe. + * ---- + * 'accumulator' is safe. + */ + accumulator += links_occurrences[result]; + } + + return links[result]; +} + +static unsigned char * extend_left +( + struct ZoO_knowledge k [const restrict static 1], + ZoO_index word_id, + ZoO_char current_sentence [static 1], + size_t sentence_size [const restrict static 1], + ZoO_index credits [const static 1] +) +{ + size_t addition_size; + struct ZoO_knowledge_word * w; + ZoO_char * next_sentence; + + w = (k->words + word_id); + + if + ( + (w->special == ZoO_WORD_STARTS_SENTENCE) + || (w->occurrences == 0) + ) + { + return current_sentence; + } + + /* prevents current_sentence [restrict] */ + next_sentence = current_sentence; + + for (;;) + { + if (*credits == 0) + { + return current_sentence; + } + + *credits -= 1; + word_id = + pick_an_index + ( + w->occurrences, + w->backward_links_occurrences, + w->backward_links + ); + + w = (k->words + word_id); + + switch (w->special) + { + case ZoO_WORD_HAS_NO_EFFECT: + /* FIXME: not overflow-safe. */ + /* word also contains an '\0', which we will replace by a ' ' */ + addition_size = w->word_size; + break; + + case ZoO_WORD_ENDS_SENTENCE: + ZoO_S_WARNING("END OF LINE should not be prefixable."); + return current_sentence; + + case ZoO_WORD_STARTS_SENTENCE: + return current_sentence; + + case ZoO_WORD_REMOVES_LEFT_SPACE: + case ZoO_WORD_REMOVES_RIGHT_SPACE: + /* word also contains an '\0', which we will remove. */ + addition_size = w->word_size - 1; + break; + } + + if (*sentence_size > (SIZE_MAX - addition_size)) + { + ZoO_S_WARNING + ( + "Sentence construction aborted to avoid size_t overflow." 
+ ); + + return current_sentence; + } + + next_sentence = + (ZoO_char *) calloc + ( + /* overflow-safe */ + (*sentence_size + addition_size), + sizeof(ZoO_char) + ); + + if (next_sentence == (ZoO_char *) NULL) + { + ZoO_S_ERROR("Could not allocate memory to store new sentence."); + + return current_sentence; + } + + /* overflow-safe */ + *sentence_size = (*sentence_size + addition_size); + + switch (w->special) + { + case ZoO_WORD_HAS_NO_EFFECT: + snprintf + ( + next_sentence, + *sentence_size, + " " ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL, + w->word, + current_sentence + ); + break; + + case ZoO_WORD_REMOVES_LEFT_SPACE: + snprintf + ( + next_sentence, + *sentence_size, + ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL, + w->word, + current_sentence + ); + break; + + case ZoO_WORD_REMOVES_RIGHT_SPACE: + snprintf + ( + next_sentence, + *sentence_size, + ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL, + w->word, + /* Safe: strlen(current_sentence) >= 2 */ + (current_sentence + 1) + ); + break; + + default: + /* TODO: PROGRAM LOGIC ERROR */ + break; + } + + free((void *) current_sentence); + + /* prevents current_sentence [const] */ + current_sentence = next_sentence; + } +} + +static unsigned char * extend_right +( + struct ZoO_knowledge k [const restrict static 1], + ZoO_index word_id, + ZoO_char current_sentence [static 1], + size_t sentence_size [const restrict static 1], + ZoO_index credits [const static 1] +) +{ + size_t addition_size; + struct ZoO_knowledge_word * w; + ZoO_char * next_sentence; + + w = (k->words + word_id); + + if + ( + (w->special == ZoO_WORD_ENDS_SENTENCE) + || (w->occurrences == 0) + ) + { + return current_sentence; + } + + /* prevents current_sentence [restrict] */ + next_sentence = current_sentence; + + for (;;) + { + if (*credits == 0) + { + return current_sentence; + } + + *credits -= 1; + + word_id = + pick_an_index + ( + w->occurrences, + w->forward_links_occurrences, + w->forward_links + ); + + w = (k->words + word_id); + + switch 
(w->special) + { + case ZoO_WORD_HAS_NO_EFFECT: + /* FIXME: Assumed to be overflow-safe. */ + /* word also contains an '\0', which we will replace by a ' '. */ + addition_size = w->word_size; + break; + + case ZoO_WORD_ENDS_SENTENCE: + return current_sentence; + + case ZoO_WORD_STARTS_SENTENCE: + ZoO_S_WARNING("START OF LINE should not be suffixable."); + return current_sentence; + + case ZoO_WORD_REMOVES_LEFT_SPACE: + case ZoO_WORD_REMOVES_RIGHT_SPACE: + /* word also contains an '\0', which we will remove. */ + addition_size = w->word_size - 1; + break; + } + + if (*sentence_size > (SIZE_MAX - addition_size)) + { + ZoO_S_WARNING + ( + "Sentence construction aborted to avoid size_t overflow." + ); + + return current_sentence; + } + + next_sentence = + (ZoO_char *) calloc + ( + /* overflow-safe */ + (*sentence_size + addition_size), + sizeof(ZoO_char) + ); + + if (next_sentence == (ZoO_char *) NULL) + { + ZoO_S_ERROR("Could not allocate memory to store new sentence."); + + return current_sentence; + } + + /* overflow-safe */ + *sentence_size = (*sentence_size + addition_size); + + switch (w->special) + { + case ZoO_WORD_REMOVES_LEFT_SPACE: + printf + ( + "current sentence:'%s', pointing at '%c'.\n", + current_sentence, + current_sentence[*sentence_size - addition_size - 2] + ); + current_sentence[*sentence_size - addition_size - 2] = '\0'; + + case ZoO_WORD_HAS_NO_EFFECT: + snprintf + ( + next_sentence, + *sentence_size, + ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL " ", + current_sentence, + w->word + ); + break; + + case ZoO_WORD_REMOVES_RIGHT_SPACE: + snprintf + ( + next_sentence, + *sentence_size, + ZoO_CHAR_STRING_SYMBOL ZoO_CHAR_STRING_SYMBOL, + current_sentence, + w->word + ); + break; + + default: + /* TODO: PROGRAM LOGIC ERROR */ + break; + } + + free((void *) current_sentence); + + /* prevents current_sentence [const] */ + current_sentence = next_sentence; + } +} + +int ZoO_knowledge_extend +( + struct ZoO_knowledge k [const static 1], + const struct 
ZoO_strings string [const static 1], + int const ignore_first_word, + ZoO_char * result [const static 1] +) +{ + int word_found; + size_t sentence_size; + ZoO_index i, word_id, word_min_score, word_min_id, credits; + + word_found = 0; + credits = ZoO_MAX_REPLY_WORDS; + + if (ignore_first_word) + { + i = 1; + } + else + { + i = 0; + } + + for (; i < string->words_count; ++i) + { + /* prevents k [restrict] */ + if (ZoO_knowledge_find(k, string->words[i], &word_min_id) == 0) + { + word_found = 1; + word_min_score = k->words[word_min_id].occurrences; + + break; + } + } + + if (word_found == 0) + { + word_min_id = (rand() % k->words_count); + word_min_score = k->words[word_min_id].occurrences; + } + + for (; i < string->words_count; ++i) + { + if + ( + (ZoO_knowledge_find(k, string->words[i], &word_id) == 0) + && (k->words[word_id].occurrences < word_min_score) + ) + { + word_min_score = k->words[word_id].occurrences; + word_min_id = word_id; + } + } + + /* 3: 2 spaces + '\0' */ + /* FIXME: not overflow-safe */ + switch (k->words[word_min_id].special) + { + case ZoO_WORD_REMOVES_LEFT_SPACE: + case ZoO_WORD_REMOVES_RIGHT_SPACE: + /* word + ' ' + '\0' */ + sentence_size = (strlen(k->words[word_min_id].word) + 2); + break; + + case ZoO_WORD_HAS_NO_EFFECT: + /* word + ' ' * 2 + '\0' */ + sentence_size = (strlen(k->words[word_min_id].word) + 3); + break; + + default: + ZoO_WARNING + ( + "'%s' was unexpectedly selected as pillar.", + k->words[word_min_id].word + ); + /* word + '[' + ']' + ' ' * 2 + '\0' */ + sentence_size = (strlen(k->words[word_min_id].word) + 5); + break; + } + + *result = (ZoO_char *) calloc(sentence_size, sizeof(ZoO_char)); + + if (*result == (ZoO_char *) NULL) + { + ZoO_S_ERROR("Could not allocate memory to start sentence."); + + return -2; + } + + switch (k->words[word_min_id].special) + { + case ZoO_WORD_REMOVES_LEFT_SPACE: + snprintf + ( + *result, + sentence_size, + ZoO_CHAR_STRING_SYMBOL " ", + k->words[word_min_id].word + ); + break; + + case 
ZoO_WORD_REMOVES_RIGHT_SPACE: + snprintf + ( + *result, + sentence_size, + " " ZoO_CHAR_STRING_SYMBOL, + k->words[word_min_id].word + ); + break; + + case ZoO_WORD_HAS_NO_EFFECT: + snprintf + ( + *result, + sentence_size, + " " ZoO_CHAR_STRING_SYMBOL " ", + k->words[word_min_id].word + ); + break; + + default: + snprintf + ( + *result, + sentence_size, + " [" ZoO_CHAR_STRING_SYMBOL "] ", + k->words[word_min_id].word + ); + break; + } + + if ((word_min_score == 0) || (credits == 0)) + { + return 0; + } + + --credits; + + /* prevents result [restrict] */ + *result = extend_left(k, word_min_id, *result, &sentence_size, &credits); + + if (*result == (ZoO_char *) NULL) + { + return -2; + } + + *result = extend_right(k, word_min_id, *result, &sentence_size, &credits); + + if (*result == (ZoO_char *) NULL) + { + return -2; + } + + return 0; +} diff --git a/src/core/knowledge.c b/src/core/knowledge.c new file mode 100644 index 0000000..31ccb97 --- /dev/null +++ b/src/core/knowledge.c @@ -0,0 +1,447 @@ +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../io/error.h" + +#include "knowledge.h" + +/* XXX: are we as close to immutable as we want to be? */ +unsigned int const ZoO_knowledge_punctuation_chars_count = 7; +const ZoO_char const ZoO_knowledge_punctuation_chars[7] = + { + '!', + ',', + '.', + ':', + ';', + '?', + '~' + }; + +/* XXX: are we as close to immutable as we want to be? */ +unsigned int const ZoO_knowledge_forbidden_chars_count = 8; +const ZoO_char const ZoO_knowledge_forbidden_chars[8]= + { + '(', + ')', + '[', + ']', + '{', + '}', + '<', + '>' + }; + +int ZoO_knowledge_find +( + const struct ZoO_knowledge k [const restrict static 1], + const ZoO_char word [const restrict static 1], + ZoO_index result [const restrict static 1] +) +{ + int cmp; + ZoO_index i, current_min, current_max; + + /* This is a binary search. 
*/ + + if (k->words_count < 1) + { + *result = 0; + + return -1; + } + + current_min = 0; + + /* overflow-safe: k->words_count >= 1 */ + current_max = (k->words_count - 1); + + for (;;) + { + /* FIXME: overflow-safe? */ + i = ((current_min + current_max) / 2); + + if (i == k->words_count) + { + *result = k->words_count; + + return -1; + } + + cmp = + /* XXX: Assumed to be compatible with ZoO_char */ + strcmp + ( + (char *) word, + (const char *) k->words[k->sorted_indices[i]].word + ); + + if (cmp > 0) + { + if ((current_min > current_max)) + { + *result = (i + 1); + + return -1; + } + + /* FIXME: overflow-safe? */ + current_min = (i + 1); + } + else if (cmp < 0) + { + if ((current_min > current_max) || (i == 0)) + { + *result = i; + + return -1; + } + + /* overflow-safe */ + current_max = (i - 1); + } + else + { + *result = k->sorted_indices[i]; + + return 0; + } + } +} + +static void word_init (struct ZoO_knowledge_word w [const restrict static 1]) +{ + w->word_size = 0; + w->word = (ZoO_char *) NULL; + w->special = ZoO_WORD_HAS_NO_EFFECT; + w->occurrences = 1; + w->forward_links_count = 0; + w->backward_links_count = 0; + w->forward_links_occurrences = (ZoO_index *) NULL; + w->backward_links_occurrences = (ZoO_index *) NULL; + w->forward_links = (ZoO_index *) NULL; + w->backward_links = (ZoO_index *) NULL; +} + +static int add_punctuation_nodes +( + struct ZoO_knowledge k [const static 1] +) +{ + int error; + char w[2]; + ZoO_index i, id; + + if (ZoO_knowledge_learn(k, "START OF LINE", &id) < 0) + { + ZoO_S_FATAL("Could not add 'START OF LINE' to knowledge."); + + return -2; + } + + k->words[id].special = ZoO_WORD_STARTS_SENTENCE; + k->words[id].occurrences = 0; + + if (ZoO_knowledge_learn(k, "END OF LINE", &id) < 0) + { + ZoO_S_FATAL("Could not add 'END OF LINE' to knowledge."); + + return -2; + } + + k->words[id].special = ZoO_WORD_ENDS_SENTENCE; + k->words[id].occurrences = 0; + + w[1] = '\0'; + + error = 0; + + for (i = 0; i < 
ZoO_knowledge_punctuation_chars_count; ++i) + { + w[0] = ZoO_knowledge_punctuation_chars[i]; + + if (ZoO_knowledge_learn(k, w, &id) < 0) + { + ZoO_WARNING("Could not add '%s' to knowledge.", w); + + error = -1; + } + else + { + k->words[id].special = ZoO_WORD_REMOVES_LEFT_SPACE; + k->words[id].occurrences = 0; + } + } + + return error; +} + +int ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1]) +{ + k->words_count = 0; + k->words = (struct ZoO_knowledge_word *) NULL; + k->sorted_indices = (ZoO_index *) NULL; + + if (add_punctuation_nodes(k) < -1) + { + ZoO_knowledge_finalize(k); + + return -1; + } + + return 0; +} + +static void finalize_word +( + struct ZoO_knowledge_word w [const restrict static 1] +) +{ + if (w->word != (ZoO_char *) NULL) + { + free((void *) w->word); + + w->word = (ZoO_char *) NULL; + } + + if (w->forward_links_occurrences != (ZoO_index *) NULL) + { + free((void *) w->forward_links_occurrences); + + w->forward_links_occurrences = (ZoO_index *) NULL; + } + + if (w->backward_links_occurrences != (ZoO_index *) NULL) + { + free((void *) w->backward_links_occurrences); + + w->backward_links_occurrences = (ZoO_index *) NULL; + } + + if (w->forward_links != (ZoO_index *) NULL) + { + free((void *) w->forward_links); + + w->forward_links = (ZoO_index *) NULL; + } + + if (w->backward_links != (ZoO_index *) NULL) + { + free((void *) w->backward_links); + + w->backward_links = (ZoO_index *) NULL; + } + + w->forward_links_count = 0; + w->backward_links_count = 0; +} + +void ZoO_knowledge_finalize (struct ZoO_knowledge k [const restrict static 1]) +{ + ZoO_index i; + + for (i = 0; i < k->words_count; ++i) + { + /* prevents k [restrict] */ + finalize_word(k->words + i); + } + + k->words_count = 0; + + if (k->words != (struct ZoO_knowledge_word *) NULL) + { + free((void *) k->words); + + k->words = (struct ZoO_knowledge_word *) NULL; + } + + if (k->sorted_indices != (ZoO_index *) NULL) + { + free((void *) k->sorted_indices); + + 
k->sorted_indices = (ZoO_index *) NULL; + } +} + +int ZoO_knowledge_learn +( + struct ZoO_knowledge k [const static 1], + const ZoO_char word [const restrict static 1], + ZoO_index result [const restrict static 1] +) +{ + struct ZoO_knowledge_word * new_wordlist; + ZoO_index * new_sorted_indices; + ZoO_index temp; + + /* prevents k [restrict] */ + if (ZoO_knowledge_find(k, word, result) == 0) + { + if (k->words[*result].occurrences == ZoO_INDEX_MAX) + { + ZoO_WARNING + ( + "Maximum number of occurrences has been reached for word '" + ZoO_CHAR_STRING_SYMBOL + "'.", + word + ); + + return -1; + } + + /* overflow-safe */ + k->words[*result].occurrences += 1; + + return 0; + } + + if (k->words_count == ZoO_INDEX_MAX) + { + ZoO_S_WARNING("Maximum number of words has been reached."); + + return -1; + } + + new_wordlist = + (struct ZoO_knowledge_word *) realloc + ( + (void *) k->words, + ( + ( + /* overflow-safe: k->words_count < ZoO_INDEX_MAX */ + (size_t) (k->words_count + 1) + ) + * sizeof(struct ZoO_knowledge_word) + ) + ); + + if (new_wordlist == (struct ZoO_knowledge_word *) NULL) + { + ZoO_ERROR + ( + "Could not learn the word '%s': unable to realloc the word list.", + word + ); + + return -1; + } + + k->words = new_wordlist; + + new_sorted_indices = + (ZoO_index *) realloc + ( + (void *) k->sorted_indices, + ( + ( + /* overflow-safe: k->words_count < ZoO_INDEX_MAX */ + (size_t) (k->words_count + 1) + ) + * sizeof(ZoO_index) + ) + ); + + if (new_sorted_indices == (ZoO_index *) NULL) + { + ZoO_ERROR + ( + "Could not learn the word '" + ZoO_CHAR_STRING_SYMBOL + "': unable to realloc the index list.", + word + ); + + return -1; + } + + k->sorted_indices = new_sorted_indices; + + /* We can only move indices right of *result if they exist. */ + if (*result != k->words_count) + { + /* TODO: check if correct. 
*/ + memmove + ( + /* + * overflow-safe: + * - k->words_count < ZoO_INDEX_MAX + * - (k->sorted_indices + *result + 1) =< k->words_count + */ + (void *) (k->sorted_indices + *result + 1), + /* overflow-safe: see above */ + (const void *) (k->sorted_indices + *result), + ( + ( + /* overflow-safe: *result < k->words_count */ + (size_t) (k->words_count - *result) + ) + * sizeof(ZoO_index) + ) + ); + } + + temp = *result; + + k->sorted_indices[*result] = k->words_count; + + *result = k->words_count; + + word_init(k->words + *result); + + /* XXX: strlen assumed to work with ZoO_char. */ + k->words[*result].word_size = strlen(word); + + if (k->words[*result].word_size == SIZE_MAX) + { + ZoO_S_WARNING + ( + "Could not learn word that had a size too big to store in a '\\0' " + "terminated string. Chances are, this is but a symptom of the real " + "problem." + ); + + return -1; + } + + /* We also need '\0' */ + k->words[*result].word_size += 1; + + k->words[*result].word = + (ZoO_char *) calloc + ( + k->words[*result].word_size, + sizeof(ZoO_char) + ); + + if (k->words[*result].word == (ZoO_char *) NULL) + { + ZoO_S_ERROR + ( + "Could not learn word due to being unable to allocate the memory to " + "store it." 
+ ); + + k->words[*result].word_size = 0; + + return -1; + } + + memcpy(k->words[*result].word, word, k->words[*result].word_size); + + /* Safe: k->words_count < ZoO_INDEX_MAX */ + k->words_count += 1; + + ZoO_DEBUG + ( + ZoO_DEBUG_LEARNING, + "Learned word {'%s', id: %u, rank: %u}", + word, + *result, + temp + ); + + return 0; +} + diff --git a/src/core/knowledge.h b/src/core/knowledge.h new file mode 100644 index 0000000..f20cb16 --- /dev/null +++ b/src/core/knowledge.h @@ -0,0 +1,42 @@ +#ifndef _ZoO_CORE_KNOWLEDGE_H_ +#define _ZoO_CORE_KNOWLEDGE_H_ + +#include "../tool/strings_types.h" + +#include "knowledge_types.h" + +int ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1]); + +void ZoO_knowledge_finalize (struct ZoO_knowledge k [const static 1]); + +int ZoO_knowledge_find +( + const struct ZoO_knowledge k [const restrict static 1], + const ZoO_char word [const restrict static 1], + ZoO_index result [const restrict static 1] +); + +int ZoO_knowledge_learn +( + struct ZoO_knowledge k [const static 1], + const ZoO_char word [const restrict static 1], + ZoO_index result [const restrict static 1] +); + +int ZoO_knowledge_assimilate +( + struct ZoO_knowledge k [const static 1], + struct ZoO_strings string [const restrict static 1], + ZoO_index const aliases_count, + const char * restrict aliases [const restrict static aliases_count] +); + +int ZoO_knowledge_extend +( + struct ZoO_knowledge k [const static 1], + const struct ZoO_strings string [const static 1], + int const ignore_first_word, + ZoO_char * result [const static 1] +); + +#endif diff --git a/src/core/knowledge_types.h b/src/core/knowledge_types.h new file mode 100644 index 0000000..f2e8161 --- /dev/null +++ b/src/core/knowledge_types.h @@ -0,0 +1,46 @@ +#ifndef _ZoO_CORE_KNOWLEDGE_TYPES_H_ +#define _ZoO_CORE_KNOWLEDGE_TYPES_H_ + +#include "../pervasive.h" + +#define ZoO_WORD_START_OF_LINE 0 +#define ZoO_WORD_END_OF_LINE 1 + +/* XXX: are we as close to immutable as we want to be? 
*/ +extern unsigned int const ZoO_knowledge_punctuation_chars_count; +extern const ZoO_char const ZoO_knowledge_punctuation_chars[7]; +extern unsigned int const ZoO_knowledge_forbidden_chars_count; +extern const ZoO_char const ZoO_knowledge_forbidden_chars[8]; + + +enum ZoO_knowledge_special_effect +{ + ZoO_WORD_HAS_NO_EFFECT, + ZoO_WORD_ENDS_SENTENCE, + ZoO_WORD_STARTS_SENTENCE, + ZoO_WORD_REMOVES_LEFT_SPACE, + ZoO_WORD_REMOVES_RIGHT_SPACE +}; + +struct ZoO_knowledge_word +{ + size_t word_size; + ZoO_char * word; + enum ZoO_knowledge_special_effect special; + ZoO_index occurrences; + ZoO_index forward_links_count; + ZoO_index backward_links_count; + ZoO_index * forward_links_occurrences; + ZoO_index * backward_links_occurrences; + ZoO_index * forward_links; + ZoO_index * backward_links; +}; + +struct ZoO_knowledge +{ + ZoO_index words_count; + ZoO_index * sorted_indices; + struct ZoO_knowledge_word * words; +}; + +#endif diff --git a/src/core/main.c b/src/core/main.c new file mode 100644 index 0000000..34233d6 --- /dev/null +++ b/src/core/main.c @@ -0,0 +1,296 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <time.h> +#include <signal.h> + +#include "../tool/strings.h" + +#include "../io/error.h" +#include "../io/parameters.h" +#include "../io/data_input.h" +#include "../io/network.h" + + +#include "knowledge.h" + +#include "state_types.h" + +static int run = 1; + +static void request_termination (int const signo) +{ + if ((signo == SIGINT) || (signo == SIGTERM)) + { + run = 0; + } +} + +static int initialize +( + struct ZoO_state s [const static 1], + int const argc, + const char * argv [const static argc] +) +{ + ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Zero of One is initializing..."); + + srand(time(NULL)); + + /* prevents s [restrict] */ + if (ZoO_knowledge_initialize(&(s->knowledge)) < 0) + { + return -1; + } + + if (ZoO_parameters_initialize(&(s->param), argc, argv) < 0) + { + ZoO_knowledge_finalize(&(s->knowledge)); + + return -1; + } 
+ + return 0; +} + +static int load_data_file (struct ZoO_state s [const static 1]) +{ + struct ZoO_data_input input; + char * result; + + if (ZoO_data_input_open(&input, s->param.data_filename) < 0) + { + return -1; + } + + while + ( + ZoO_data_input_read_line + ( + &input, + ZoO_knowledge_punctuation_chars_count, + ZoO_knowledge_punctuation_chars + ) == 0 + ) + { + (void) ZoO_knowledge_assimilate + ( + &(s->knowledge), + &(input.string), + s->param.aliases_count, + s->param.aliases + ); + } + + ZoO_data_input_close(&input); + + return 0; +} + +static int finalize (struct ZoO_state s [const static 1]) +{ + int error; + + ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Zero of One is finalizing..."); + + error = 0; + + /* prevents s [restrict] */ + ZoO_knowledge_finalize(&(s->knowledge)); + + return error; +} + +static int network_connect (struct ZoO_state s [const static 1]) +{ + return + ZoO_network_connect + ( + &(s->network), + s->param.irc_server_addr, + s->param.irc_server_port, + s->param.irc_server_channel, + s->param.irc_username, + s->param.irc_realname, + s->param.aliases[0] + ); +} + +static int should_reply +( + struct ZoO_parameters param [const restrict static 1], + struct ZoO_strings string [const restrict static 1], + int should_learn [const restrict static 1] +) +{ + ZoO_index i, j; + + for (i = 0; i < param->aliases_count; ++i) + { + if (ZoO_IS_PREFIX(param->aliases[i], string->words[0])) + { + *should_learn = 0; + + return 1; + } + + for (j = 1; j < string->words_count; ++j) + { + if (ZoO_IS_PREFIX(param->aliases[i], string->words[j])) + { + *should_learn = 1; + + return 1; + } + } + } + + *should_learn = 1; + + return (param->reply_rate >= (rand() % 100)); +} + +static void handle_message +( + struct ZoO_state s [const static 1], + struct ZoO_strings string [const restrict static 1], + ssize_t const msg_offset, + ssize_t const msg_size +) +{ + ZoO_char * line; + int reply, learn; + + if + ( + ZoO_strings_parse + ( + string, + (size_t) msg_size, + 
(s->network.msg + msg_offset), + ZoO_knowledge_punctuation_chars_count, + ZoO_knowledge_punctuation_chars + ) < 0 + ) + { + ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Could not dissect msg."); + + return; + } + + if (string->words_count == 0) + { + return; + } + + reply = should_reply(&(s->param), string, &learn); + + if + ( + reply + && + ( + ZoO_knowledge_extend + ( + &(s->knowledge), + string, + !learn, + &line + ) == 0 + ) + ) + { + if (line[0] == ' ') + { + strcpy((s->network.msg), (line + 1)); + } + else + { + strcpy((s->network.msg), line); + } + + free((void *) line); + + ZoO_network_send(&(s->network)); + } + + if (learn) + { + (void) ZoO_knowledge_assimilate + ( + &(s->knowledge), + string, + s->param.aliases_count, + s->param.aliases + ); + } +} + +static int main_loop (struct ZoO_state s [const static 1]) +{ + struct ZoO_strings string; + ssize_t msg_offset, msg_size; + + msg_offset = 0; + msg_size = 0; + + ZoO_strings_initialize(&string); + + while (run) + { + if (ZoO_network_receive(&(s->network), &msg_offset, &msg_size) == 0) + { + handle_message(s, &string, msg_offset, msg_size); + } + } + + ZoO_strings_finalize(&string); + + ZoO_network_disconnect(&(s->network)); + + return 0; +} + +int main (int const argc, const char * argv [const static argc]) +{ + struct ZoO_state s; + + if (initialize(&s, argc, argv) < 0) + { + return -1; + } + + if (load_data_file(&s) < 0) + { + goto CRASH; + } + + if (network_connect(&s) < 0) + { + goto CRASH; + } + + if (main_loop(&s) < 0) + { + goto CRASH; + } + + (void) finalize(&s); + + ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Zero of One terminated normally."); + + return 0; + + CRASH: + { + (void) finalize(&s); + + ZoO_S_DEBUG + ( + ZoO_DEBUG_PROGRAM_FLOW, + "Zero of One terminated by crashing." 
+ ); + + return -1; + } +} diff --git a/src/core/state_types.h b/src/core/state_types.h new file mode 100644 index 0000000..89c814e --- /dev/null +++ b/src/core/state_types.h @@ -0,0 +1,16 @@ +#ifndef _ZoO_CORE_STATE_TYPES_H_ +#define _ZoO_CORE_STATE_TYPES_H_ + +#include "../io/parameters_types.h" +#include "../io/network_types.h" + +#include "knowledge_types.h" + +struct ZoO_state +{ + struct ZoO_parameters param; + struct ZoO_knowledge knowledge; + struct ZoO_network network; +}; + +#endif diff --git a/src/io/CMakeLists.txt b/src/io/CMakeLists.txt new file mode 100644 index 0000000..a13154b --- /dev/null +++ b/src/io/CMakeLists.txt @@ -0,0 +1,8 @@ +set( + SRC_FILES ${SRC_FILES} + ${CMAKE_CURRENT_SOURCE_DIR}/parameters.c + ${CMAKE_CURRENT_SOURCE_DIR}/network.c + ${CMAKE_CURRENT_SOURCE_DIR}/data_input.c +) +set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) + diff --git a/src/io/data_input.c b/src/io/data_input.c new file mode 100644 index 0000000..e31d33b --- /dev/null +++ b/src/io/data_input.c @@ -0,0 +1,98 @@ +#define _POSIX_C_SOURCE 200809L +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "error.h" + +#include "data_input.h" + +int ZoO_data_input_open +( + struct ZoO_data_input di [const static 1], + const char filename [const restrict static 1] +) +{ + /* prevents di [restrict] */ + ZoO_strings_initialize(&(di->string)); + + di->file = fopen(filename, "r"); + + if (di->file == (FILE *) NULL) + { + ZoO_ERROR + ( + "Could not open file '%s' in readonly mode.", + filename + ); + + return -1; + } + + return 0; +} + +int ZoO_data_input_read_line +( + struct ZoO_data_input di [const static 1], + ZoO_index const punctuations_count, + const ZoO_char punctuations [const restrict static punctuations_count] +) +{ + size_t line_size, i, w_start; + ZoO_char * line; + + /* prevents di [restrict] */ + ZoO_strings_finalize(&(di->string)); + + line = (ZoO_char *) NULL; + line_size = 0; + + /* XXX: assumed compatible with ZoO_char */ + + 
if (getline(&line, &line_size, di->file) < 1) + { + free((void *) line); + + return -1; + } + + line_size = strlen(line); + line[line_size - 1] = '\0'; + + --line_size; /* removed '\n' */ + + if + ( + ZoO_strings_parse + ( + &(di->string), + line_size, + line, + punctuations_count, + punctuations + ) < 0 + ) + { + free((void *) line); + + return -1; + } + + free((void *) line); + + return 0; +} + +void ZoO_data_input_close (struct ZoO_data_input di [const static 1]) +{ + if (di->file != (FILE *) NULL) + { + fclose(di->file); + + di->file = (FILE *) NULL; + } + + /* prevents di [restrict] */ + ZoO_strings_finalize(&(di->string)); +} diff --git a/src/io/data_input.h b/src/io/data_input.h new file mode 100644 index 0000000..a2f004b --- /dev/null +++ b/src/io/data_input.h @@ -0,0 +1,21 @@ +#ifndef _ZoO_IO_DATA_INPUT_H_ +#define _ZoO_IO_DATA_INPUT_H_ + +#include "data_input_types.h" + +int ZoO_data_input_open +( + struct ZoO_data_input di [const static 1], + const char filename [const restrict static 1] +); + +int ZoO_data_input_read_line +( + struct ZoO_data_input di [const static 1], + ZoO_index const punctuations_count, + const ZoO_char punctuations [const restrict static punctuations_count] +); + +void ZoO_data_input_close (struct ZoO_data_input di [const static 1]); + +#endif diff --git a/src/io/data_input_types.h b/src/io/data_input_types.h new file mode 100644 index 0000000..bd2709b --- /dev/null +++ b/src/io/data_input_types.h @@ -0,0 +1,16 @@ +#ifndef _ZoO_IO_DATA_INPUT_TYPES_H_ +#define _ZoO_IO_DATA_INPUT_TYPES_H_ + +#include <stdio.h> + +#include "../pervasive.h" + +#include "../tool/strings.h" + +struct ZoO_data_input +{ + FILE * restrict file; + struct ZoO_strings string; +}; + +#endif diff --git a/src/io/error.h b/src/io/error.h new file mode 100644 index 0000000..e4267a0 --- /dev/null +++ b/src/io/error.h @@ -0,0 +1,146 @@ +#ifndef _ZoO_IO_ERROR_H_ +#define _ZoO_IO_ERROR_H_ + +#include <stdio.h> + +#include "../pervasive.h" + +#define ZoO_DEBUG_ALL 1 + 
/*
 * Debug switches. Each category can be set from the build command line;
 * otherwise it falls back to the value of ZoO_DEBUG_ALL.
 *
 * NOTE(review): this header defines ZoO_DEBUG_ALL to 1 right before this
 * point, so the fallback below never applies and every category that is not
 * explicitly overridden is enabled.
 */
#ifndef ZoO_DEBUG_ALL
   #define ZoO_DEBUG_ALL 0
#endif

#ifndef ZoO_DEBUG_PROGRAM_FLOW
   #define ZoO_DEBUG_PROGRAM_FLOW (0 || ZoO_DEBUG_ALL)
#endif

#ifndef ZoO_DEBUG_CONFIG
   #define ZoO_DEBUG_CONFIG (0 || ZoO_DEBUG_ALL)
#endif

#ifndef ZoO_DEBUG_LEARNING
   #define ZoO_DEBUG_LEARNING (0 || ZoO_DEBUG_ALL)
#endif

#ifndef ZoO_DEBUG_NETWORK
   #define ZoO_DEBUG_NETWORK (0 || ZoO_DEBUG_ALL)
#endif

/* Per-severity output switches, tested by the macros below. */
#define ZoO_ENABLE_WARNINGS_OUTPUT 1
#define ZoO_ENABLE_RUNTIME_ERRORS_OUTPUT 1
#define ZoO_ENABLE_PROGRAMMING_ERRORS_OUTPUT 1
#define ZoO_ENABLE_FATAL_ERROR_OUTPUT 1

/*
 * ZoO_LOCATION is "[file][line]" when ZoO_ENABLE_ERROR_LOCATION is defined,
 * and an empty string otherwise.
 */
#ifdef ZoO_ENABLE_ERROR_LOCATION
   #define ZoO_LOCATION "[" __FILE__ "][" ZoO_TO_STRING(__LINE__) "]"
#else
   #define ZoO_LOCATION ""
#endif

/*
 * Writes "[symbol]<location> <formatted str>\n" to stderr.
 * {symbol} and {str} must be string literals (they are concatenated at
 * compile time) and at least one variadic argument is required; use the
 * ZoO_S_* macros for messages without arguments.
 * The expansion already ends with a ';'.
 */
#define ZoO_PRINT_STDERR(symbol, str, ...)\
   fprintf(stderr, "[" symbol "]" ZoO_LOCATION " " str "\n", __VA_ARGS__);

/*
 * Given that we use preprocessor constants as flags, we can expect the
 * compilers to remove the test condition for disabled flags. No need to be
 * shy about allowing many debug options.
 */

/* Debug message ("D"), printed only if {flag} is non-zero. */
#define ZoO_DEBUG(flag, str, ...)\
   ZoO_ISOLATE\
   (\
      if (flag)\
      {\
         ZoO_PRINT_STDERR("D", str, __VA_ARGS__);\
      }\
   )


/* Warning ("W"), controlled by ZoO_ENABLE_WARNINGS_OUTPUT. */
#define ZoO_WARNING(str, ...)\
   ZoO_ISOLATE\
   (\
      if (ZoO_ENABLE_WARNINGS_OUTPUT)\
      {\
         ZoO_PRINT_STDERR("W", str, __VA_ARGS__);\
      }\
   )

/* Runtime error ("E"), controlled by ZoO_ENABLE_RUNTIME_ERRORS_OUTPUT. */
#define ZoO_ERROR(str, ...)\
   ZoO_ISOLATE\
   (\
      if (ZoO_ENABLE_RUNTIME_ERRORS_OUTPUT)\
      {\
         ZoO_PRINT_STDERR("E", str, __VA_ARGS__);\
      }\
   )

/* Programming error ("P"), controlled by ZoO_ENABLE_PROGRAMMING_ERRORS_OUTPUT. */
#define ZoO_PROG_ERROR(str, ...)\
   ZoO_ISOLATE\
   (\
      if (ZoO_ENABLE_PROGRAMMING_ERRORS_OUTPUT)\
      {\
         ZoO_PRINT_STDERR("P", str, __VA_ARGS__);\
      }\
   )

/* Fatal error ("F"), controlled by ZoO_ENABLE_FATAL_ERROR_OUTPUT. */
#define ZoO_FATAL(str, ...)\
   ZoO_ISOLATE\
   (\
      if (ZoO_ENABLE_FATAL_ERROR_OUTPUT)\
      {\
         ZoO_PRINT_STDERR("F", str, __VA_ARGS__);\
      }\
   )

/* For outputs without dynamic content (static).
******************************/ + +#define ZoO_PRINT_S_STDERR(symbol, str)\ + fprintf(stderr, "[" symbol "]" ZoO_LOCATION " " str "\n"); + +#define ZoO_S_DEBUG(flag, str)\ + ZoO_ISOLATE\ + (\ + if (flag)\ + {\ + ZoO_PRINT_S_STDERR("D", str);\ + }\ + ) + +#define ZoO_S_WARNING(str)\ + ZoO_ISOLATE\ + (\ + if (ZoO_ENABLE_WARNINGS_OUTPUT)\ + {\ + ZoO_PRINT_S_STDERR("W", str);\ + }\ + ) + +#define ZoO_S_ERROR(str)\ + ZoO_ISOLATE\ + (\ + if (ZoO_ENABLE_RUNTIME_ERRORS_OUTPUT)\ + {\ + ZoO_PRINT_S_STDERR("E", str);\ + }\ + ) + +#define ZoO_S_PROG_ERROR(str)\ + ZoO_ISOLATE\ + (\ + if (ZoO_ENABLE_PROGRAMMING_ERRORS_OUTPUT)\ + {\ + ZoO_PRINT_S_STDERR("P", str);\ + }\ + ) + +#define ZoO_S_FATAL(str)\ + ZoO_ISOLATE\ + (\ + if (ZoO_ENABLE_FATAL_ERROR_OUTPUT)\ + {\ + ZoO_PRINT_S_STDERR("F", str);\ + }\ + ) + +#endif diff --git a/src/io/network.c b/src/io/network.c new file mode 100644 index 0000000..c8d05a2 --- /dev/null +++ b/src/io/network.c @@ -0,0 +1,483 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> + +/* "POSIX.1 does not require the inclusion of <sys/types.h>" */ +/* - man page for setsockopt */ +/* #include <sys/types.h> */ +#include <sys/socket.h> +#include <sys/time.h> + +#include "error.h" + +#include "network.h" + +static int reconnect (struct ZoO_network net [const restrict static 1]) +{ + struct timeval timeout; + int old_errno = errno; + + errno = 0; + timeout.tv_sec = ZoO_NETWORK_TIMEOUT; + timeout.tv_usec = 0; + + if (net->connection != -1) + { + close(net->connection); + } + + net->connection = + socket + ( + net->addrinfo->ai_family, + net->addrinfo->ai_socktype, + net->addrinfo->ai_protocol + ); + + if (net->connection == -1) + { + ZoO_FATAL + ( + "Could not create socket: %s.", + strerror(errno) + ); + + errno = old_errno; + + return -1; + } + + errno = 0; + + if + ( + ( + setsockopt + ( + net->connection, + SOL_SOCKET, + SO_RCVTIMEO, + (const void *) &timeout, + (socklen_t) sizeof(struct timeval) + ) < 0 + ) + || + ( + 
/*
 * Initializes {net}, resolves {host}:{port} (IPv4, TCP) and opens the IRC
 * connection through reconnect().
 *
 * {channel}, {user}, {name} and {nick} are stored by pointer, not copied:
 * they must stay valid for as long as {net} is used.
 *
 * Returns 0 once connected, -1 if the address could not be resolved or the
 * connection could not be established. On failure nothing is left
 * allocated or open in {net}. The caller's errno is preserved.
 */
int ZoO_network_connect
(
   struct ZoO_network net [const static 1],
   const char host [const restrict static 1],
   const char port [const restrict static 1],
   const char channel [const restrict static 1],
   const char user [const restrict static 1],
   const char name [const restrict static 1],
   const char nick [const restrict static 1]
)
{
   int error;
   struct addrinfo hints;
   const int old_errno = errno;

   net->connection = -1;
   net->addrinfo = (struct addrinfo *) NULL;
   net->channel = channel;
   net->user = user;
   net->name = name;
   net->nick = nick;
   net->buffer_index = 0;
   net->buffer_remaining = 0;

   memset(&hints, 0, sizeof(hints));
   memset(net->msg, 0, sizeof(net->msg));

   hints.ai_family = AF_INET;
   hints.ai_socktype = SOCK_STREAM;

   errno = 0;

   error = getaddrinfo(host, port, &hints, &(net->addrinfo));

   if (error != 0)
   {
      if (error == EAI_SYSTEM)
      {
         ZoO_ERROR
         (
            "Could not retrieve server information: %s.",
            strerror(errno)
         );
      }
      else
      {
         ZoO_FATAL
         (
            "Could not retrieve server information: %s.",
            gai_strerror(error)
         );
      }

      errno = old_errno;

      return -1;
   }

   /*
    * The result of reconnect() used to be dropped, which made this function
    * report success without any connection. reconnect() already logs the
    * cause of the failure.
    */
   if (reconnect(net) < 0)
   {
      if (net->connection != -1)
      {
         close(net->connection);

         net->connection = -1;
      }

      freeaddrinfo(net->addrinfo);

      net->addrinfo = (struct addrinfo *) NULL;

      errno = old_errno;

      return -1;
   }

   errno = old_errno;

   return 0;
}
net->msg); + + /* XXX: doesn't that prevent net [restrict]? */ + if (ZoO_IS_PREFIX("PING", net->msg)) + { + errno = 0; + + net->msg[1] = 'O'; + + if (write(net->connection, net->msg, strlen(net->msg)) < 1) + { + ZoO_ERROR("Could not reply to PING request: %s", strerror(errno)); + + errno = old_errno; + + if (reconnect(net) < 0) + { + return -1; + } + + continue; + } + + ZoO_DEBUG(ZoO_DEBUG_NETWORK, "[NET->out] %s\n", net->msg); + + errno = old_errno; + } + else if (net->msg[0] == ':') + { + cmd = 0; + + for (in_index = 1; in_index < 512; in_index++) + { + if (net->msg[in_index] == ' ') + { + cmd = (in_index + 1); + + break; + } + } + + if (cmd == 0) + { + continue; + } + + if (ZoO_IS_PREFIX("001", (net->msg + cmd))) + { + snprintf + ( + net->msg, + 512, + "JOIN :%s\r\n", + net->channel + ); + + errno = 0; + + if (write(net->connection, net->msg, strlen(net->msg)) < 1) + { + ZoO_ERROR + ( + "Could not send JOIN request: %s", + strerror(errno) + ); + + errno = old_errno; + + if (reconnect(net) < 0) + { + return -1; + } + } + + ZoO_DEBUG(ZoO_DEBUG_NETWORK, "[NET->out] %s", net->msg); + + continue; + } + + if (ZoO_IS_PREFIX("PRIVMSG", (net->msg + cmd))) + { + for (; in_index < 512; in_index++) + { + if (net->msg[in_index] == ':') + { + cmd = (in_index + 1); + + break; + } + } + + *msg_offset = cmd; + *msg_size = (msg_index - *msg_offset - 1); + + /*net->msg[*msg_size - 1] = '\0'; */ + + return 0; + } + } + } +} + +int ZoO_network_send (struct ZoO_network net [const restrict static 1]) +{ + int const old_errno = errno; + + snprintf + ( + net->buffer, + 512, + "PRIVMSG %s :%s\r\n", + net->channel, + net->msg + ); + + errno = 0; + + if (write(net->connection, net->buffer, strlen(net->buffer)) < 1) + { + ZoO_ERROR + ( + "Could not send PRIVMSG: %s.", + strerror(errno) + ); + + errno = old_errno; + + if (reconnect(net) < 0) + { + return -2; + } + else + { + return -1; + } + } + + errno = old_errno; + + ZoO_DEBUG(ZoO_DEBUG_NETWORK, "[NET->out] %s", net->buffer); + + return 
0; +} + +void ZoO_network_disconnect (struct ZoO_network net [const restrict static 1]) +{ + freeaddrinfo(net->addrinfo); + close(net->connection); +} + diff --git a/src/io/network.h b/src/io/network.h new file mode 100644 index 0000000..ac7284a --- /dev/null +++ b/src/io/network.h @@ -0,0 +1,27 @@ +#ifndef _ZoO_IO_NETWORK_H_ +#define _ZoO_IO_NETWORK_H_ +#include "network_types.h" + +int ZoO_network_connect +( + struct ZoO_network net [const static 1], + const char host [const restrict static 1], + const char port [const restrict static 1], + const char channel [const restrict static 1], + const char user [const restrict static 1], + const char name [const restrict static 1], + const char nick [const restrict static 1] +); + +int ZoO_network_receive +( + struct ZoO_network net [const static 1], + size_t msg_offset [const restrict static 1], + size_t msg_size [const restrict static 1] +); + +int ZoO_network_send (struct ZoO_network net [const restrict static 1]); + +void ZoO_network_disconnect (struct ZoO_network net [const restrict static 1]); + +#endif diff --git a/src/io/network_types.h b/src/io/network_types.h new file mode 100644 index 0000000..16c81da --- /dev/null +++ b/src/io/network_types.h @@ -0,0 +1,26 @@ +#ifndef _ZoO_IO_NETWORK_TYPES_H_ +#define _ZoO_IO_NETWORK_TYPES_H_ + +#define POSIX_C_SOURCE + +#include <sys/types.h> +#include <sys/socket.h> +#include <netdb.h> + +#include "../pervasive.h" + +struct ZoO_network +{ + size_t buffer_index; + size_t buffer_remaining; + struct addrinfo * addrinfo; + ZoO_char buffer [513]; + ZoO_char msg [513]; + int connection; + const char * restrict channel; + const char * restrict user; + const char * restrict name; + const char * restrict nick; +}; + +#endif diff --git a/src/io/parameters.c b/src/io/parameters.c new file mode 100644 index 0000000..0f7d05c --- /dev/null +++ b/src/io/parameters.c @@ -0,0 +1,354 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> + +#include 
"../pervasive.h" + +#include "error.h" + +#include "parameters.h" + +static void load_default_parameters +( + struct ZoO_parameters param [const restrict static 1] +) +{ + param->data_filename = ZoO_DEFAULT_DATA_FILENAME; + + param->irc_server_addr = ZoO_DEFAULT_IRC_SERVER_ADDR; + param->irc_server_port = ZoO_DEFAULT_IRC_SERVER_PORT; + param->irc_server_channel = ZoO_DEFAULT_IRC_SERVER_CHANNEL; + param->irc_username = ZoO_DEFAULT_IRC_USERNAME; + param->irc_realname = ZoO_DEFAULT_IRC_REALNAME; + + param->reply_rate = ZoO_DEFAULT_REPLY_RATE; + + param->aliases_count = 0; + param->aliases = NULL; +} + +static void print_help (const char exec [const restrict static 1]) +{ + printf + ( + "Usage: %s [option_1 option_2 ...] NICKNAME [ALIAS_1 ALIAS_2 ...] \n" + "NICKNAME is used as the IRC nickname value.\n" + "If NICKNAME or any ALIAS is found in an event, the program will reply.\n" + "\nAvailable options:\n" + " [--data-filename | -df] FILENAME\n" + " Learn content from FILENAME before connecting.\n" + " Default: %s.\n" + " [--irc-server-addr | -isa] IRC_SERVER_ADDR\n" + " Connect to this server address.\n" + " Default: %s.\n" + " [--irc-server-port | -isp] IRC_SERVER_PORT\n" + " Connect to this server port.\n" + " Default: %s.\n" + " [--irc-server-channel | -isc] IRC_SERVER_CHANNEL\n" + " Connect to this server's channel.\n" + " Default: %s.\n" + " [--irc-username | -iu] USERNAME\n" + " Connect using this as 'username' (shown in WHOIS).\n" + " Default: %s.\n" + " [--irc-realname | -ir] REALNAME\n" + " Connect using this as 'realname' (shown in WHOIS).\n" + " Default: %s.\n" + " [--reply-rate | -rr] REPLY_RATE\n" + " Chance to reply to an event (integer, range [0, 100]).\n" + " Default: %d.\n", + exec, + ZoO_DEFAULT_DATA_FILENAME, + ZoO_DEFAULT_IRC_SERVER_ADDR, + ZoO_DEFAULT_IRC_SERVER_PORT, + ZoO_DEFAULT_IRC_SERVER_CHANNEL, + ZoO_DEFAULT_IRC_USERNAME, + ZoO_DEFAULT_IRC_REALNAME, + ZoO_DEFAULT_REPLY_RATE + ); +} + +static int parse_string_arg +( + const char * restrict 
/*
 * Parses argv[i] as a base 10 integer in [min_val, max_val] and stores it
 * in {*dest}. argv[i - 1] is the option name, used in error messages.
 *
 * The whole argument must be a number: an empty string, a value without
 * any digit ("abc") or with trailing characters ("12abc") is rejected
 * (such values used to be accepted as whatever strtol managed to read).
 *
 * Returns 0 on success, -1 if the value is missing (i reached argc),
 * malformed or out of range. {*dest} is only written on success and the
 * caller's errno is preserved.
 */
static int parse_integer_arg
(
   int dest [const restrict static 1],
   int const i,
   const char * argv [const restrict static 1],
   int const argc,
   int const min_val,
   int const max_val
)
{
   long int result;
   char * endptr;
   const int old_errno = errno;

   if (i >= argc)
   {
      ZoO_FATAL
      (
         "Missing value for parameter '%s'.",
         /* Safe: i > 1 */
         argv[i - 1]
      );

      return -1;
   }

   errno = 0;

   result = strtol(argv[i], &endptr, 10);

   if
   (
      (errno != 0)
      /* No digit was consumed at all. */
      || (endptr == argv[i])
      /* Something follows the number. */
      || ((*endptr) != '\0')
      || (result < min_val)
      || (result > max_val)
   )
   {
      ZoO_FATAL
      (
         "Invalid or missing value for parameter '%s', accepted range is "
         "[%d, %d] (integer).",
         /* Safe: i > 1 */
         argv[i - 1],
         min_val,
         max_val
      );

      errno = old_errno;

      return -1;
   }

   /* Safe: min_val <= result <= max_val, both of which are ints. */
   *dest = (int) result;

   errno = old_errno;

   return 0;
}
&(param->irc_server_port), + i, + argv, + argc + ) < 0 + ) + { + return -1; + } + } + else if + ( + (strcmp(argv[i], "--irc-server-channel") == 0) + || (strcmp(argv[i], "-isc") == 0) + ) + { + i += 1; + + if + ( + parse_string_arg + ( + &(param->irc_server_channel), + i, + argv, + argc + ) < 0 + ) + { + return -1; + } + } + else if + ( + (strcmp(argv[i], "--irc-username") == 0) + || (strcmp(argv[i], "-iu") == 0) + ) + { + i += 1; + + if + ( + parse_string_arg + ( + &(param->irc_username), + i, + argv, + argc + ) < 0 + ) + { + return -1; + } + } + else if + ( + (strcmp(argv[i], "--irc-realname") == 0) + || (strcmp(argv[i], "-in") == 0) + ) + { + i += 1; + + if + ( + parse_string_arg + ( + &(param->irc_realname), + i, + argv, + argc + ) < 0 + ) + { + return -1; + } + } + else if + ( + (strcmp(argv[i], "--reply-rate") == 0) + || (strcmp(argv[i], "-rr") == 0) + ) + { + i += 1; + + if + ( + parse_integer_arg + ( + &(param->reply_rate), + i, + argv, + argc, + 0, + 100 + ) < 0 + ) + { + return -1; + } + } + else if + ( + (strcmp(argv[i], "--help") == 0) + || (strcmp(argv[i], "-h") == 0) + ) + { + print_help(argv[0]); + + return 0; + } + else + { + break; + } + } + + if (i == argc) + { + ZoO_S_FATAL("Missing argument: NICKNAME"); + + print_help(argv[0]); + + return -1; + } + + param->aliases_count = (argc - i); + param->aliases = (argv + i); + + return 1; +} diff --git a/src/io/parameters.h b/src/io/parameters.h new file mode 100644 index 0000000..1011e2b --- /dev/null +++ b/src/io/parameters.h @@ -0,0 +1,13 @@ +#ifndef _ZoO_IO_PARAMETERS_H_ +#define _ZoO_IO_PARAMETERS_H_ + +#include "parameters_types.h" + +int ZoO_parameters_initialize +( + struct ZoO_parameters param [const static 1], + int const argc, + const char * argv [const static argc] +); + +#endif diff --git a/src/io/parameters_types.h b/src/io/parameters_types.h new file mode 100644 index 0000000..6d511d8 --- /dev/null +++ b/src/io/parameters_types.h @@ -0,0 +1,20 @@ +#ifndef _ZoO_IO_PARAMETERS_TYPES_H_ +#define 
/*
 * Run-time configuration, filled by ZoO_parameters_initialize().
 *
 * None of the strings are copied: they point either to the ZoO_DEFAULT_*
 * string literals or into the program's argv, so this structure owns no
 * memory.
 */
struct ZoO_parameters
{
   /* File whose lines are learned before connecting. */
   const char * restrict data_filename;

   /* IRC connection settings, handed to ZoO_network_connect(). */
   const char * restrict irc_server_addr;
   const char * restrict irc_server_port;
   const char * restrict irc_server_channel;
   const char * restrict irc_username;
   const char * restrict irc_realname;

   /* Chance to reply to a message, as an integer in [0, 100]. */
   int reply_rate;

   /*
    * Names the bot reacts to: the tail of argv starting at NICKNAME.
    * aliases[0] is used as the IRC nickname.
    */
   int aliases_count;
   const char * restrict * restrict aliases;
};
*/ +#define ZoO_IS_PREFIX(a, b) (strncmp(a, b, strlen(a)) == 0) + +#define ZoO_STRING_EQUALS(a, b) (strcmp(a, b) == 0) + +#endif diff --git a/src/tool/CMakeLists.txt b/src/tool/CMakeLists.txt new file mode 100644 index 0000000..3a1d947 --- /dev/null +++ b/src/tool/CMakeLists.txt @@ -0,0 +1,7 @@ +set( + SRC_FILES ${SRC_FILES} + ${CMAKE_CURRENT_SOURCE_DIR}/strings.c +) + +set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) + diff --git a/src/tool/strings.c b/src/tool/strings.c new file mode 100644 index 0000000..fc4434a --- /dev/null +++ b/src/tool/strings.c @@ -0,0 +1,280 @@ +#define _POSIX_C_SOURCE 200809L +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../io/error.h" + +#include "strings.h" + + +void ZoO_strings_initialize (struct ZoO_strings s [const restrict static 1]) +{ + s->words_count = 0; + s->words = (ZoO_char **) NULL; + s->word_sizes = (size_t *) NULL; +} + +void ZoO_strings_finalize (struct ZoO_strings s [const restrict static 1]) +{ + if (s->words_count != 0) + { + ZoO_index i; + + for (i = 0; i < s->words_count; ++i) + { + free((void *) s->words[i]); + } + + s->words_count = 0; + + free((void *) s->words); + free((void *) s->word_sizes); + + s->words = (ZoO_char **) NULL; + s->word_sizes = (size_t *) NULL; + } +} + +static int add_word +( + struct ZoO_strings s [const restrict static 1], + size_t const line_size, + const ZoO_char line [const restrict static line_size] +) +{ + size_t * new_s_word_sizes; + ZoO_char * new_word, ** new_s_words; + + if (s->words_count == ZoO_INDEX_MAX) + { + ZoO_S_WARNING("Data input sentence has too many words."); + + return -1; + } + + /* overflow-safe, as line_size < SIZE_MAX */ + new_word = (ZoO_char *) calloc((line_size + 1), sizeof(ZoO_char)); + + if (new_word == (ZoO_char *) NULL) + { + ZoO_S_WARNING("Unable to allocate memory to extract new word."); + + return -1; + } + + memcpy((void *) new_word, (const void *) line, line_size); + + new_word[line_size] = '\0'; + + 
new_s_words = + (ZoO_char **) realloc + ( + (void *) s->words, + /* XXX: (sizeof() * _) assumed overflow-safe. */ + /* (di->words_count + 1) overflow-safe */ + (sizeof(ZoO_char *) * (s->words_count + 1)) + ); + + if (new_s_words == (ZoO_char **) NULL) + { + ZoO_S_WARNING("Unable to reallocate memory to extract new word."); + + free((void *) new_word); + + return -1; + } + + s->words = new_s_words; + + new_s_word_sizes = + (size_t *) realloc + ( + (void *) s->word_sizes, + /* XXX: (sizeof() * _) assumed overflow-safe. */ + /* (di->words_count + 1) overflow-safe */ + (sizeof(size_t) * (s->words_count + 1)) + ); + + if (new_s_word_sizes == (size_t *) NULL) + { + ZoO_S_WARNING("Unable to reallocate memory to extract new word."); + + free((void *) new_word); + + return -1; + } + + s->word_sizes = new_s_word_sizes; + + s->words[s->words_count] = new_word; + s->word_sizes[s->words_count] = (line_size + 1); + + s->words_count += 1; + + return 0; +} + +static int parse_word +( + struct ZoO_strings s [const restrict static 1], + ZoO_index const punctuations_count, + const ZoO_char punctuations [const restrict static punctuations_count], + size_t const line_size, + ZoO_char line [const static line_size] +) +{ + ZoO_index j; + + if (line_size == 0) + { + return 0; + } + + for (j = 0; j < line_size; ++j) + { + switch (line[j]) + { + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + line[j] = 'z' - ('Z' - line[j]); + break; + + default: + break; + } + } + + for (j = 0; j < punctuations_count; ++j) + { + /* overflow-safe: line_size > 1 */ + if (line[line_size - 1] == punctuations[j]) + { + if (line_size > 1) + { + if + ( + /* overflow-safe: line_size > 1 */ + (add_word(s, (line_size - 1), line) < 0) + /* overflow-safe: 
line_size > 1 */ + /* prevents line[restrict] */ + || (add_word(s, 1, (line + (line_size - 1))) < 0) + ) + { + return -1; + } + + return 0; + } + } + } + + return add_word(s, line_size, line); +} + +int ZoO_strings_parse +( + struct ZoO_strings s [const restrict static 1], + size_t const input_size, + ZoO_char input [const restrict], + ZoO_index const punctuations_count, + const ZoO_char punctuations [const restrict static punctuations_count] +) +{ + size_t i, w_start; + + ZoO_strings_finalize(s); + + if (input == NULL) + { + return 0; + } + + i = 0; + + /* overflow-safe: input is '\0' terminated. */ + while (input[i] == ' ') + { + ++i; + } + + w_start = i; + + for (; i < input_size; ++i) + { + if (input[i] == ' ') + { + if + ( + parse_word + ( + s, + punctuations_count, + punctuations, + /* overflow-safe: w_start < i */ + (i - w_start), + (input + w_start) + ) < 0 + ) + { + ZoO_strings_finalize(s); + + return -1; + } + + ++i; + + /* safe, as input is terminated by '\0' */ + while (input[i] == ' ') + { + ++i; + } + + w_start = i; + } + } + + if + ( + parse_word + ( + s, + punctuations_count, + punctuations, + /* overflow-safe: w_start < i */ + (i - w_start), + (input + w_start) + ) < 0 + ) + { + ZoO_strings_finalize(s); + + return -1; + } + + return 0; +} diff --git a/src/tool/strings.h b/src/tool/strings.h new file mode 100644 index 0000000..6e6e211 --- /dev/null +++ b/src/tool/strings.h @@ -0,0 +1,19 @@ +#ifndef _ZoO_TOOL_STRINGS_H_ +#define _ZoO_TOOL_STRINGS_H_ + +#include "strings_types.h" + +void ZoO_strings_initialize (struct ZoO_strings s [const restrict static 1]); + +void ZoO_strings_finalize (struct ZoO_strings s [const restrict static 1]); + +int ZoO_strings_parse +( + struct ZoO_strings s [const static 1], + size_t const input_size, + ZoO_char input [const restrict], + ZoO_index const punctuations_count, + const ZoO_char punctuations [const restrict static punctuations_count] +); + +#endif diff --git a/src/tool/strings_types.h b/src/tool/strings_types.h 
/*
 * A sentence split into words.
 * Set up by ZoO_strings_initialize(), filled by ZoO_strings_parse() and
 * released by ZoO_strings_finalize().
 */
struct ZoO_strings
{
   /* Number of entries in both arrays below. */
   ZoO_index words_count;
   /* The words, each one a separately allocated '\0'-terminated string. */
   ZoO_char * restrict * restrict words;
   /* Size of each word, terminating '\0' included. */
   size_t * restrict word_sizes;
};


