summaryrefslogtreecommitdiff
path: root/src/tool
diff options
context:
space:
mode:
author Nathanael Sensfelder <SpamShield0@MultiAgentSystems.org> 2016-05-05 14:59:28 +0200
committer Nathanael Sensfelder <SpamShield0@MultiAgentSystems.org> 2016-05-05 14:59:28 +0200
commit 3405b0c1635843cbb81f042364bfcf238d7dc930 (patch)
tree 39501fec9ec72863c929a45dbc297412bbf90688 /src/tool
parent c28bb6d31a122ec983e1e0a0dd1a8bd198098c58 (diff)
Adds the current code.
It has been running for close to a month on one of the IRC channels I frequent and seems to be working fine. One should be aware that, among other missing features, this version does not permanently store what the bot learns. Indeed, I am currently using a file with 431848 lines as its initial knowledge bank, which makes this particular feature a low priority. Also note that Zero of One converts text to lowercase before reading it but will not change its own aliases. This could potentially cause surprises when the aliases contain uppercase letters.
Diffstat (limited to 'src/tool')
-rw-r--r-- src/tool/CMakeLists.txt 7
-rw-r--r-- src/tool/strings.c 280
-rw-r--r-- src/tool/strings.h 19
-rw-r--r-- src/tool/strings_types.h 15
4 files changed, 321 insertions, 0 deletions
diff --git a/src/tool/CMakeLists.txt b/src/tool/CMakeLists.txt
new file mode 100644
index 0000000..3a1d947
--- /dev/null
+++ b/src/tool/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Add this directory's sources to the SRC_FILES list.
+set(SRC_FILES ${SRC_FILES} ${CMAKE_CURRENT_SOURCE_DIR}/strings.c)
+
+# Publish the extended list to the scope of the including directory.
+set(SRC_FILES ${SRC_FILES} PARENT_SCOPE)
+
diff --git a/src/tool/strings.c b/src/tool/strings.c
new file mode 100644
index 0000000..fc4434a
--- /dev/null
+++ b/src/tool/strings.c
@@ -0,0 +1,280 @@
+#define _POSIX_C_SOURCE 200809L
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h> /* defines SIZE_MAX */
+
+#include "../io/error.h"
+
+#include "strings.h"
+
+
+/*
+ * Puts `s` into the empty state: no words and no backing arrays.
+ *
+ * The previous contents of `s` are overwritten without being released.
+ */
+void ZoO_strings_initialize (struct ZoO_strings s [const restrict static 1])
+{
+   s->word_sizes = NULL;
+   s->words = NULL;
+   s->words_count = 0;
+}
+
+/*
+ * Releases every word stored in `s` as well as both backing arrays, then
+ * puts `s` back into the empty state so that it can be reused or finalized
+ * again.
+ *
+ * The arrays are released even when words_count is 0: add_word() can grow
+ * `words` and then fail before the count is incremented, which leaves an
+ * allocated array behind with a count of zero.
+ *
+ * `s` must have been initialized beforehand.
+ */
+void ZoO_strings_finalize (struct ZoO_strings s [const restrict static 1])
+{
+   ZoO_index i;
+
+   for (i = 0; i < s->words_count; ++i)
+   {
+      free((void *) s->words[i]);
+   }
+
+   /* free(NULL) is a no-op, so never-used structures need no special case. */
+   free((void *) s->words);
+   free((void *) s->word_sizes);
+
+   s->words_count = 0;
+   s->words = (ZoO_char **) NULL;
+   s->word_sizes = (size_t *) NULL;
+}
+
+/*
+ * Appends a NUL-terminated copy of the first `line_size` characters of `line`
+ * at the end of `s`.
+ *
+ * The size recorded for the new word is (line_size + 1): it counts the
+ * terminator.
+ *
+ * Returns 0 on success and -1 on failure (word limit reached, size
+ * computation overflow, or allocation failure). On failure, words_count and
+ * the words already stored are left untouched, although `s->words` may
+ * already have been grown by one slot.
+ */
+static int add_word
+(
+   struct ZoO_strings s [const restrict static 1],
+   size_t const line_size,
+   const ZoO_char line [const restrict static line_size]
+)
+{
+   size_t new_count;
+   size_t * new_s_word_sizes;
+   ZoO_char * new_word, ** new_s_words;
+
+   if (s->words_count == ZoO_INDEX_MAX)
+   {
+      ZoO_S_WARNING("Data input sentence has too many words.");
+
+      return -1;
+   }
+
+   /* Cannot wrap: words_count is strictly below ZoO_INDEX_MAX here. */
+   new_count = (size_t) (s->words_count + 1);
+
+   /*
+    * Reject sizes whose computation would wrap around instead of assuming
+    * that (line_size + 1) and (sizeof() * new_count) are safe.
+    */
+   if
+   (
+      (line_size == SIZE_MAX)
+      || (new_count > (SIZE_MAX / sizeof(ZoO_char *)))
+      || (new_count > (SIZE_MAX / sizeof(size_t)))
+   )
+   {
+      ZoO_S_WARNING("Unable to allocate memory to extract new word.");
+
+      return -1;
+   }
+
+   new_word = (ZoO_char *) calloc((line_size + 1), sizeof(ZoO_char));
+
+   if (new_word == (ZoO_char *) NULL)
+   {
+      ZoO_S_WARNING("Unable to allocate memory to extract new word.");
+
+      return -1;
+   }
+
+   memcpy((void *) new_word, (const void *) line, line_size);
+
+   new_word[line_size] = '\0';
+
+   /* realloc() results go through temporaries so that nothing is lost on failure. */
+   new_s_words =
+      (ZoO_char **) realloc
+      (
+         (void *) s->words,
+         (sizeof(ZoO_char *) * new_count)
+      );
+
+   if (new_s_words == (ZoO_char **) NULL)
+   {
+      ZoO_S_WARNING("Unable to reallocate memory to extract new word.");
+
+      free((void *) new_word);
+
+      return -1;
+   }
+
+   s->words = new_s_words;
+
+   new_s_word_sizes =
+      (size_t *) realloc
+      (
+         (void *) s->word_sizes,
+         (sizeof(size_t) * new_count)
+      );
+
+   if (new_s_word_sizes == (size_t *) NULL)
+   {
+      ZoO_S_WARNING("Unable to reallocate memory to extract new word.");
+
+      free((void *) new_word);
+
+      return -1;
+   }
+
+   s->word_sizes = new_s_word_sizes;
+
+   s->words[s->words_count] = new_word;
+   s->word_sizes[s->words_count] = (line_size + 1);
+
+   s->words_count += 1;
+
+   return 0;
+}
+
+/*
+ * Normalizes one space-delimited token and stores it in `s`.
+ *
+ * The first `line_size` characters of `line` are converted to lowercase in
+ * place ('A'-'Z' only). If the token is longer than one character and ends
+ * with one of the `punctuations`, it is stored as two words: the token
+ * without its last character, then that last character on its own.
+ * Otherwise the whole token is stored as a single word.
+ *
+ * Returns 0 on success (including the empty token, which stores nothing) and
+ * -1 if a word could not be added. When the split form fails on its second
+ * word, the first one remains stored in `s`.
+ */
+static int parse_word
+(
+   struct ZoO_strings s [const restrict static 1],
+   ZoO_index const punctuations_count,
+   const ZoO_char punctuations [const restrict static punctuations_count],
+   size_t const line_size,
+   ZoO_char line [const static line_size]
+)
+{
+   size_t i, last;
+   ZoO_index j;
+
+   if (line_size == 0)
+   {
+      return 0;
+   }
+
+   /* size_t index: line_size may exceed what a ZoO_index can represent. */
+   for (i = 0; i < line_size; ++i)
+   {
+      switch (line[i])
+      {
+         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+         case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+         case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+         case 'V': case 'W': case 'X': case 'Y': case 'Z':
+            line[i] = 'z' - ('Z' - line[i]);
+            break;
+
+         default:
+            break;
+      }
+   }
+
+   /* No underflow: line_size >= 1 at this point. */
+   last = (line_size - 1);
+
+   /* A token made of a single character is never split. */
+   if (line_size > 1)
+   {
+      for (j = 0; j < punctuations_count; ++j)
+      {
+         if (line[last] == punctuations[j])
+         {
+            if
+            (
+               (add_word(s, last, line) < 0)
+               /*
+                * Points inside `line`, which is why `line` cannot be
+                * restrict-qualified.
+                */
+               || (add_word(s, 1, (line + last)) < 0)
+            )
+            {
+               return -1;
+            }
+
+            return 0;
+         }
+      }
+   }
+
+   return add_word(s, line_size, line);
+}
+
+/*
+ * Replaces the content of `s` with the words found in the first `input_size`
+ * characters of `input`.
+ *
+ * Words are separated by runs of ' ' characters. Each one is handed to
+ * parse_word(), which lowercases it in place (so `input` is modified) and
+ * splits a trailing punctuation character into its own word.
+ *
+ * Any previous content of `s` is released first. A NULL `input` leaves `s`
+ * empty and is not an error.
+ *
+ * Only input[0 .. input_size - 1] is ever read: the scan does not rely on a
+ * '\0' terminator, and leading, repeated or trailing spaces never produce a
+ * word.
+ *
+ * Returns 0 on success. Returns -1 if a word could not be stored, in which
+ * case `s` is left empty.
+ */
+int ZoO_strings_parse
+(
+   struct ZoO_strings s [const restrict static 1],
+   size_t const input_size,
+   ZoO_char input [const restrict],
+   ZoO_index const punctuations_count,
+   const ZoO_char punctuations [const restrict static punctuations_count]
+)
+{
+   size_t i, w_start;
+
+   ZoO_strings_finalize(s);
+
+   if (input == NULL)
+   {
+      return 0;
+   }
+
+   i = 0;
+
+   while (i < input_size)
+   {
+      /* Skip the separators located before the next word. */
+      while ((i < input_size) && (input[i] == ' '))
+      {
+         ++i;
+      }
+
+      w_start = i;
+
+      /* Reach the end of the word: next separator or end of input. */
+      while ((i < input_size) && (input[i] != ' '))
+      {
+         ++i;
+      }
+
+      /* i == w_start happens only when the input ends with separators. */
+      if
+      (
+         (i > w_start)
+         &&
+         (
+            parse_word
+            (
+               s,
+               punctuations_count,
+               punctuations,
+               /* No underflow: w_start < i */
+               (i - w_start),
+               (input + w_start)
+            ) < 0
+         )
+      )
+      {
+         ZoO_strings_finalize(s);
+
+         return -1;
+      }
+   }
+
+   return 0;
+}
diff --git a/src/tool/strings.h b/src/tool/strings.h
new file mode 100644
index 0000000..6e6e211
--- /dev/null
+++ b/src/tool/strings.h
@@ -0,0 +1,19 @@
+#ifndef _ZoO_TOOL_STRINGS_H_
+#define _ZoO_TOOL_STRINGS_H_
+
+#include "strings_types.h"
+
+/*
+ * Puts `s` into the empty state (no words, no arrays). Nothing previously
+ * referenced by `s` is released.
+ */
+void ZoO_strings_initialize (struct ZoO_strings s [const restrict static 1]);
+
+/*
+ * Releases the words and arrays held by `s` and puts it back into the empty
+ * state. `s` must have been initialized beforehand.
+ */
+void ZoO_strings_finalize (struct ZoO_strings s [const restrict static 1]);
+
+/*
+ * Replaces the content of `s` with the space-separated words found in the
+ * first `input_size` characters of `input`.
+ *
+ * `input` is modified: uppercase letters are converted to lowercase in
+ * place. A word longer than one character that ends with one of the
+ * `punctuations` is stored as two words (the word, then the punctuation).
+ * A NULL `input` leaves `s` empty and is not an error.
+ *
+ * Returns 0 on success, -1 on failure (in which case `s` is left empty).
+ */
+int ZoO_strings_parse
+(
+   struct ZoO_strings s [const restrict static 1],
+   size_t const input_size,
+   ZoO_char input [const restrict],
+   ZoO_index const punctuations_count,
+   const ZoO_char punctuations [const restrict static punctuations_count]
+);
+
+#endif
diff --git a/src/tool/strings_types.h b/src/tool/strings_types.h
new file mode 100644
index 0000000..f74dcc8
--- /dev/null
+++ b/src/tool/strings_types.h
@@ -0,0 +1,15 @@
+#ifndef _ZoO_TOOL_STRINGS_TYPES_H_
+#define _ZoO_TOOL_STRINGS_TYPES_H_
+
+#include <stdio.h>
+
+#include "../pervasive.h"
+
+struct ZoO_strings /* Ordered list of words, filled by ZoO_strings_parse. */
+{
+ ZoO_index words_count; /* Number of entries in both words and word_sizes. */
+ ZoO_char * restrict * restrict words; /* words[i]: heap-allocated, NUL-terminated copy of the i-th word. */
+ size_t * restrict word_sizes; /* word_sizes[i]: length of words[i] plus one (counts the terminator). */
+};
+
+#endif