summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNathanael Sensfelder <SpamShield0@MultiAgentSystems.org>2017-01-18 19:09:16 +0100
committerNathanael Sensfelder <SpamShield0@MultiAgentSystems.org>2017-01-18 19:09:16 +0100
commit0d49fb74eadcf933f696420cd182077927680d26 (patch)
tree9220d260ce878f369138da12dae0300cf9ade5c9 /src/core/sequence_from_string.c
parent24afb3e60bafd98e6a83dcb41ee6a7f7d41e76bc (diff)
Done with 'core', starting to work on 'knowledge'.
Diffstat (limited to 'src/core/sequence_from_string.c')
-rw-r--r--src/core/sequence_from_string.c315
1 files changed, 315 insertions, 0 deletions
diff --git a/src/core/sequence_from_string.c b/src/core/sequence_from_string.c
new file mode 100644
index 0000000..51d7049
--- /dev/null
+++ b/src/core/sequence_from_string.c
@@ -0,0 +1,315 @@
+#define _POSIX_C_SOURCE 200809L
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h> /* defines SIZE_MAX */
+
+#include "../core/char.h"
+#include "../core/index.h"
+
+#include "../cli/cli.h"
+
+#include "../knowledge/knowledge.h"
+
+#include "sequence.h"
+
+static int add_word_id_to_sequence
+(
+ const ZoO_index word_id,
+ ZoO_index * sequence [const restrict static 1],
+ ZoO_index sequence_length [const restrict static 1]
+)
+{
+ ZoO_index * new_sequence;
+
+ *sequence_length += 1;
+
+ new_sequence =
+ (ZoO_index *) realloc
+ (
+ (void *) *sequence,
+ (((size_t) sequence_length) * sizeof(ZoO_index))
+ );
+
+ if (new_sequence == (ZoO_index *) NULL)
+ {
+ ZoO_S_ERROR("Unable to reallocate a sequence to add word ids to it.");
+
+ return -1;
+ }
+
+ return 0;
+}
+
+/******************************************************************************/
+/** HANDLING PUNCTUATION ******************************************************/
+/******************************************************************************/
+static int add_punctuation_to_sequence
+(
+ const ZoO_char string [const restrict static 1],
+ const ZoO_char punctuation,
+ ZoO_index * sequence [const restrict static 1],
+ ZoO_index sequence_length [const restrict static 1],
+ const struct ZoO_knowledge k [const restrict static 1]
+)
+{
+ ZoO_index word_id;
+ ZoO_char as_word[2];
+
+ as_word[0] = punctuation;
+ as_word[1] = '\0';
+
+ if (ZoO_knowledge_find_word_id(k, as_word, 2, &word_id) < 0)
+ {
+ ZoO_PROG_ERROR
+ (
+ "'%s' was defined as a punctuation, was found in a string, yet is not"
+ " defined in the knowledge database.",
+ as_word
+ );
+
+ return -1;
+ }
+
+ if (add_word_id_to_sequence(word_id, sequence, sequence_length) < 0)
+ {
+ return -1;
+ }
+
+ return 0;
+}
+
+static int word_is_punctuation_terminated
+(
+ const ZoO_char string [const restrict static 1],
+ const ZoO_index word_start,
+ const ZoO_index word_length
+)
+{
+ return ZoO_char_is_punctuation(string[word_length]);
+}
+
+/******************************************************************************/
+/** HANDLING WORDS ************************************************************/
+/******************************************************************************/
+static int add_word_to_sequence
+(
+ const ZoO_char string [const restrict static 1],
+ const ZoO_index word_start,
+ const ZoO_index word_length,
+ ZoO_index * sequence [const restrict static 1],
+ ZoO_index sequence_length [const restrict static 1],
+ struct ZoO_knowledge k [const restrict static 1]
+)
+{
+ ZoO_index word_id;
+ ZoO_char * stored_word;
+
+ if (word_length == 0)
+ {
+ return 0;
+ }
+
+ if
+ (
+ ZoO_knowledge_learn_word
+ (
+ k,
+ (string + word_start),
+ word_length,
+ &word_id
+ ) < 0
+ )
+ {
+ return -1;
+ }
+
+ if (add_word_id_to_sequence(word_id, sequence, sequence_length) < 0)
+ {
+ return -1;
+ }
+
+ return 0;
+}
+
+static int add_finding_to_sequence
+(
+ const ZoO_char string [const restrict static 1],
+ const ZoO_index word_start,
+ const ZoO_index word_length,
+ ZoO_index * sequence [const restrict static 1],
+ ZoO_index sequence_length [const restrict static 1],
+ struct ZoO_knowledge k [const restrict static 1]
+)
+{
+ ZoO_index punctuation;
+
+ if (word_is_punctuation_terminated(string, word_start, word_length))
+ {
+ punctuation = 1;
+ }
+ else
+ {
+ punctuation = 0;
+ }
+
+ if
+ (
+ add_word_to_sequence
+ (
+ string,
+ word_start,
+ (word_length - punctuation),
+ sequence,
+ sequence_length,
+ k
+ ) < 0
+ )
+ {
+ return -1;
+ }
+
+ if
+ (
+ (punctuation == 1)
+ &&
+ (
+ add_punctuation_to_sequence
+ (
+ string,
+ string[word_start + word_length - 1],
+ sequence,
+ sequence_length,
+ k
+ ) < 0
+ )
+ )
+ {
+ return -1;
+ }
+
+ return 0;
+}
+
+static int find_word
+(
+ const ZoO_char string [const restrict static 1],
+ const ZoO_index string_length,
+ const ZoO_index offset,
+ ZoO_index word_start [const restrict static 1],
+ ZoO_index word_length [const restrict static 1]
+)
+{
+ ZoO_index i;
+
+ i = offset;
+
+ while ((string[i] == ' ') && (i < string_length))
+ {
+ i += 1;
+ }
+
+ if (i >= string_length)
+ {
+ return -1;
+ }
+
+ *word_start = i;
+
+ while ((string[i] != ' ') && (i < string_length))
+ {
+ i += 1;
+ }
+
+ if (i >= string_length)
+ {
+ return -1;
+ }
+
+ *word_length = (i - *word_start);
+
+ return 0;
+}
+
+/******************************************************************************/
+/** EXPORTED ******************************************************************/
+/******************************************************************************/
+int ZoO_sequence_from_undercase_string
+(
+ const ZoO_char string [const restrict],
+ const ZoO_index string_length,
+ struct ZoO_knowledge k [const restrict static 1],
+ ZoO_index * sequence [const restrict static 1],
+ ZoO_index sequence_length [const restrict static 1]
+)
+{
+ ZoO_index word_start, word_length;
+ ZoO_index i;
+
+ i = 0;
+
+ *sequence = (ZoO_index *) NULL;
+ *sequence_length = 0;
+
+ if
+ (
+ add_word_id_to_sequence
+ (
+ ZoO_START_OF_SEQUENCE_ID,
+ sequence,
+ sequence_length
+ ) < 0
+ )
+ {
+ return -1;
+ }
+
+ while (i < string_length)
+ {
+ if (find_word(string, i, string_length, &word_start, &word_length) < 0)
+ {
+ break;
+ }
+
+ if
+ (
+ add_finding_to_sequence
+ (
+ string,
+ word_start,
+ word_length,
+ sequence,
+ sequence_length,
+ k
+ ) < 0
+ )
+ {
+ free((void *) *sequence);
+ *sequence = (ZoO_index *) NULL;
+ *sequence_length = 0;
+
+ return -1;
+ }
+
+ i = (word_start + word_length);
+ }
+
+ if
+ (
+ add_word_id_to_sequence
+ (
+ ZoO_END_OF_SEQUENCE_ID,
+ sequence,
+ sequence_length
+ ) < 0
+ )
+ {
+ free((void *) *sequence);
+
+ *sequence = (ZoO_index *) NULL;
+ *sequence_length = 0;
+
+ return -1;
+ }
+
+ return 0;
+}