| summaryrefslogtreecommitdiff |
diff options
Diffstat (limited to 'src/core/sequence_from_string.c')
| -rw-r--r-- | src/core/sequence_from_string.c | 315 |
1 files changed, 315 insertions, 0 deletions
diff --git a/src/core/sequence_from_string.c b/src/core/sequence_from_string.c new file mode 100644 index 0000000..51d7049 --- /dev/null +++ b/src/core/sequence_from_string.c @@ -0,0 +1,315 @@ +#define _POSIX_C_SOURCE 200809L +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../core/char.h" +#include "../core/index.h" + +#include "../cli/cli.h" + +#include "../knowledge/knowledge.h" + +#include "sequence.h" + +static int add_word_id_to_sequence +( + const ZoO_index word_id, + ZoO_index * sequence [const restrict static 1], + ZoO_index sequence_length [const restrict static 1] +) +{ + ZoO_index * new_sequence; + + *sequence_length += 1; + + new_sequence = + (ZoO_index *) realloc + ( + (void *) *sequence, + (((size_t) sequence_length) * sizeof(ZoO_index)) + ); + + if (new_sequence == (ZoO_index *) NULL) + { + ZoO_S_ERROR("Unable to reallocate a sequence to add word ids to it."); + + return -1; + } + + return 0; +} + +/******************************************************************************/ +/** HANDLING PUNCTUATION ******************************************************/ +/******************************************************************************/ +static int add_punctuation_to_sequence +( + const ZoO_char string [const restrict static 1], + const ZoO_char punctuation, + ZoO_index * sequence [const restrict static 1], + ZoO_index sequence_length [const restrict static 1], + const struct ZoO_knowledge k [const restrict static 1] +) +{ + ZoO_index word_id; + ZoO_char as_word[2]; + + as_word[0] = punctuation; + as_word[1] = '\0'; + + if (ZoO_knowledge_find_word_id(k, as_word, 2, &word_id) < 0) + { + ZoO_PROG_ERROR + ( + "'%s' was defined as a punctuation, was found in a string, yet is not" + " defined in the knowledge database.", + as_word + ); + + return -1; + } + + if (add_word_id_to_sequence(word_id, sequence, sequence_length) < 0) + { + return -1; + } + + return 0; +} + +static int word_is_punctuation_terminated +( + const ZoO_char string [const restrict static 1], + const ZoO_index word_start, + const ZoO_index word_length +) +{ + return ZoO_char_is_punctuation(string[word_length]); +} + +/******************************************************************************/ +/** HANDLING WORDS ************************************************************/ +/******************************************************************************/ +static int add_word_to_sequence +( + const ZoO_char string [const restrict static 1], + const ZoO_index word_start, + const ZoO_index word_length, + ZoO_index * sequence [const restrict static 1], + ZoO_index sequence_length [const restrict static 1], + struct ZoO_knowledge k [const restrict static 1] +) +{ + ZoO_index word_id; + ZoO_char * stored_word; + + if (word_length == 0) + { + return 0; + } + + if + ( + ZoO_knowledge_learn_word + ( + k, + (string + word_start), + word_length, + &word_id + ) < 0 + ) + { + return -1; + } + + if (add_word_id_to_sequence(word_id, sequence, sequence_length) < 0) + { + return -1; + } + + return 0; +} + +static int add_finding_to_sequence +( + const ZoO_char string [const restrict static 1], + const ZoO_index word_start, + const ZoO_index word_length, + ZoO_index * sequence [const restrict static 1], + ZoO_index sequence_length [const restrict static 1], + struct ZoO_knowledge k [const restrict static 1] +) +{ + ZoO_index punctuation; + + if (word_is_punctuation_terminated(string, word_start, word_length)) + { + punctuation = 1; + } + else + { + punctuation = 0; + } + + if + ( + add_word_to_sequence + ( + string, + word_start, + (word_length - punctuation), + sequence, + sequence_length, + k + ) < 0 + ) + { + return -1; + } + + if + ( + (punctuation == 1) + && + ( + add_punctuation_to_sequence + ( + string, + string[word_start + word_length - 1], + sequence, + sequence_length, + k + ) < 0 + ) + ) + { + return -1; + } + + return 0; +} + +static int find_word +( + const ZoO_char string [const restrict static 1], + const ZoO_index string_length, + const ZoO_index offset, + ZoO_index word_start [const restrict static 1], + ZoO_index word_length [const restrict static 1] +) +{ + ZoO_index i; + + i = offset; + + while ((string[i] == ' ') && (i < string_length)) + { + i += 1; + } + + if (i >= string_length) + { + return -1; + } + + *word_start = i; + + while ((string[i] != ' ') && (i < string_length)) + { + i += 1; + } + + if (i >= string_length) + { + return -1; + } + + *word_length = (i - *word_start); + + return 0; +} + +/******************************************************************************/ +/** EXPORTED ******************************************************************/ +/******************************************************************************/ +int ZoO_sequence_from_undercase_string +( + const ZoO_char string [const restrict], + const ZoO_index string_length, + struct ZoO_knowledge k [const restrict static 1], + ZoO_index * sequence [const restrict static 1], + ZoO_index sequence_length [const restrict static 1] +) +{ + ZoO_index word_start, word_length; + ZoO_index i; + + i = 0; + + *sequence = (ZoO_index *) NULL; + *sequence_length = 0; + + if + ( + add_word_id_to_sequence + ( + ZoO_START_OF_SEQUENCE_ID, + sequence, + sequence_length + ) < 0 + ) + { + return -1; + } + + while (i < string_length) + { + if (find_word(string, i, string_length, &word_start, &word_length) < 0) + { + break; + } + + if + ( + add_finding_to_sequence + ( + string, + word_start, + word_length, + sequence, + sequence_length, + k + ) < 0 + ) + { + free((void *) *sequence); + *sequence = (ZoO_index *) NULL; + *sequence_length = 0; + + return -1; + } + + i = (word_start + word_length); + } + + if + ( + add_word_id_to_sequence + ( + ZoO_END_OF_SEQUENCE_ID, + sequence, + sequence_length + ) < 0 + ) + { + free((void *) *sequence); + + *sequence = (ZoO_index *) NULL; + *sequence_length = 0; + + return -1; + } + + return 0; +} |


