From 0d49fb74eadcf933f696420cd182077927680d26 Mon Sep 17 00:00:00 2001 From: Nathanael Sensfelder Date: Wed, 18 Jan 2017 19:09:16 +0100 Subject: Done with 'core', starting to work on 'knowledge'. --- src/CMakeLists.txt | 10 +- src/cli/CMakeLists.txt | 6 + src/cli/cli.h | 145 ++++++++ src/cli/parameters.c | 385 +++++++++++++++++++++ src/cli/parameters.h | 13 + src/cli/parameters_types.h | 67 ++++ src/core/CMakeLists.txt | 9 +- src/core/assimilate.c | 281 --------------- src/core/char.c | 18 +- src/core/char.h | 35 +- src/core/char_types.h | 6 +- src/core/index.c | 61 ++++ src/core/index.h | 11 + src/core/index_types.h | 2 + src/core/knowledge.c | 15 - src/core/knowledge.h | 101 ------ src/core/knowledge_finalize.c | 121 ------- src/core/knowledge_search.c | 335 ------------------ src/core/knowledge_types.h | 37 -- src/core/main.c | 436 ------------------------ src/core/sequence.c | 84 +++-- src/core/sequence.h | 20 +- src/core/sequence_creation.c | 9 +- src/core/sequence_from_string.c | 315 +++++++++++++++++ src/core/sequence_types.h | 3 +- src/file/data_input.c | 98 ++++++ src/file/data_input.h | 21 ++ src/file/data_input_types.h | 16 + src/file/data_output.c | 66 ++++ src/file/data_output.h | 11 + src/io/CMakeLists.txt | 9 - src/io/data_input.c | 98 ------ src/io/data_input.h | 21 -- src/io/data_input_types.h | 16 - src/io/data_output.c | 65 ---- src/io/data_output.h | 11 - src/io/error.h | 148 -------- src/io/network.c | 568 ------------------------------- src/io/network.h | 28 -- src/io/network_types.h | 34 -- src/io/parameters.c | 385 --------------------- src/io/parameters.h | 13 - src/io/parameters_types.h | 21 -- src/irc/network.c | 568 +++++++++++++++++++++++++++++++ src/irc/network.h | 28 ++ src/irc/network_types.h | 34 ++ src/knowledge/CMakeLists.txt | 11 + src/knowledge/knowledge.c | 21 ++ src/knowledge/knowledge.h | 110 ++++++ src/knowledge/knowledge_finalize.c | 122 +++++++ src/knowledge/knowledge_learn_sequence.c | 324 ++++++++++++++++++ 
src/knowledge/knowledge_learn_word.c | 276 +++++++++++++++ src/knowledge/knowledge_search.c | 336 ++++++++++++++++++ src/knowledge/knowledge_types.h | 38 +++ src/main.c | 436 ++++++++++++++++++++++++ src/pervasive.h | 34 +- 56 files changed, 3673 insertions(+), 2819 deletions(-) create mode 100644 src/cli/CMakeLists.txt create mode 100644 src/cli/cli.h create mode 100644 src/cli/parameters.c create mode 100644 src/cli/parameters.h create mode 100644 src/cli/parameters_types.h delete mode 100644 src/core/assimilate.c create mode 100644 src/core/index.c delete mode 100644 src/core/knowledge.c delete mode 100644 src/core/knowledge.h delete mode 100644 src/core/knowledge_finalize.c delete mode 100644 src/core/knowledge_search.c delete mode 100644 src/core/knowledge_types.h delete mode 100644 src/core/main.c create mode 100644 src/core/sequence_from_string.c create mode 100644 src/file/data_input.c create mode 100644 src/file/data_input.h create mode 100644 src/file/data_input_types.h create mode 100644 src/file/data_output.c create mode 100644 src/file/data_output.h delete mode 100644 src/io/CMakeLists.txt delete mode 100644 src/io/data_input.c delete mode 100644 src/io/data_input.h delete mode 100644 src/io/data_input_types.h delete mode 100644 src/io/data_output.c delete mode 100644 src/io/data_output.h delete mode 100644 src/io/error.h delete mode 100644 src/io/network.c delete mode 100644 src/io/network.h delete mode 100644 src/io/network_types.h delete mode 100644 src/io/parameters.c delete mode 100644 src/io/parameters.h delete mode 100644 src/io/parameters_types.h create mode 100644 src/irc/network.c create mode 100644 src/irc/network.h create mode 100644 src/irc/network_types.h create mode 100644 src/knowledge/CMakeLists.txt create mode 100644 src/knowledge/knowledge.c create mode 100644 src/knowledge/knowledge.h create mode 100644 src/knowledge/knowledge_finalize.c create mode 100644 src/knowledge/knowledge_learn_sequence.c create mode 100644 
src/knowledge/knowledge_learn_word.c create mode 100644 src/knowledge/knowledge_search.c create mode 100644 src/knowledge/knowledge_types.h create mode 100644 src/main.c diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 76a73ed..594b14f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,13 @@ +add_subdirectory(cli) add_subdirectory(core) -add_subdirectory(io) +add_subdirectory(file) +add_subdirectory(irc) +add_subdirectory(knowledge) add_subdirectory(tool) +set( + SRC_FILES ${SRC_FILES} + ${CMAKE_CURRENT_SOURCE_DIR}/main.c +) + set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) diff --git a/src/cli/CMakeLists.txt b/src/cli/CMakeLists.txt new file mode 100644 index 0000000..94e6337 --- /dev/null +++ b/src/cli/CMakeLists.txt @@ -0,0 +1,6 @@ +set( + SRC_FILES ${SRC_FILES} + ${CMAKE_CURRENT_SOURCE_DIR}/parameters.c +) +set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) + diff --git a/src/cli/cli.h b/src/cli/cli.h new file mode 100644 index 0000000..5aec25b --- /dev/null +++ b/src/cli/cli.h @@ -0,0 +1,145 @@ +#ifndef _ZoO_CLI_CLI_H_ +#define _ZoO_CLI_CLI_H_ + +#include + +#include "../pervasive.h" + + +#ifndef ZoO_DEBUG_PROGRAM_FLOW + #define ZoO_DEBUG_PROGRAM_FLOW (0 || ZoO_DEBUG_ALL) +#endif + +#ifndef ZoO_DEBUG_CONFIG + #define ZoO_DEBUG_CONFIG (0 || ZoO_DEBUG_ALL) +#endif + +#ifndef ZoO_DEBUG_LEARNING + #define ZoO_DEBUG_LEARNING (0 || ZoO_DEBUG_ALL) +#endif + +#ifndef ZoO_DEBUG_NETWORK + #define ZoO_DEBUG_NETWORK 1 +#endif + +#ifndef ZoO_DEBUG_NETWORK + #define ZoO_DEBUG_NETWORK (0 || ZoO_DEBUG_ALL) +#endif + +#define ZoO_ENABLE_WARNINGS_OUTPUT 1 +#define ZoO_ENABLE_RUNTIME_ERRORS_OUTPUT 1 +#define ZoO_ENABLE_PROGRAMMING_ERRORS_OUTPUT 1 +#define ZoO_ENABLE_FATAL_ERROR_OUTPUT 1 + +#ifdef ZoO_ENABLE_ERROR_LOCATION + #define ZoO_LOCATION "[" __FILE__ "][" ZoO_TO_STRING(__LINE__) "]" +#else + #define ZoO_LOCATION "" +#endif + +#define ZoO_PRINT_STDERR(symbol, str, ...)\ + fprintf(stderr, "[" symbol "]" ZoO_LOCATION " " str "\n", __VA_ARGS__); + +/* + * Given that 
we use preprocessor constants as flags, we can expect the compilers + * to remove the test condition for disabled flags. No need to be shy about + * allowing many debug options. + */ + +#define ZoO_DEBUG(flag, str, ...)\ + ZoO_ISOLATE\ + (\ + if (flag)\ + {\ + ZoO_PRINT_STDERR("D", str, __VA_ARGS__);\ + }\ + ) + + +#define ZoO_WARNING(str, ...)\ + ZoO_ISOLATE\ + (\ + if (ZoO_ENABLE_WARNINGS_OUTPUT)\ + {\ + ZoO_PRINT_STDERR("W", str, __VA_ARGS__);\ + }\ + ) + +#define ZoO_ERROR(str, ...)\ + ZoO_ISOLATE\ + (\ + if (ZoO_ENABLE_RUNTIME_ERRORS_OUTPUT)\ + {\ + ZoO_PRINT_STDERR("E", str, __VA_ARGS__);\ + }\ + ) + +#define ZoO_PROG_ERROR(str, ...)\ + ZoO_ISOLATE\ + (\ + if (ZoO_ENABLE_PROGRAMMING_ERRORS_OUTPUT)\ + {\ + ZoO_PRINT_STDERR("P", str, __VA_ARGS__);\ + }\ + ) + +#define ZoO_FATAL(str, ...)\ + ZoO_ISOLATE\ + (\ + if (ZoO_ENABLE_FATAL_ERROR_OUTPUT)\ + {\ + ZoO_PRINT_STDERR("F", str, __VA_ARGS__);\ + }\ + ) + +/* For outputs without dynamic content (static). ******************************/ + +#define ZoO_PRINT_S_STDERR(symbol, str)\ + fprintf(stderr, "[" symbol "]" ZoO_LOCATION " " str "\n"); + +#define ZoO_S_DEBUG(flag, str)\ + ZoO_ISOLATE\ + (\ + if (flag)\ + {\ + ZoO_PRINT_S_STDERR("D", str);\ + }\ + ) + +#define ZoO_S_WARNING(str)\ + ZoO_ISOLATE\ + (\ + if (ZoO_ENABLE_WARNINGS_OUTPUT)\ + {\ + ZoO_PRINT_S_STDERR("W", str);\ + }\ + ) + +#define ZoO_S_ERROR(str)\ + ZoO_ISOLATE\ + (\ + if (ZoO_ENABLE_RUNTIME_ERRORS_OUTPUT)\ + {\ + ZoO_PRINT_S_STDERR("E", str);\ + }\ + ) + +#define ZoO_S_PROG_ERROR(str)\ + ZoO_ISOLATE\ + (\ + if (ZoO_ENABLE_PROGRAMMING_ERRORS_OUTPUT)\ + {\ + ZoO_PRINT_S_STDERR("P", str);\ + }\ + ) + +#define ZoO_S_FATAL(str)\ + ZoO_ISOLATE\ + (\ + if (ZoO_ENABLE_FATAL_ERROR_OUTPUT)\ + {\ + ZoO_PRINT_S_STDERR("F", str);\ + }\ + ) + +#endif diff --git a/src/cli/parameters.c b/src/cli/parameters.c new file mode 100644 index 0000000..77c33aa --- /dev/null +++ b/src/cli/parameters.c @@ -0,0 +1,385 @@ +#include +#include +#include +#include + +#include
"../pervasive.h" + +#include "error.h" + +#include "parameters.h" + +static void load_default_parameters +( + struct ZoO_parameters param [const restrict static 1] +) +{ + param->data_filename = ZoO_DEFAULT_DATA_FILENAME; + param->new_data_filename = (char *) NULL; + + param->irc_server_addr = ZoO_DEFAULT_IRC_SERVER_ADDR; + param->irc_server_port = ZoO_DEFAULT_IRC_SERVER_PORT; + param->irc_server_channel = ZoO_DEFAULT_IRC_SERVER_CHANNEL; + param->irc_username = ZoO_DEFAULT_IRC_USERNAME; + param->irc_realname = ZoO_DEFAULT_IRC_REALNAME; + + param->reply_rate = ZoO_DEFAULT_REPLY_RATE; + + param->aliases_count = 0; + param->aliases = NULL; +} + +static void print_help (const char exec [const restrict static 1]) +{ + printf + ( + "Usage: %s [option_1 option_2 ...] NICKNAME [ALIAS_1 ALIAS_2 ...] \n" + "NICKNAME is used as the IRC nickname value.\n" + "If NICKNAME or any ALIAS is found in an event, the program will reply.\n" + "\nAvailable options:\n" + " [--data-filename | -df] FILENAME\n" + " Learn content from FILENAME before connecting.\n" + " Default: %s.\n" + " [--new-data-filename | -ndf] FILENAME\n" + " Store new data learned in FILENAME.\n" + " Default: value of the --data-filename param.\n" + " [--irc-server-addr | -isa] IRC_SERVER_ADDR\n" + " Connect to this server address.\n" + " Default: %s.\n" + " [--irc-server-port | -isp] IRC_SERVER_PORT\n" + " Connect to this server port.\n" + " Default: %s.\n" + " [--irc-server-channel | -isc] IRC_SERVER_CHANNEL\n" + " Connect to this server's channel.\n" + " Default: %s.\n" + " [--irc-username | -iu] USERNAME\n" + " Connect using this as 'username' (shown in WHOIS).\n" + " Default: %s.\n" + " [--irc-realname | -ir] REALNAME\n" + " Connect using this as 'realname' (shown in WHOIS).\n" + " Default: %s.\n" + " [--reply-rate | -rr] REPLY_RATE\n" + " Chance to reply to an event (integer, range [0, 100]).\n" + " Default: %d.\n", + exec, + ZoO_DEFAULT_DATA_FILENAME, + ZoO_DEFAULT_IRC_SERVER_ADDR, + 
ZoO_DEFAULT_IRC_SERVER_PORT, + ZoO_DEFAULT_IRC_SERVER_CHANNEL, + ZoO_DEFAULT_IRC_USERNAME, + ZoO_DEFAULT_IRC_REALNAME, + ZoO_DEFAULT_REPLY_RATE + ); +} + +static int parse_string_arg +( + const char * restrict dest [const restrict static 1], + int const i, + const char * restrict argv [const restrict static 1], + int const argc +) +{ + if (i == argc) + { + ZoO_FATAL + ( + "Missing value for parameter '%s'.", + /* Safe: i > 1 */ + argv[i - 1] + ); + + return -1; + } + + *dest = argv[i]; + + return 0; +} + +static int parse_integer_arg +( + int dest [const restrict static 1], + int const i, + const char * argv [const restrict static 1], + int const argc, + int const min_val, + int const max_val +) +{ + long int result; + char * endptr; + const int old_errno = errno; + + if (i == argc) + { + ZoO_FATAL + ( + "Missing value for parameter '%s'.", + /* Safe: i > 1 */ + argv[i - 1] + ); + + return -1; + } + + errno = 0; + + result = strtol(argv[i], &endptr, 10); + + if + ( + (errno != 0) + || ((*endptr) == '\n') + || (result < min_val) + || (result > max_val) + ) + { + ZoO_FATAL + ( + "Invalid or missing value for parameter '%s', accepted range is " + "[%d, %d] (integer).", + /* Safe: i > 1 */ + argv[i - 1], + min_val, + max_val + ); + + errno = old_errno; + + return -1; + } + + *dest = (int) result; + + errno = old_errno; + + return 0; +} + +int ZoO_parameters_initialize +( + struct ZoO_parameters param [const restrict static 1], + int const argc, + const char * argv [const restrict static argc] +) +{ + int i; + + load_default_parameters(param); + + for (i = 1; i < argc; ++i) + { + if + ( + (strcmp(argv[i], "--data-filename") == 0) + || (strcmp(argv[i], "-df") == 0) + ) + { + i += 1; + + if + ( + parse_string_arg + ( + &(param->data_filename), + i, + argv, + argc + ) < 0 + ) + { + return -1; + } + } + else if + ( + (strcmp(argv[i], "--new-data-filename") == 0) + || (strcmp(argv[i], "-ndf") == 0) + ) + { + i += 1; + + if + ( + parse_string_arg + ( + 
&(param->new_data_filename), + i, + argv, + argc + ) < 0 + ) + { + return -1; + } + } + else if + ( + (strcmp(argv[i], "--irc-server-addr") == 0) + || (strcmp(argv[i], "-isa") == 0) + ) + { + i += 1; + + if + ( + parse_string_arg + ( + &(param->irc_server_addr), + i, + argv, + argc + ) < 0 + ) + { + return -1; + } + } + else if + ( + (strcmp(argv[i], "--irc-server-port") == 0) + || (strcmp(argv[i], "-isp") == 0) + ) + { + i += 1; + + if + ( + parse_string_arg + ( + &(param->irc_server_port), + i, + argv, + argc + ) < 0 + ) + { + return -1; + } + } + else if + ( + (strcmp(argv[i], "--irc-server-channel") == 0) + || (strcmp(argv[i], "-isc") == 0) + ) + { + i += 1; + + if + ( + parse_string_arg + ( + &(param->irc_server_channel), + i, + argv, + argc + ) < 0 + ) + { + return -1; + } + } + else if + ( + (strcmp(argv[i], "--irc-username") == 0) + || (strcmp(argv[i], "-iu") == 0) + ) + { + i += 1; + + if + ( + parse_string_arg + ( + &(param->irc_username), + i, + argv, + argc + ) < 0 + ) + { + return -1; + } + } + else if + ( + (strcmp(argv[i], "--irc-realname") == 0) + || (strcmp(argv[i], "-in") == 0) + ) + { + i += 1; + + if + ( + parse_string_arg + ( + &(param->irc_realname), + i, + argv, + argc + ) < 0 + ) + { + return -1; + } + } + else if + ( + (strcmp(argv[i], "--reply-rate") == 0) + || (strcmp(argv[i], "-rr") == 0) + ) + { + i += 1; + + if + ( + parse_integer_arg + ( + &(param->reply_rate), + i, + argv, + argc, + 0, + 100 + ) < 0 + ) + { + return -1; + } + } + else if + ( + (strcmp(argv[i], "--help") == 0) + || (strcmp(argv[i], "-h") == 0) + ) + { + print_help(argv[0]); + + return 0; + } + else + { + break; + } + } + + if (i == argc) + { + ZoO_S_FATAL("Missing argument: NICKNAME"); + + print_help(argv[0]); + + return -1; + } + + param->aliases_count = (argc - i); + param->aliases = (argv + i); + + if (param->new_data_filename == (char *) NULL) + { + param->new_data_filename = param->data_filename; + } + + return 1; +} diff --git a/src/cli/parameters.h 
b/src/cli/parameters.h new file mode 100644 index 0000000..1011e2b --- /dev/null +++ b/src/cli/parameters.h @@ -0,0 +1,13 @@ +#ifndef _ZoO_IO_PARAMETERS_H_ +#define _ZoO_IO_PARAMETERS_H_ + +#include "parameters_types.h" + +int ZoO_parameters_initialize +( + struct ZoO_parameters param [const static 1], + int const argc, + const char * argv [const static argc] +); + +#endif diff --git a/src/cli/parameters_types.h b/src/cli/parameters_types.h new file mode 100644 index 0000000..15b5254 --- /dev/null +++ b/src/cli/parameters_types.h @@ -0,0 +1,67 @@ +#ifndef _ZoO_IO_PARAMETERS_TYPES_H_ +#define _ZoO_IO_PARAMETERS_TYPES_H_ + +#include "../pervasive.h" + +/******************************************************************************/ +/** DEFAULT VALUES ************************************************************/ +/******************************************************************************/ + +#ifndef ZoO_DEFAULT_DATA_FILENAME + #define ZoO_DEFAULT_DATA_FILENAME "./memory.txt" +#endif + +#ifndef ZoO_DEFAULT_IRC_SERVER_ADDR + #define ZoO_DEFAULT_IRC_SERVER_ADDR "irc.foonetic.net" +#endif + +#ifndef ZoO_DEFAULT_IRC_SERVER_PORT + #define ZoO_DEFAULT_IRC_SERVER_PORT "6667" +#endif + +#ifndef ZoO_DEFAULT_IRC_SERVER_CHANNEL + #define ZoO_DEFAULT_IRC_SERVER_CHANNEL "#theborghivemind" +#endif + +#ifndef ZoO_DEFAULT_IRC_USERNAME + #define ZoO_DEFAULT_IRC_USERNAME "zeroofone" +#endif + +#ifndef ZoO_DEFAULT_IRC_REALNAME + #define ZoO_DEFAULT_IRC_REALNAME "Zero of One (bot)" +#endif + +#ifndef ZoO_DEFAULT_REPLY_RATE + #define ZoO_DEFAULT_REPLY_RATE 8 +#endif + +/******************************************************************************/ +/** DEBUG LEVELS **************************************************************/ +/******************************************************************************/ + +#ifndef ZoO_DEBUG_PARAMETERS + #define ZoO_DEBUG_PARAMETERS (0 || ZoO_DEBUG_ALL) +#endif + +/******************************************************************************/ 
+/** FUNCTIONS *****************************************************************/ +/******************************************************************************/ + +struct ZoO_parameters +{ + const char * restrict data_filename; + const char * restrict new_data_filename; + + const char * restrict irc_server_addr; + const char * restrict irc_server_port; + const char * restrict irc_server_channel; + const char * restrict irc_username; + const char * restrict irc_realname; + + int reply_rate; + + int aliases_count; + const char * restrict * restrict aliases; +}; + +#endif diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index fe28080..1e1daa8 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1,13 +1,10 @@ set( SRC_FILES ${SRC_FILES} ${CMAKE_CURRENT_SOURCE_DIR}/char.c - ${CMAKE_CURRENT_SOURCE_DIR}/main.c - ${CMAKE_CURRENT_SOURCE_DIR}/knowledge.c - ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_search.c - ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_finalize.c - ${CMAKE_CURRENT_SOURCE_DIR}/assimilate.c - ${CMAKE_CURRENT_SOURCE_DIR}/sequence_creation.c + ${CMAKE_CURRENT_SOURCE_DIR}/index.c ${CMAKE_CURRENT_SOURCE_DIR}/sequence.c + ${CMAKE_CURRENT_SOURCE_DIR}/sequence_creation.c + ${CMAKE_CURRENT_SOURCE_DIR}/sequence_from_string.c ) set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) diff --git a/src/core/assimilate.c b/src/core/assimilate.c deleted file mode 100644 index 7f03e1b..0000000 --- a/src/core/assimilate.c +++ /dev/null @@ -1,281 +0,0 @@ -#include -#include - -#include "../io/error.h" - -#include "knowledge.h" - -/** Functions to assimilate sentences using a ZoO_knowledge structure *********/ - - -static int add_sequence -( - ZoO_index links_count [const], - struct ZoO_knowledge_link * links [const], - ZoO_index const sequence [const restrict static ZoO_MARKOV_ORDER], - ZoO_index const target_i, - ZoO_index const offset -) -{ - ZoO_index link_index, i; - struct ZoO_knowledge_link * link; - ZoO_index * new_p; - - if - ( - ZoO_knowledge_get_link - ( - 
links_count, - links, - (sequence + offset), - &link_index - ) < 0 - ) - { - return -1; - } - - link = (*links + link_index); - link->occurrences += 1; - - for (i = 0; i < link->targets_count; ++i) - { - if (link->targets[i] == sequence[target_i]) - { - link->targets_occurrences[i] += 1; - - return 0; - } - } - - link->targets_count += 1; - - new_p = - (ZoO_index *) realloc - ( - (void *) link->targets, - (sizeof(ZoO_index) * link->targets_count) - ); - - if (new_p == (ZoO_index *) NULL) - { - link->targets_count -= 1; - - /* TODO: err. */ - return -1; - } - - link->targets = new_p; - link->targets[link->targets_count - 1] = sequence[target_i]; - - new_p = - (ZoO_index *) realloc - ( - (void *) link->targets_occurrences, - (sizeof(ZoO_index) * link->targets_count) - ); - - if (new_p == (ZoO_index *) NULL) - { - link->targets_count -= 1; - - /* TODO: err. */ - return -1; - } - - link->targets_occurrences = new_p; - link->targets_occurrences[link->targets_count - 1] = 1; - - return 0; -} - -static int add_word_occurrence -( - struct ZoO_knowledge k [const restrict static 1], - ZoO_index const sequence [const static ((ZoO_MARKOV_ORDER * 2) + 1)] -) -{ - ZoO_index w; - int error; - - w = sequence[ZoO_MARKOV_ORDER]; - - error = - add_sequence - ( - &(k->words[w].forward_links_count), - &(k->words[w].forward_links), - sequence + (ZoO_MARKOV_ORDER + 1), - (ZoO_MARKOV_ORDER - 1), - 0 - ); - - error = - ( - add_sequence - ( - &(k->words[w].backward_links_count), - &(k->words[w].backward_links), - sequence, - 0, - 1 - ) - | error - ); - - return error; -} - -static int should_assimilate -( - struct ZoO_strings string [const restrict static 1], - ZoO_index const aliases_count, - const char * restrict aliases [const restrict static aliases_count] -) -{ - ZoO_index i; - - /* Don't assimilate empty strings. */ - if (string->words_count == 0) - { - return 0; - } - - /* Don't assimilate things that start with our name. 
*/ - for (i = 0; i < aliases_count; ++i) - { - if (ZoO_IS_PREFIX(aliases[i], string->words[0])) - { - return 0; - } - } - - return 1; -} - -static int init_sequence -( - struct ZoO_knowledge k [const static 1], - struct ZoO_strings string [const restrict static 1], - ZoO_index sequence [const restrict static ((ZoO_MARKOV_ORDER * 2) + 1)] -) -{ - ZoO_index i; - - /* We are going to link this sequence to ZoO_WORD_START_OF_LINE */ - sequence[ZoO_MARKOV_ORDER] = ZoO_WORD_START_OF_LINE; - - for (i = 1; i <= ZoO_MARKOV_ORDER; ++i) - { - sequence[ZoO_MARKOV_ORDER - i] = ZoO_WORD_START_OF_LINE; - - if (i <= string->words_count) - { - if - ( - ZoO_knowledge_learn - ( - k, - string->words[i - 1], - (sequence + (ZoO_MARKOV_ORDER + i)) - ) < 0 - ) - { - return -1; - } - } - else - { - sequence[ZoO_MARKOV_ORDER + i] = ZoO_WORD_END_OF_LINE; - } - } - - return 0; -} - -int ZoO_knowledge_assimilate -( - struct ZoO_knowledge k [const static 1], - struct ZoO_strings string [const restrict static 1], - ZoO_index const aliases_count, - const char * restrict aliases [const restrict static aliases_count] -) -{ - int error; - ZoO_index sequence[(ZoO_MARKOV_ORDER * 2) + 1]; - ZoO_index next_word, new_word, new_word_id; - - if (!should_assimilate(string, aliases_count, aliases)) - { - return 0; - } - - if (init_sequence(k, string, sequence) < 0) - { - return -1; - } - - if (add_word_occurrence(k, sequence) < 0) - { - error = -1; - - /* There's a pun... */ - ZoO_S_WARNING("Could not add a link between words."); - - return -1; - } - - error = 0; - - next_word = 0; - new_word = ZoO_MARKOV_ORDER; - - while (next_word <= (string->words_count + ZoO_MARKOV_ORDER)) - { - if (new_word < string->words_count) - { - /* prevents words [restrict], k [restrict] */ - if (ZoO_knowledge_learn(k, string->words[new_word], &new_word_id) < 0) - { - return -1; - } - } - else - { - new_word_id = ZoO_WORD_END_OF_LINE; - } - - memmove - ( - (void *) sequence, - (const void *) (sequence + 1), - /* Accepts 0. 
*/ - (sizeof(ZoO_index) * (ZoO_MARKOV_ORDER * 2)) - ); - - sequence[ZoO_MARKOV_ORDER * 2] = new_word_id; - - if (add_word_occurrence(k, sequence) < 0) - { - error = -1; - - /* There's a pun... */ - ZoO_S_WARNING("Could not add a link between words."); - - return -1; - } - - /* - * Safe: - * - next_word < words_count - * - words_count =< ZoO_INDEX_MAX - * ---- - * next_word < ZoO_INDEX_MAX - */ - next_word += 1; - new_word += 1; - } - - return error; -} - diff --git a/src/core/char.c b/src/core/char.c index 39ca72e..9297643 100644 --- a/src/core/char.c +++ b/src/core/char.c @@ -2,6 +2,18 @@ #include "char.h" +/* See: "char.h" */ +ZoO_char ZoO_char_to_lowercase (const ZoO_char c) +{ + if ((c >= 'A') && (c <= 'Z')) + { + return 'z' - ('Z' - c); + } + + return c; +} + +/* See: "char.h" */ int ZoO_char_is_banned (const ZoO_char c) { switch (c) @@ -21,6 +33,7 @@ int ZoO_char_is_banned (const ZoO_char c) } } +/* See: "char.h" */ int ZoO_char_is_punctuation (const ZoO_char c) { switch (c) @@ -38,11 +51,14 @@ int ZoO_char_is_punctuation (const ZoO_char c) } } +/* See: "char.h" */ int ZoO_word_cmp ( const ZoO_char word_a [const static 1], + const size_t word_a_size, const ZoO_char word_b [const static 1] ) { - return strcmp((const char *) word_a, (const char *) word_b); + return strncmp((const char *) word_a, (const char *) word_b, word_a_size); } + diff --git a/src/core/char.h b/src/core/char.h index 772a3a2..2b4a355 100644 --- a/src/core/char.h +++ b/src/core/char.h @@ -3,19 +3,42 @@ #include "char_types.h" -enum ZoO_word_property ZoO_get_word_property -( - const ZoO_char word [const restrict], - size_t word_size -); - +/* Compares two words. {word_a} does not have to be null terminated.
*/ +/*@ + @ requires null_terminated_string(word_b); + @ requires ((length(word_a) * sizeof(ZoO_char)) == word_a_size); + @ ensures ((\result == 1) || (\result == 0) || (\result == -1)); + @*/ int ZoO_word_cmp ( const ZoO_char word_a [const static 1], + const size_t word_a_size, const ZoO_char word_b [const static 1] ); +/* + * Returns the lowercase equivalent of ZoO_char that are included in ['A','Z']. + * Other ZoO_char are returned untouched. + */ +ZoO_char ZoO_char_to_lowercase (const ZoO_char c); + +/* + * Returns '1' iff {c} should be considered as a punctuation character, '0' + * otherwise. + */ +/*@ + @ ensures ((\result == 1) || (\result == 0)); + @*/ int ZoO_char_is_punctuation (const ZoO_char c); + +/* + * Returns '1' iff containing {c} means the word should not be learned. '0' + * otherwise. + */ +/*@ + @ ensures ((\result == 1) || (\result == 0)); + @*/ int ZoO_word_char_is_banned (const ZoO_char c); #endif + diff --git a/src/core/char_types.h b/src/core/char_types.h index 67b5294..a2a736c 100644 --- a/src/core/char_types.h +++ b/src/core/char_types.h @@ -1,12 +1,16 @@ #ifndef _ZoO_CORE_CHAR_TYPES_H_ #define _ZoO_CORE_CHAR_TYPES_H_ - +/* + * FIXME: Does not belong here. + */ +/* enum ZoO_word_property { ZoO_WORD_NO_PROPERTY, ZoO_WORD_HAS_NO_LEFT_SEPARATOR, ZoO_WORD_HAS_NO_RIGHT_SEPARATOR }; +*/ /* ZoO_char = UTF-8 char */ typedef char ZoO_char; diff --git a/src/core/index.c b/src/core/index.c new file mode 100644 index 0000000..375e0ad --- /dev/null +++ b/src/core/index.c @@ -0,0 +1,61 @@ +#include +#include + +#include "index.h" + +#if (RAND_MAX < UCHAR_MAX) + #error "RAND_MAX < UCHAR_MAX, unable to generate random numbers." +#endif + +#if (RAND_MAX == 0) + #error "RAND_MAX is included in [0, 0]. What are you even doing?" +#endif + +/* + * Returns a random unsigned char. + */ +static unsigned char random_uchar (void) +{ + return + (unsigned char) + ( + /* FIXME: Do floats allow enough precision for this?
*/ + ( + ((float) rand()) + / ((float) RAND_MAX) + ) + * ((float) UCHAR_MAX) + ); +} + +/* See: "index.h" */ +ZoO_index ZoO_index_random (void) +{ + ZoO_index i; + ZoO_index result; + unsigned char * result_bytes; + + result_bytes = (unsigned char *) &result; + + for (i = 0; i < sizeof(ZoO_index); ++i) + { + result_bytes[i] = random_uchar(); + } + + return result; +} + +/* See: "index.h" */ +ZoO_index ZoO_index_random_up_to (const ZoO_index max) +{ + return + (ZoO_index) + ( + /* FIXME: Do floats allow enough precision for this? */ + ( + ((float) ZoO_index_random()) + / ((float) ZoO_INDEX_MAX) + ) + * ((float) max) + ); +} diff --git a/src/core/index.h b/src/core/index.h index 76e3507..1417662 100644 --- a/src/core/index.h +++ b/src/core/index.h @@ -3,6 +3,17 @@ #include "index_types.h" +/* + * Returns a random ZoO_index. + */ +ZoO_index ZoO_index_random (void); + +/* + * Returns a random ZoO_index, included in [0, limit] + */ +/*@ + @ ensures (\result <= limit); + @*/ ZoO_index ZoO_index_random_up_to (const ZoO_index limit); #endif diff --git a/src/core/index_types.h b/src/core/index_types.h index 2d769ca..ad56d52 100644 --- a/src/core/index_types.h +++ b/src/core/index_types.h @@ -1,8 +1,10 @@ #ifndef _ZoO_CORE_INDEX_TYPES_H_ #define _ZoO_CORE_INDEX_TYPES_H_ +/* Must be unsigned. */ typedef unsigned int ZoO_index; +/* Must be > 0. 
*/ #define ZoO_INDEX_MAX UINT_MAX #endif diff --git a/src/core/knowledge.c b/src/core/knowledge.c deleted file mode 100644 index 94d76cd..0000000 --- a/src/core/knowledge.c +++ /dev/null @@ -1,15 +0,0 @@ -#include -#include -#include /* defines SIZE_MAX */ - -#include "../io/error.h" - -#include "knowledge.h" - -/** Basic functions of the ZoO_knowledge structure ****************************/ -void ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1]) -{ - k->words = (struct ZoO_knowledge_word *) NULL; - k->words_length = 0; - k->words_sorted = (ZoO_index *) NULL; -} diff --git a/src/core/knowledge.h b/src/core/knowledge.h deleted file mode 100644 index 057e436..0000000 --- a/src/core/knowledge.h +++ /dev/null @@ -1,101 +0,0 @@ -#ifndef _ZoO_CORE_KNOWLEDGE_H_ -#define _ZoO_CORE_KNOWLEDGE_H_ - -#include "../core/char_types.h" -#include "../core/index_types.h" - -#include "knowledge_types.h" - -void ZoO_knowledge_initialize (struct ZoO_knowledge k [const restrict static 1]); - -void ZoO_knowledge_finalize (struct ZoO_knowledge k [const restrict static 1]); - - -/* - * When returning 0: - * {word} was either added to {k} or its representation in {k} has its - * occurrences count increased. - * {*result} indicates where {word} is in {k->words}. - * - * When returning -1: - * Something went wrong when adding the occurrence of {word} to {k}. - * {k} remains semantically unchanged. - * {*result} may or may not have been altered. 
- */ -int ZoO_knowledge_learn -( - struct ZoO_knowledge k [const static 1], - const ZoO_char word [const restrict static 1], - ZoO_index result [const restrict static 1] -); - -int ZoO_knowledge_learn_sequence -( - struct ZoO_knowledge k [const static 1], - const ZoO_index sequence [const restrict], - const ZoO_index sequence_length -); - -int ZoO_knowledge_get_following_sequences_ref -( - const struct ZoO_knowledge k [const static 1], - const ZoO_index initial_word, - const ZoO_index * restrict following_sequences_ref [const restrict static 1], - const ZoO_index * restrict following_sequences_weights [const restrict static 1], - ZoO_index following_sequences_weights_sum [const static 1] -); - -int ZoO_knowledge_get_sequence -( - const struct ZoO_knowledge k [const static 1], - const ZoO_index sequences_ref, - const ZoO_index * restrict sequence [const restrict static 1] -); - -int ZoO_knowledge_get_word -( - const struct ZoO_knowledge k [const static 1], - const ZoO_index word_ref, - const ZoO_char * word [const restrict static 1], - size_t word_size [const restrict static 1] -); - -/* - * When returning 0: - * {word} is in {k}. - * {word} is located at {k->words[*result]}. - * - * When returning -1: - * {word} is not in {k}. - * {*result} is where {word} was expected to be found in - * {k->sorted_indices}. 
- */ -int ZoO_knowledge_find_word_id -( - const struct ZoO_knowledge k [const restrict static 1], - const ZoO_char word [const restrict static 1], - ZoO_index result [const restrict static 1] -); - -int ZoO_knowledge_find_preceding_words -( - const struct ZoO_knowledge k [const static 1], - const ZoO_index sequence [const restrict], - const ZoO_index markov_order, - const ZoO_index * restrict preceding_words [const restrict static 1], - const ZoO_index * restrict preceding_words_weights [const restrict static 1], - ZoO_index preceding_words_weights_sum [const restrict static 1] -); - -int ZoO_knowledge_find_following_words -( - const struct ZoO_knowledge k [const static 1], - const ZoO_index sequence [const restrict], - const ZoO_index sequence_length, - const ZoO_index markov_order, - const ZoO_index * restrict following_words [const restrict static 1], - const ZoO_index * restrict following_words_weights [const restrict static 1], - ZoO_index following_words_weights_sum [const restrict static 1] -); - -#endif diff --git a/src/core/knowledge_finalize.c b/src/core/knowledge_finalize.c deleted file mode 100644 index e4deda6..0000000 --- a/src/core/knowledge_finalize.c +++ /dev/null @@ -1,121 +0,0 @@ -#include -#include -#include /* defines SIZE_MAX */ - -#include "../io/error.h" - -#include "knowledge.h" - -void knowledge_sequence_collection_finalize -( - struct ZoO_knowledge_sequence_collection c [const restrict static 1] -) -{ - ZoO_index i; - - if (c->sequences_ref != (ZoO_index *) NULL) - { - free((void *) c->sequences_ref); - c->sequences_ref = (ZoO_index *) NULL; - } - - if (c->sequences_ref_sorted != (ZoO_index *) NULL) - { - free((void *) c->sequences_ref_sorted); - c->sequences_ref_sorted = (ZoO_index *) NULL; - } - - if (c->occurrences != (ZoO_index *) NULL) - { - free((void *) c->occurrences); - c->occurrences = (ZoO_index *) NULL; - } - - for (i = 0; i < c->sequences_ref_length; ++i) - { - free((void *) c->targets[i]); - free((void *) 
c->targets_occurrences[i]); - } - - c->sequences_ref_length = 0; - - if (c->targets != (ZoO_index **) NULL) - { - free((void *) c->targets); - c->targets != (ZoO_index **) NULL; - } - - free((void *) c->targets_length); - - if (c->targets_occurrences != (ZoO_index **) NULL) - { - free((void *) c->targets_occurrences); - c->targets_occurrences != (ZoO_index **) NULL; - } -} - -void knowledge_word_finalize -( - struct ZoO_knowledge_word w [const restrict static 1] -) -{ - w->word_size = 0; - w->occurrences = 0; - - if (w->word != (ZoO_char *) NULL) - { - free((void *) w->word); - - w->word = (ZoO_char *) NULL; - } - - knowledge_sequence_collection_finalize(&(w->followed)); - knowledge_sequence_collection_finalize(&(w->preceded)); -} - -void ZoO_knowledge_finalize (struct ZoO_knowledge k [const restrict static 1]) -{ - ZoO_index i; - - for (i = 0; i < k->words_length; ++i) - { - knowledge_word_finalize(k->words + i); - } - - k->words_length = 0; - - if (k->words != (struct ZoO_knowledge_word *) NULL) - { - free((void *) k->words); - - k->words = (struct ZoO_knowledge_word *) NULL; - } - - if (k->words_sorted != (ZoO_index *) NULL) - { - free((void *) k->words_sorted); - - k->words_sorted = (ZoO_index *) NULL; - } - - for (i = 0; i < k->sequences_length; ++i) - { - free((void *) k->sequences[i]); - } - - k->sequences_length = 0; - - if (k->sequences != (ZoO_index **) NULL) - { - free((void *) k->sequences); - - k->sequences = (ZoO_index **) NULL; - } - - if (k->sequences_sorted != (ZoO_index *) NULL) - { - free((void *) k->sequences_sorted); - - k->sequences_sorted = (ZoO_index *) NULL; - } -} diff --git a/src/core/knowledge_search.c b/src/core/knowledge_search.c deleted file mode 100644 index d0c61ef..0000000 --- a/src/core/knowledge_search.c +++ /dev/null @@ -1,335 +0,0 @@ -#include - -#include "../core/char.h" -#include "../core/index.h" -#include "../core/sequence.h" - -#include "../io/error.h" - -#include "knowledge.h" - -/* See "knowledge.h". 
*/ -int ZoO_knowledge_find_word_id -( - const struct ZoO_knowledge k [const restrict static 1], - const ZoO_char word [const restrict static 1], - ZoO_index result [const restrict static 1] -) -{ - /* This is a binary search */ - int cmp; - ZoO_index i, current_min, current_max; - ZoO_index candidate_id; - - /* Handles the case where the list is empty ********************************/ - current_max = k->words_length; - - if (current_max == 0) - { - *result = 0; - - return -1; - } - /***************************************************************************/ - - current_min = 0; - current_max -= 1; - - for (;;) - { - i = (current_min + ((current_max - current_min) / 2)); - - cmp = ZoO_word_cmp(word, k->words[k->words_sorted[i]].word); - - if (cmp > 0) - { - current_min = (i + 1); - - if (current_min > current_max) - { - *result = current_min; - - return -1; - } - } - else if (cmp < 0) - { - if ((current_min > current_max) || (i == 0)) - { - *result = current_min; - - return -1; - } - - current_max = (i - 1); - } - else - { - *result = i; - - return 0; - } - } -} - -int ZoO_knowledge_find_preceding_words -( - const struct ZoO_knowledge k [const static 1], - const ZoO_index sequence [const restrict], - const ZoO_index markov_order, /* Pre: (> 0) */ - const ZoO_index * restrict preceding_words [const restrict static 1], - const ZoO_index * restrict preceding_words_weights [const restrict static 1], - ZoO_index preceding_words_weights_sum [const restrict static 1] -) -{ - /* This is a binary search */ - int cmp; - ZoO_index i, current_min, current_max, local_sequence; - const ZoO_index * restrict candidate; - const ZoO_index markov_sequence_length = (markov_order - 1); - const ZoO_index word = sequence[markov_sequence_length]; - - if (word >= k->words_length) - { - ZoO_S_ERROR - ( - "Attempting to find the preceding words of an unknown word." 
- ); - - *preceding_words = (const ZoO_index *) NULL; - *preceding_words_weights = (const ZoO_index *) NULL; - *preceding_words_weights_sum = 0; - - return -1; - } - - - if (markov_order == 1) - { - /* Special case: empty sequences. */ - *preceding_words = (const ZoO_index *) k->words[word].preceded.targets; - - *preceding_words_weights = - (const ZoO_index *) k->words[word].preceded.targets_occurrences; - - *preceding_words_weights_sum = k->words[word].occurrences; - - return 0; - } - - /* Handles the case where the list is empty ********************************/ - current_max = k->words[word].preceded.sequences_ref_length; - - if (current_max == 0) - { - *preceding_words = (const ZoO_index *) NULL; - *preceding_words_weights = (const ZoO_index *) NULL; - *preceding_words_weights_sum = 0; - - ZoO_S_ERROR - ( - "Attempting to find the preceding words of a sequence that never had " - "any." - ); - - return -2; - } - /***************************************************************************/ - - current_min = 0; - current_max -= 1; - - for (;;) - { - i = (current_min + ((current_max - current_min) / 2)); - - local_sequence = k->words[word].preceded.sequences_ref_sorted[i]; - - (void) ZoO_knowledge_get_sequence - ( - k, - k->words[word].preceded.sequences_ref[local_sequence], - &candidate - ); - - cmp = - ZoO_sequence_cmp - ( - sequence, - markov_sequence_length, - candidate, - markov_sequence_length - ); - - if (cmp > 0) - { - current_min = (i + 1); - - if (current_min > current_max) - { - *preceding_words = (const ZoO_index *) NULL; - *preceding_words_weights = (const ZoO_index *) NULL; - *preceding_words_weights_sum = 0; - - return -2; - } - } - else if (cmp < 0) - { - if ((current_min > current_max) || (i == 0)) - { - *preceding_words = (const ZoO_index *) NULL; - *preceding_words_weights = (const ZoO_index *) NULL; - *preceding_words_weights_sum = 0; - - return -2; - } - - current_max = (i - 1); - } - else - { - *preceding_words = 
k->words[word].preceded.targets[local_sequence]; - - *preceding_words_weights = - k->words[word].preceded.targets_occurrences[local_sequence]; - - *preceding_words_weights_sum = - k->words[word].preceded.occurrences[local_sequence]; - - return 0; - } - } -} - -int ZoO_knowledge_find_following_words -( - const struct ZoO_knowledge k [const static 1], - const ZoO_index sequence [const restrict], - const ZoO_index sequence_length, - const ZoO_index markov_order, - const ZoO_index * restrict following_words [const restrict static 1], - const ZoO_index * restrict following_words_weights [const restrict static 1], - ZoO_index following_words_weights_sum [const restrict static 1] -) -{ - /* This is a binary search */ - int cmp; - ZoO_index i, current_min, current_max, local_sequence; - const ZoO_index * restrict candidate; - const ZoO_index markov_sequence_length = (markov_order - 1); - const ZoO_index sequence_offset = - ((sequence_length - markov_sequence_length) - 1); - const ZoO_index word = sequence[sequence_offset]; - - if (word >= k->words_length) - { - ZoO_S_ERROR - ( - "Attempting to find the following words of an unknown word." - ); - - *following_words = (const ZoO_index *) NULL; - *following_words_weights = (const ZoO_index *) NULL; - *following_words_weights_sum = 0; - - return -1; - } - - if (markov_order == 1) - { - /* Special case: empty sequences. 
*/ - *following_words = (const ZoO_index *) k->words[word].preceded.targets; - - *following_words_weights = - (const ZoO_index *) k->words[word].preceded.targets_occurrences; - - *following_words_weights_sum = k->words[word].occurrences; - - return 0; - } - - /* Handles the case where the list is empty ********************************/ - current_max = k->words[word].preceded.sequences_ref_length; - - if (current_max == 0) - { - *following_words = (const ZoO_index *) NULL; - *following_words_weights = (const ZoO_index *) NULL; - *following_words_weights_sum = 0; - - ZoO_S_WARNING - ( - "Attempting to find the following words of a sequence that never had " - "any." - ); - - return -2; - } - /***************************************************************************/ - - current_min = 0; - current_max -= 1; - - for (;;) - { - i = (current_min + ((current_max - current_min) / 2)); - - local_sequence = k->words[word].followed.sequences_ref_sorted[i]; - - (void) ZoO_knowledge_get_sequence - ( - k, - k->words[word].followed.sequences_ref[local_sequence], - &candidate - ); - - cmp = - ZoO_sequence_cmp - ( - (sequence + sequence_offset), - markov_sequence_length, - candidate, - markov_sequence_length - ); - - if (cmp > 0) - { - current_min = (i + 1); - - if (current_min > current_max) - { - *following_words = (const ZoO_index *) NULL; - *following_words_weights = (const ZoO_index *) NULL; - *following_words_weights_sum = 0; - - return -2; - } - } - else if (cmp < 0) - { - if ((current_min > current_max) || (i == 0)) - { - *following_words = (const ZoO_index *) NULL; - *following_words_weights = (const ZoO_index *) NULL; - *following_words_weights_sum = 0; - - return -2; - } - - current_max = (i - 1); - } - else - { - *following_words = k->words[word].followed.targets[local_sequence]; - - *following_words_weights = - k->words[word].followed.targets_occurrences[local_sequence]; - - *following_words_weights_sum = - k->words[word].followed.occurrences[local_sequence]; - - 
return 0; - } - } -} diff --git a/src/core/knowledge_types.h b/src/core/knowledge_types.h deleted file mode 100644 index acd239f..0000000 --- a/src/core/knowledge_types.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef _ZoO_CORE_KNOWLEDGE_TYPES_H_ -#define _ZoO_CORE_KNOWLEDGE_TYPES_H_ - -#include "../core/index_types.h" -#include "../core/char_types.h" - -struct ZoO_knowledge_sequence_collection -{ - ZoO_index * sequences_ref; - ZoO_index sequences_ref_length; - ZoO_index * sequences_ref_sorted; - ZoO_index * occurrences; - ZoO_index ** targets; - ZoO_index * targets_length; - ZoO_index ** targets_occurrences; -}; - -struct ZoO_knowledge_word -{ - const ZoO_char * word; - size_t word_size; - ZoO_index occurrences; - struct ZoO_knowledge_sequence_collection followed; - struct ZoO_knowledge_sequence_collection preceded; -}; - -struct ZoO_knowledge -{ - struct ZoO_knowledge_word * words; - ZoO_index words_length; - ZoO_index * words_sorted; - ZoO_index ** sequences; - ZoO_index sequences_length; - ZoO_index * sequences_sorted; -}; - -#endif diff --git a/src/core/main.c b/src/core/main.c deleted file mode 100644 index bb4ae23..0000000 --- a/src/core/main.c +++ /dev/null @@ -1,436 +0,0 @@ -#include -#include -#include -#include -#include - -#include "../tool/strings.h" - -#include "../io/error.h" -#include "../io/parameters.h" -#include "../io/data_input.h" -#include "../io/data_output.h" -#include "../io/network.h" - -#include "knowledge.h" - -#include "state_types.h" - -static int run = 1; - -static void request_termination (int const signo) -{ - if ((signo == SIGINT) || (signo == SIGTERM)) - { - run = 0; - } -} - -static int initialize -( - struct ZoO_state s [const static 1], - int const argc, - const char * argv [const static argc] -) -{ - ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Zero of One is initializing..."); - - srand(time(NULL)); - - /* prevents s [restrict] */ - if (ZoO_knowledge_initialize(&(s->knowledge)) < 0) - { - return -1; - } - - if 
(ZoO_parameters_initialize(&(s->param), argc, argv) < 1) - { - ZoO_knowledge_finalize(&(s->knowledge)); - - return -1; - } - - return 0; -} - -static int load_data_file (struct ZoO_state s [const static 1]) -{ - struct ZoO_data_input input; - char * result; - - if (ZoO_data_input_open(&input, s->param.data_filename) < 0) - { - return -1; - } - - while - ( - ZoO_data_input_read_line - ( - &input, - ZoO_knowledge_punctuation_chars_count, - ZoO_knowledge_punctuation_chars - ) == 0 - ) - { - (void) ZoO_knowledge_assimilate - ( - &(s->knowledge), - &(input.string), - s->param.aliases_count, - s->param.aliases - ); - } - - ZoO_data_input_close(&input); - - return 0; -} - -static int finalize (struct ZoO_state s [const static 1]) -{ - int error; - - ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Zero of One is finalizing..."); - - error = 0; - - /* prevents s [restrict] */ - ZoO_knowledge_finalize(&(s->knowledge)); - - return error; -} - -static int network_connect (struct ZoO_state s [const static 1]) -{ - return - ZoO_network_connect - ( - &(s->network), - s->param.irc_server_addr, - s->param.irc_server_port, - s->param.irc_server_channel, - s->param.irc_username, - s->param.irc_realname, - s->param.aliases[0] - ); -} - -static int should_reply -( - struct ZoO_parameters param [const restrict static 1], - struct ZoO_strings string [const restrict static 1], - int should_learn [const restrict static 1] -) -{ - ZoO_index i, j; - - for (i = 0; i < param->aliases_count; ++i) - { - if (ZoO_IS_PREFIX(param->aliases[i], string->words[0])) - { - *should_learn = 0; - - return 1; - } - - for (j = 1; j < string->words_count; ++j) - { - if (ZoO_IS_PREFIX(param->aliases[i], string->words[j])) - { - *should_learn = 1; - - return 1; - } - } - } - - *should_learn = 1; - - return (param->reply_rate >= (rand() % 100)); -} - -static void handle_user_join -( - struct ZoO_state s [const static 1], - struct ZoO_strings string [const restrict static 1], - ssize_t const msg_offset, - ssize_t const 
msg_size -) -{ - ZoO_char * line; - ZoO_index loc; - - if (s->param.reply_rate < (rand() % 100)) - { - return; - } - - if - ( - ZoO_strings_parse - ( - string, - (size_t) msg_size, - (s->network.in + msg_offset), - ZoO_knowledge_punctuation_chars_count, - ZoO_knowledge_punctuation_chars - ) < 0 - ) - { - ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Could not dissect join username."); - - return; - } - - if - ( - ( - ZoO_knowledge_find - ( - &(s->knowledge), - string->words[0], - &loc - ) < 0 - ) - || (s->knowledge.words[loc].backward_links_count <= 3) - || (s->knowledge.words[loc].forward_links_count <= 3) - ) - { - if - ( - ZoO_knowledge_extend - ( - &(s->knowledge), - (struct ZoO_strings *) NULL, - 0, - (const char **) NULL, - &line - ) == 0 - ) - { - if (line[0] == ' ') - { - strcpy((s->network.out), (line + 1)); - } - else - { - strcpy((s->network.out), line); - } - - free((void *) line); - - ZoO_network_send(&(s->network)); - } - } - else - { - if - ( - ZoO_knowledge_extend - ( - &(s->knowledge), - string, - 0, - (const char **) NULL, - &line - ) == 0 - ) - { - if (line[0] == ' ') - { - strcpy((s->network.out), (line + 1)); - } - else - { - strcpy((s->network.out), line); - } - - free((void *) line); - - ZoO_network_send(&(s->network)); - } - } -} - -static void handle_message -( - struct ZoO_state s [const static 1], - struct ZoO_strings string [const restrict static 1], - ssize_t const msg_offset, - /* FIXME: somehow we end up using (msg_size + 1), meaning there's a mixup - * between size and length. 
- */ - ssize_t const msg_size -) -{ - ZoO_char * line; - int reply, learn; - - if - ( - ZoO_strings_parse - ( - string, - (size_t) msg_size, - (s->network.in + msg_offset), - ZoO_knowledge_punctuation_chars_count, - ZoO_knowledge_punctuation_chars - ) < 0 - ) - { - ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Could not dissect msg."); - - return; - } - - if (string->words_count == 0) - { - return; - } - - reply = should_reply(&(s->param), string, &learn); - - if (learn) - { - /* - * It would be best to do that after replying, but by then we no longer - * have the string in 's->network.in'. - */ - (void) ZoO_data_output_write_line - ( - s->param.new_data_filename, - (s->network.in + msg_offset), - (size_t) (msg_size + 1) - ); - } - - if - ( - reply - && - ( - ZoO_knowledge_extend - ( - &(s->knowledge), - string, - s->param.aliases_count, - s->param.aliases, - &line - ) == 0 - ) - ) - { - if (line[0] == ' ') - { - strcpy((s->network.out), (line + 1)); - } - else - { - strcpy((s->network.out), line); - } - - free((void *) line); - - ZoO_network_send(&(s->network)); - } - - if (learn) - { - (void) ZoO_knowledge_assimilate - ( - &(s->knowledge), - string, - s->param.aliases_count, - s->param.aliases - ); - } -} - -static int main_loop (struct ZoO_state s [const static 1]) -{ - struct ZoO_strings string; - ssize_t msg_offset, msg_size; - enum ZoO_msg_type msg_type; - - msg_offset = 0; - msg_size = 0; - - ZoO_strings_initialize(&string); - - while (run) - { - if - ( - ZoO_network_receive - ( - &(s->network), - &msg_offset, - &msg_size, - &msg_type - ) == 0 - ) - { - switch (msg_type) - { - case ZoO_JOIN: - handle_user_join(s, &string, msg_offset, msg_size); - break; - - case ZoO_PRIVMSG: - handle_message(s, &string, msg_offset, msg_size); - break; - } - } - } - - ZoO_strings_finalize(&string); - - ZoO_network_disconnect(&(s->network)); - - return 0; -} - -int main (int const argc, const char * argv [const static argc]) -{ - struct ZoO_state s; - - if (initialize(&s, argc, argv) < 
0) - { - return -1; - } - - if (load_data_file(&s) < 0) - { - goto CRASH; - } - - if (network_connect(&s) < 0) - { - goto CRASH; - } - - if (main_loop(&s) < 0) - { - goto CRASH; - } - - (void) finalize(&s); - - ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Zero of One terminated normally."); - - return 0; - - CRASH: - { - (void) finalize(&s); - - ZoO_S_DEBUG - ( - ZoO_DEBUG_PROGRAM_FLOW, - "Zero of One terminated by crashing." - ); - - return -1; - } -} diff --git a/src/core/sequence.c b/src/core/sequence.c index 9e370a3..d7ff9d0 100644 --- a/src/core/sequence.c +++ b/src/core/sequence.c @@ -5,18 +5,56 @@ #include "sequence.h" +/* + * Bypass redundant ZoO_START_OF_SEQUENCE_ID at the start of a sequence. + */ +/* ensures (*sequence_offset <= sequence_length) */ +static void bypass_redundant_sos +( + const ZoO_index sequence [const restrict], + const ZoO_index sequence_length, + ZoO_index sequence_offset [const restrict static 1] +) +{ + ZoO_index i; + + *sequence_offset = 0; + + for (i = 0; i < sequence_length; ++i) + { + if (sequence[i] != ZoO_START_OF_SEQUENCE_ID) + { + return; + } + else if (sequence[i] == ZoO_START_OF_SEQUENCE_ID) + { + *sequence_offset = i; + } + } +} + + /* See "sequence.h" */ int ZoO_sequence_cmp ( const ZoO_index sequence_a [const], - const ZoO_index sequence_a_length, + ZoO_index sequence_a_length, const ZoO_index sequence_b [const], - const ZoO_index sequence_b_length + ZoO_index sequence_b_length ) { - ZoO_index min_length; + ZoO_index min_length, a, b; + ZoO_index a_offset, b_offset; ZoO_index i; + bypass_redundant_sos(sequence_a, sequence_a_length, &a_offset); + bypass_redundant_sos(sequence_b, sequence_b_length, &b_offset); + + /*@ requires (*a_offset <= sequence_a_length) @*/ + sequence_a_length -= a_offset; + /*@ requires (*b_offset <= sequence_b_length) @*/ + sequence_b_length -= b_offset; + if (sequence_a_length < sequence_b_length) { min_length = sequence_a_length; @@ -26,47 +64,37 @@ int ZoO_sequence_cmp min_length = sequence_b_length; } 
+ /*@ ensures (min_length <= sequence_a_length) @*/ + /*@ ensures (min_length <= sequence_b_length) @*/ + for (i = 0; i < min_length; ++i) { - if (sequence_a[i] < sequence_b[i]) + /*@ requires ((i + a_offset) < sequence_a_length) @*/ + a = sequence_a[i + a_offset]; + /*@ requires ((i + b_offset) < sequence_b_length) @*/ + b = sequence_b[i + b_offset]; + + if (a < b) { return -1; } - else if (sequence_b[i] > sequence_b[i]) + else if (b > a) { return 1; } - else if - ( - (sequence_a[i] == ZoO_END_OF_SEQUENCE_ID) - && (sequence_b[i] == ZoO_END_OF_SEQUENCE_ID) - ) + else if ((a == ZoO_END_OF_SEQUENCE_ID) && (b == ZoO_END_OF_SEQUENCE_ID)) { return 0; } } - if (sequence_a_length < sequence_b_length) + if (sequence_a_length > sequence_b_length) { - if (sequence_b[i] == ZoO_END_OF_SEQUENCE_ID) - { - return 0; - } - else - { - return -1; - } + return 1; } - else if (sequence_a_length > sequence_b_length) + else if (sequence_a_length < sequence_b_length) { - if (sequence_a[i] == ZoO_END_OF_SEQUENCE_ID) - { - return 0; - } - else - { - return 1; - } + return -1; } else { diff --git a/src/core/sequence.h b/src/core/sequence.h index e609b4d..77ecd6c 100644 --- a/src/core/sequence.h +++ b/src/core/sequence.h @@ -1,11 +1,21 @@ #ifndef _ZoO_CORE_SEQUENCE_H_ #define _ZoO_CORE_SEQUENCE_H_ +#include "../core/char_types.h" #include "../core/index_types.h" -#include "../core/knowledge_types.h" +#include "../knowledge/knowledge_types.h" #include "sequence_types.h" +int ZoO_sequence_from_undercase_string +( + const ZoO_char string [const restrict], + const ZoO_index string_length, + struct ZoO_knowledge k [const restrict static 1], + ZoO_index * sequence [const restrict static 1], + ZoO_index sequence_length [const restrict static 1] +); + /* * Creates a sequence containing {initial_word}. The remaining elements of * sequence are added according to what is known to {k} as being possible. 
@@ -42,7 +52,13 @@ int ZoO_sequence_create_from * ZoO_END_OF_SEQUENCE marks the ending of a sequence, regardless of indicated * sequence length, meaning that [10][ZoO_END_OF_SEQUENCE][9] and * [10][ZoO_END_OF_SEQUENCE][8] are considered equal. Sequences do not have to - * contain ZoO_END_OF_SEQUENCE. + * contain ZoO_END_OF_SEQUENCE. [10][ZoO_END_OF_SEQUENCE] and [10] are + * considered different, [10][ZoO_END_OF_SEQUENCE] + * and [10][ZoO_END_OF_SEQUENCE][ZoO_END_OF_SEQUENCE] are considered equal. + * Same logic is applied for ZoO_START_OF_SEQUENCE: + * [START_OF_SEQUENCE][10] is not [10], but + * [START_OF_SEQUENCE][START_OF_SEQUENCE][10] and [START_OF_SEQUENCE][10] are + * the same. * Return: * 1 iff {sequence_a} should be considered being more than {sequence_b} * 0 iff {sequence_a} should be considered being equal to {sequence_b} diff --git a/src/core/sequence_creation.c b/src/core/sequence_creation.c index 1133be9..f460629 100644 --- a/src/core/sequence_creation.c +++ b/src/core/sequence_creation.c @@ -19,6 +19,11 @@ * (> weights_sum 0). * (= (sum weights) weights_sum). */ +/*@ + @ requires (weights_sum > 0); + @ requires \valid(weights); + @ requires (\sum(0, (\length(weights) - 1), weights) = weights_sum); +@*/ static ZoO_index weighted_random_pick ( const ZoO_index weights [const restrict static 1], @@ -29,12 +34,12 @@ static ZoO_index weighted_random_pick accumulator = 0; - /* Safe: Included in [0, weights_sum]. 
*/ random_number = ZoO_index_random_up_to(weights_sum); + /*@ ensures (0 <= random_number <= weights_sum); @*/ for (result = 0; accumulator < random_number; ++result) { - /* Safe: (= (sum weights) weights_sum) */ + /*@ requires (\sum(0, (\length(weights) - 1), weights) = weights_sum); @*/ accumulator += weights[result]; } diff --git a/src/core/sequence_from_string.c b/src/core/sequence_from_string.c new file mode 100644 index 0000000..51d7049 --- /dev/null +++ b/src/core/sequence_from_string.c @@ -0,0 +1,315 @@ +#define _POSIX_C_SOURCE 200809L +#include +#include +#include /* defines SIZE_MAX */ + +#include "../core/char.h" +#include "../core/index.h" + +#include "../cli/cli.h" + +#include "../knowledge/knowledge.h" + +#include "sequence.h" + +static int add_word_id_to_sequence +( + const ZoO_index word_id, + ZoO_index * sequence [const restrict static 1], + ZoO_index sequence_length [const restrict static 1] +) +{ + ZoO_index * new_sequence; + + *sequence_length += 1; + + new_sequence = + (ZoO_index *) realloc + ( + (void *) *sequence, + (((size_t) sequence_length) * sizeof(ZoO_index)) + ); + + if (new_sequence == (ZoO_index *) NULL) + { + ZoO_S_ERROR("Unable to reallocate a sequence to add word ids to it."); + + return -1; + } + + return 0; +} + +/******************************************************************************/ +/** HANDLING PUNCTUATION ******************************************************/ +/******************************************************************************/ +static int add_punctuation_to_sequence +( + const ZoO_char string [const restrict static 1], + const ZoO_char punctuation, + ZoO_index * sequence [const restrict static 1], + ZoO_index sequence_length [const restrict static 1], + const struct ZoO_knowledge k [const restrict static 1] +) +{ + ZoO_index word_id; + ZoO_char as_word[2]; + + as_word[0] = punctuation; + as_word[1] = '\0'; + + if (ZoO_knowledge_find_word_id(k, as_word, 2, &word_id) < 0) + { + ZoO_PROG_ERROR + ( + "'%s' 
was defined as a punctuation, was found in a string, yet is not" + " defined in the knowledge database.", + as_word + ); + + return -1; + } + + if (add_word_id_to_sequence(word_id, sequence, sequence_length) < 0) + { + return -1; + } + + return 0; +} + +static int word_is_punctuation_terminated +( + const ZoO_char string [const restrict static 1], + const ZoO_index word_start, + const ZoO_index word_length +) +{ + return ZoO_char_is_punctuation(string[word_length]); +} + +/******************************************************************************/ +/** HANDLING WORDS ************************************************************/ +/******************************************************************************/ +static int add_word_to_sequence +( + const ZoO_char string [const restrict static 1], + const ZoO_index word_start, + const ZoO_index word_length, + ZoO_index * sequence [const restrict static 1], + ZoO_index sequence_length [const restrict static 1], + struct ZoO_knowledge k [const restrict static 1] +) +{ + ZoO_index word_id; + ZoO_char * stored_word; + + if (word_length == 0) + { + return 0; + } + + if + ( + ZoO_knowledge_learn_word + ( + k, + (string + word_start), + word_length, + &word_id + ) < 0 + ) + { + return -1; + } + + if (add_word_id_to_sequence(word_id, sequence, sequence_length) < 0) + { + return -1; + } + + return 0; +} + +static int add_finding_to_sequence +( + const ZoO_char string [const restrict static 1], + const ZoO_index word_start, + const ZoO_index word_length, + ZoO_index * sequence [const restrict static 1], + ZoO_index sequence_length [const restrict static 1], + struct ZoO_knowledge k [const restrict static 1] +) +{ + ZoO_index punctuation; + + if (word_is_punctuation_terminated(string, word_start, word_length)) + { + punctuation = 1; + } + else + { + punctuation = 0; + } + + if + ( + add_word_to_sequence + ( + string, + word_start, + (word_length - punctuation), + sequence, + sequence_length, + k + ) < 0 + ) + { + return -1; + 
} + + if + ( + (punctuation == 1) + && + ( + add_punctuation_to_sequence + ( + string, + string[word_start + word_length - 1], + sequence, + sequence_length, + k + ) < 0 + ) + ) + { + return -1; + } + + return 0; +} + +static int find_word +( + const ZoO_char string [const restrict static 1], + const ZoO_index string_length, + const ZoO_index offset, + ZoO_index word_start [const restrict static 1], + ZoO_index word_length [const restrict static 1] +) +{ + ZoO_index i; + + i = offset; + + while ((string[i] == ' ') && (i < string_length)) + { + i += 1; + } + + if (i >= string_length) + { + return -1; + } + + *word_start = i; + + while ((string[i] != ' ') && (i < string_length)) + { + i += 1; + } + + if (i >= string_length) + { + return -1; + } + + *word_length = (i - *word_start); + + return 0; +} + +/******************************************************************************/ +/** EXPORTED ******************************************************************/ +/******************************************************************************/ +int ZoO_sequence_from_undercase_string +( + const ZoO_char string [const restrict], + const ZoO_index string_length, + struct ZoO_knowledge k [const restrict static 1], + ZoO_index * sequence [const restrict static 1], + ZoO_index sequence_length [const restrict static 1] +) +{ + ZoO_index word_start, word_length; + ZoO_index i; + + i = 0; + + *sequence = (ZoO_index *) NULL; + *sequence_length = 0; + + if + ( + add_word_id_to_sequence + ( + ZoO_START_OF_SEQUENCE_ID, + sequence, + sequence_length + ) < 0 + ) + { + return -1; + } + + while (i < string_length) + { + if (find_word(string, i, string_length, &word_start, &word_length) < 0) + { + break; + } + + if + ( + add_finding_to_sequence + ( + string, + word_start, + word_length, + sequence, + sequence_length, + k + ) < 0 + ) + { + free((void *) *sequence); + *sequence = (ZoO_index *) NULL; + *sequence_length = 0; + + return -1; + } + + i = (word_start + word_length); + } + + if 
+ ( + add_word_id_to_sequence + ( + ZoO_END_OF_SEQUENCE_ID, + sequence, + sequence_length + ) < 0 + ) + { + free((void *) *sequence); + + *sequence = (ZoO_index *) NULL; + *sequence_length = 0; + + return -1; + } + + return 0; +} diff --git a/src/core/sequence_types.h b/src/core/sequence_types.h index 717d418..c260a8a 100644 --- a/src/core/sequence_types.h +++ b/src/core/sequence_types.h @@ -3,7 +3,8 @@ #define ZoO_START_OF_SEQUENCE_ID 0 #define ZoO_END_OF_SEQUENCE_ID 1 +#define ZoO_ACTION_SEQUENCE_ID 2 -#define ZoO_RESERVED_IDS_COUNT 2 +#define ZoO_RESERVED_IDS_COUNT 3 #endif diff --git a/src/file/data_input.c b/src/file/data_input.c new file mode 100644 index 0000000..e31d33b --- /dev/null +++ b/src/file/data_input.c @@ -0,0 +1,98 @@ +#define _POSIX_C_SOURCE 200809L +#include +#include +#include /* defines SIZE_MAX */ + +#include "error.h" + +#include "data_input.h" + +int ZoO_data_input_open +( + struct ZoO_data_input di [const static 1], + const char filename [const restrict static 1] +) +{ + /* prevents di [restrict] */ + ZoO_strings_initialize(&(di->string)); + + di->file = fopen(filename, "r"); + + if (di->file == (FILE *) NULL) + { + ZoO_ERROR + ( + "Could not open file '%s' in readonly mode.", + filename + ); + + return -1; + } + + return 0; +} + +int ZoO_data_input_read_line +( + struct ZoO_data_input di [const static 1], + ZoO_index const punctuations_count, + const ZoO_char punctuations [const restrict static punctuations_count] +) +{ + size_t line_size, i, w_start; + ZoO_char * line; + + /* prevents di [restrict] */ + ZoO_strings_finalize(&(di->string)); + + line = (ZoO_char *) NULL; + line_size = 0; + + /* XXX: assumed compatible with ZoO_char */ + + if (getline(&line, &line_size, di->file) < 1) + { + free((void *) line); + + return -1; + } + + line_size = strlen(line); + line[line_size - 1] = '\0'; + + --line_size; /* removed '\n' */ + + if + ( + ZoO_strings_parse + ( + &(di->string), + line_size, + line, + punctuations_count, + punctuations + ) < 0 
+ ) + { + free((void *) line); + + return -1; + } + + free((void *) line); + + return 0; +} + +void ZoO_data_input_close (struct ZoO_data_input di [const static 1]) +{ + if (di->file != (FILE *) NULL) + { + fclose(di->file); + + di->file = (FILE *) NULL; + } + + /* prevents di [restrict] */ + ZoO_strings_finalize(&(di->string)); +} diff --git a/src/file/data_input.h b/src/file/data_input.h new file mode 100644 index 0000000..a2f004b --- /dev/null +++ b/src/file/data_input.h @@ -0,0 +1,21 @@ +#ifndef _ZoO_IO_DATA_INPUT_H_ +#define _ZoO_IO_DATA_INPUT_H_ + +#include "data_input_types.h" + +int ZoO_data_input_open +( + struct ZoO_data_input di [const static 1], + const char filename [const restrict static 1] +); + +int ZoO_data_input_read_line +( + struct ZoO_data_input di [const static 1], + ZoO_index const punctuations_count, + const ZoO_char punctuations [const restrict static punctuations_count] +); + +void ZoO_data_input_close (struct ZoO_data_input di [const static 1]); + +#endif diff --git a/src/file/data_input_types.h b/src/file/data_input_types.h new file mode 100644 index 0000000..bd2709b --- /dev/null +++ b/src/file/data_input_types.h @@ -0,0 +1,16 @@ +#ifndef _ZoO_IO_DATA_INPUT_TYPES_H_ +#define _ZoO_IO_DATA_INPUT_TYPES_H_ + +#include + +#include "../pervasive.h" + +#include "../tool/strings.h" + +struct ZoO_data_input +{ + FILE * restrict file; + struct ZoO_strings string; +}; + +#endif diff --git a/src/file/data_output.c b/src/file/data_output.c new file mode 100644 index 0000000..04e3964 --- /dev/null +++ b/src/file/data_output.c @@ -0,0 +1,66 @@ +#define _POSIX_C_SOURCE 200809L + +#include +#include +#include +#include /* defines SIZE_MAX */ +#include + +#include "error.h" + +#include "data_output.h" + +int ZoO_data_output_write_line +( + const char filename [const restrict static 1], + char line [const restrict static 1], + size_t const line_size +) +{ + const int old_errno = errno; + FILE * file; + + file = fopen(filename, "a"); + + if (file == (FILE 
*) NULL) + { + ZoO_ERROR + ( + "Could not open file '%s' in appending mode.", + filename + ); + + return -1; + } + + line[line_size - 1] = '\n'; + + if + ( + fwrite + ( + (const void *) line, + sizeof(char), + line_size, + file + ) < line_size + ) + { + line[line_size - 1] = '\0'; + + ZoO_ERROR + ( + "Could not store line '%s' in %s.", + line, + filename + ); + + fclose(file); + + return -1; + } + + fclose(file); + + return 0; +} diff --git a/src/file/data_output.h b/src/file/data_output.h new file mode 100644 index 0000000..ef963a0 --- /dev/null +++ b/src/file/data_output.h @@ -0,0 +1,11 @@ +#ifndef _ZoO_IO_DATA_OUTPUT_H_ +#define _ZoO_IO_DATA_OUTPUT_H_ + +int ZoO_data_output_write_line +( + const char filename [const restrict static 1], + char line [const restrict static 1], + size_t const line_size +); + +#endif diff --git a/src/io/CMakeLists.txt b/src/io/CMakeLists.txt deleted file mode 100644 index c36413a..0000000 --- a/src/io/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -set( - SRC_FILES ${SRC_FILES} - ${CMAKE_CURRENT_SOURCE_DIR}/parameters.c - ${CMAKE_CURRENT_SOURCE_DIR}/network.c - ${CMAKE_CURRENT_SOURCE_DIR}/data_input.c - ${CMAKE_CURRENT_SOURCE_DIR}/data_output.c -) -set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) - diff --git a/src/io/data_input.c b/src/io/data_input.c deleted file mode 100644 index e31d33b..0000000 --- a/src/io/data_input.c +++ /dev/null @@ -1,98 +0,0 @@ -#define _POSIX_C_SOURCE 200809L -#include -#include -#include /* defines SIZE_MAX */ - -#include "error.h" - -#include "data_input.h" - -int ZoO_data_input_open -( - struct ZoO_data_input di [const static 1], - const char filename [const restrict static 1] -) -{ - /* prevents di [restrict] */ - ZoO_strings_initialize(&(di->string)); - - di->file = fopen(filename, "r"); - - if (di->file == (FILE *) NULL) - { - ZoO_ERROR - ( - "Could not open file '%s' in readonly mode.", - filename - ); - - return -1; - } - - return 0; -} - -int ZoO_data_input_read_line -( - struct ZoO_data_input di [const 
static 1], - ZoO_index const punctuations_count, - const ZoO_char punctuations [const restrict static punctuations_count] -) -{ - size_t line_size, i, w_start; - ZoO_char * line; - - /* prevents di [restrict] */ - ZoO_strings_finalize(&(di->string)); - - line = (ZoO_char *) NULL; - line_size = 0; - - /* XXX: assumed compatible with ZoO_char */ - - if (getline(&line, &line_size, di->file) < 1) - { - free((void *) line); - - return -1; - } - - line_size = strlen(line); - line[line_size - 1] = '\0'; - - --line_size; /* removed '\n' */ - - if - ( - ZoO_strings_parse - ( - &(di->string), - line_size, - line, - punctuations_count, - punctuations - ) < 0 - ) - { - free((void *) line); - - return -1; - } - - free((void *) line); - - return 0; -} - -void ZoO_data_input_close (struct ZoO_data_input di [const static 1]) -{ - if (di->file != (FILE *) NULL) - { - fclose(di->file); - - di->file = (FILE *) NULL; - } - - /* prevents di [restrict] */ - ZoO_strings_finalize(&(di->string)); -} diff --git a/src/io/data_input.h b/src/io/data_input.h deleted file mode 100644 index a2f004b..0000000 --- a/src/io/data_input.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef _ZoO_IO_DATA_INPUT_H_ -#define _ZoO_IO_DATA_INPUT_H_ - -#include "data_input_types.h" - -int ZoO_data_input_open -( - struct ZoO_data_input di [const static 1], - const char filename [const restrict static 1] -); - -int ZoO_data_input_read_line -( - struct ZoO_data_input di [const static 1], - ZoO_index const punctuations_count, - const ZoO_char punctuations [const restrict static punctuations_count] -); - -void ZoO_data_input_close (struct ZoO_data_input di [const static 1]); - -#endif diff --git a/src/io/data_input_types.h b/src/io/data_input_types.h deleted file mode 100644 index bd2709b..0000000 --- a/src/io/data_input_types.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _ZoO_IO_DATA_INPUT_TYPES_H_ -#define _ZoO_IO_DATA_INPUT_TYPES_H_ - -#include - -#include "../pervasive.h" - -#include "../tool/strings.h" - -struct 
ZoO_data_input -{ - FILE * restrict file; - struct ZoO_strings string; -}; - -#endif diff --git a/src/io/data_output.c b/src/io/data_output.c deleted file mode 100644 index 796d3d0..0000000 --- a/src/io/data_output.c +++ /dev/null @@ -1,65 +0,0 @@ -#define _POSIX_C_SOURCE 200809L -#include -#include -#include -#include /* defines SIZE_MAX */ -#include - -#include "error.h" - -#include "data_output.h" - -int ZoO_data_output_write_line -( - const char filename [const restrict static 1], - char line [const restrict static 1], - size_t const line_size -) -{ - const int old_errno = errno; - FILE * file; - - file = fopen(filename, "a"); - - if (file == (FILE *) NULL) - { - ZoO_ERROR - ( - "Could not open file '%s' in appending mode.", - filename - ); - - return -1; - } - - line[line_size - 1] = '\n'; - - if - ( - fwrite - ( - (const void *) line, - sizeof(char), - line_size, - file - ) < line_size - ) - { - line[line_size - 1] = '\0'; - - ZoO_ERROR - ( - "Could not store line '%s' in %s.", - line, - filename - ); - - fclose(file); - - return -1; - } - - fclose(file); - - return 0; -} diff --git a/src/io/data_output.h b/src/io/data_output.h deleted file mode 100644 index ef963a0..0000000 --- a/src/io/data_output.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _ZoO_IO_DATA_OUTPUT_H_ -#define _ZoO_IO_DATA_OUTPUT_H_ - -int ZoO_data_output_write_line -( - const char filename [const restrict static 1], - char line [const restrict static 1], - size_t const line_size -); - -#endif diff --git a/src/io/error.h b/src/io/error.h deleted file mode 100644 index be7359f..0000000 --- a/src/io/error.h +++ /dev/null @@ -1,148 +0,0 @@ -#ifndef _ZoO_IO_ERROR_H_ -#define _ZoO_IO_ERROR_H_ - -#include - -#include "../pervasive.h" - -#define ZoO_DEBUG_ALL 1 - -#ifndef ZoO_DEBUG_ALL - #define ZoO_DEBUG_ALL 0 -#endif - -#ifndef ZoO_DEBUG_PROGRAM_FLOW - #define ZoO_DEBUG_PROGRAM_FLOW (0 || ZoO_DEBUG_ALL) -#endif - -#ifndef ZoO_DEBUG_CONFIG - #define ZoO_DEBUG_CONFIG (0 || ZoO_DEBUG_ALL) -#endif - 
-#ifndef ZoO_DEBUG_LEARNING - #define ZoO_DEBUG_LEARNING (0 || ZoO_DEBUG_ALL) -#endif - -#define ZoO_DEBUG_NETWORK 1 - -#ifndef ZoO_DEBUG_NETWORK - #define ZoO_DEBUG_NETWORK (0 || ZoO_DEBUG_ALL) -#endif - -#define ZoO_ENABLE_WARNINGS_OUTPUT 1 -#define ZoO_ENABLE_RUNTIME_ERRORS_OUTPUT 1 -#define ZoO_ENABLE_PROGRAMMING_ERRORS_OUTPUT 1 -#define ZoO_ENABLE_FATAL_ERROR_OUTPUT 1 - -#ifdef ZoO_ENABLE_ERROR_LOCATION - #define ZoO_LOCATION "[" __FILE__ "][" ZoO_TO_STRING(__LINE__) "]" -#else - #define ZoO_LOCATION "" -#endif - -#define ZoO_PRINT_STDERR(symbol, str, ...)\ - fprintf(stderr, "[" symbol "]" ZoO_LOCATION " " str "\n", __VA_ARGS__); - -/* - * Given that we use preprocessor contants as flags, we can expect the compilers - * to remove the test condition for disabled flags. No need to be shy about - * allowing many debug options. - */ - -#define ZoO_DEBUG(flag, str, ...)\ - ZoO_ISOLATE\ - (\ - if (flag)\ - {\ - ZoO_PRINT_STDERR("D", str, __VA_ARGS__);\ - }\ - ) - - -#define ZoO_WARNING(str, ...)\ - ZoO_ISOLATE\ - (\ - if (ZoO_ENABLE_WARNINGS_OUTPUT)\ - {\ - ZoO_PRINT_STDERR("W", str, __VA_ARGS__);\ - }\ - ) - -#define ZoO_ERROR(str, ...)\ - ZoO_ISOLATE\ - (\ - if (ZoO_ENABLE_RUNTIME_ERRORS_OUTPUT)\ - {\ - ZoO_PRINT_STDERR("E", str, __VA_ARGS__);\ - }\ - ) - -#define ZoO_PROG_ERROR(str, ...)\ - ZoO_ISOLATE\ - (\ - if (ZoO_ENABLE_PROGRAMMING_ERRORS_OUTPUT)\ - {\ - ZoO_PRINT_STDERR("P", str, __VA_ARGS__);\ - }\ - ) - -#define ZoO_FATAL(str, ...)\ - ZoO_ISOLATE\ - (\ - if (ZoO_ENABLE_FATAL_ERROR_OUTPUT)\ - {\ - ZoO_PRINT_STDERR("F", str, __VA_ARGS__);\ - }\ - ) - -/* For outputs without dynamic content (static). 
******************************/ - -#define ZoO_PRINT_S_STDERR(symbol, str)\ - fprintf(stderr, "[" symbol "]" ZoO_LOCATION " " str "\n"); - -#define ZoO_S_DEBUG(flag, str)\ - ZoO_ISOLATE\ - (\ - if (flag)\ - {\ - ZoO_PRINT_S_STDERR("D", str);\ - }\ - ) - -#define ZoO_S_WARNING(str)\ - ZoO_ISOLATE\ - (\ - if (ZoO_ENABLE_WARNINGS_OUTPUT)\ - {\ - ZoO_PRINT_S_STDERR("W", str);\ - }\ - ) - -#define ZoO_S_ERROR(str)\ - ZoO_ISOLATE\ - (\ - if (ZoO_ENABLE_RUNTIME_ERRORS_OUTPUT)\ - {\ - ZoO_PRINT_S_STDERR("E", str);\ - }\ - ) - -#define ZoO_S_PROG_ERROR(str)\ - ZoO_ISOLATE\ - (\ - if (ZoO_ENABLE_PROGRAMMING_ERRORS_OUTPUT)\ - {\ - ZoO_PRINT_S_STDERR("P", str);\ - }\ - ) - -#define ZoO_S_FATAL(str)\ - ZoO_ISOLATE\ - (\ - if (ZoO_ENABLE_FATAL_ERROR_OUTPUT)\ - {\ - ZoO_PRINT_S_STDERR("F", str);\ - }\ - ) - -#endif diff --git a/src/io/network.c b/src/io/network.c deleted file mode 100644 index edafd4f..0000000 --- a/src/io/network.c +++ /dev/null @@ -1,568 +0,0 @@ -#include -#include -#include -#include - -/* "POSIX.1 does not require the inclusion of " */ -/* - man page for setsockopt */ -/* #include */ -#include -#include - -#include "error.h" - -#include "network.h" - -static int re_create_socket (struct ZoO_network net [const restrict static 1]) -{ - struct timeval timeout; - const int old_errno = errno; - - errno = 0; - timeout.tv_sec = ZoO_NETWORK_TIMEOUT; - timeout.tv_usec = 0; - - if (net->connection != -1) - { - close(net->connection); - } - - net->connection = - socket - ( - net->addrinfo->ai_family, - net->addrinfo->ai_socktype, - net->addrinfo->ai_protocol - ); - - if (net->connection == -1) - { - ZoO_ERROR("Could not create socket: %s.", strerror(errno)); - - goto RETURN_FAILED; - } - - if - ( - ( - setsockopt - ( - net->connection, - SOL_SOCKET, - SO_RCVTIMEO, - (const void *) &timeout, - (socklen_t) sizeof(struct timeval) - ) < 0 - ) - || - ( - setsockopt - ( - net->connection, - SOL_SOCKET, - SO_SNDTIMEO, - (const void *) &timeout, - (socklen_t) sizeof(struct 
timeval) - ) < 0 - ) - ) - { - ZoO_ERROR("Could not set timeout on network socket: %s", strerror(errno)); - - goto RETURN_FAILED; - } - - ZoO_S_DEBUG(ZoO_DEBUG_NETWORK, "(Re)connecting to network..."); - - if - ( - connect - ( - net->connection, - net->addrinfo->ai_addr, - net->addrinfo->ai_addrlen - ) != 0 - ) - { - ZoO_ERROR("Could not establish connection: %s", strerror(errno)); - - goto RETURN_FAILED; - } - - errno = old_errno; - - return 0; - -RETURN_FAILED: - errno = old_errno; - - return -1; -} - -static int reconnect (struct ZoO_network net [const restrict static 1]) -{ - const int old_errno = errno; - - memset(net->in, 0, (sizeof(ZoO_char) * 513)); - memset(net->out, 0, (sizeof(ZoO_char) * 513)); - memset(net->buffer, 0, (sizeof(ZoO_char) * 513)); - - net->buffer_index = 0; - net->buffer_remaining = 0; - - if (re_create_socket(net) < 0) - { - return -1; - } - - snprintf(net->out, 512, "USER %s 8 * :%s\r\n", net->user, net->name); - - if (write(net->connection, net->out, strlen(net->out)) < 1) - { - goto RETURN_WRITE_FAILED; - } - - snprintf(net->out, 512, "NICK %s\r\n", net->nick); - - if (write(net->connection, net->out, strlen(net->out)) < 1) - { - goto RETURN_WRITE_FAILED; - } - - net->buffer_remaining = 0; - net->buffer_index = 0; - - ZoO_S_DEBUG(ZoO_DEBUG_NETWORK, "(Re)connected."); - - errno = old_errno; - - return 0; - -RETURN_WRITE_FAILED: - ZoO_ERROR - ( - "Unable to write to the network: %s", - strerror(errno) - ); - - errno = old_errno; - - return -1; -} - -int ZoO_network_connect -( - struct ZoO_network net [const static 1], - const char host [const restrict static 1], - const char port [const restrict static 1], - const char channel [const restrict static 1], - const char user [const restrict static 1], - const char name [const restrict static 1], - const char nick [const restrict static 1] -) -{ - int error; - struct addrinfo hints; - const int old_errno = errno; - - net->connection = -1; - net->channel = channel; - net->user = user; - 
net->name = name; - net->nick = nick; - net->buffer_index = 0; - net->buffer_remaining = 0; - - memset(&hints, 0, sizeof(struct addrinfo)); - memset(net->in, 0, (sizeof(ZoO_char) * 513)); - memset(net->out, 0, (sizeof(ZoO_char) * 513)); - memset(net->buffer, 0, (sizeof(ZoO_char) * 513)); - - hints.ai_family = AF_INET; - hints.ai_socktype = SOCK_STREAM; - - errno = 0; - - error = getaddrinfo(host, port, &hints, &(net->addrinfo)); - - if (error != 0) - { - if (error == EAI_SYSTEM) - { - ZoO_ERROR - ( - "Could not retrieve server information: %s.", - strerror(errno) - ); - } - else - { - ZoO_FATAL - ( - "Could not retrieve server information: %s.", - gai_strerror(error) - ); - } - - errno = old_errno; - - return -1; - } - - errno = old_errno; - - reconnect(net); - - return 0; -} - -static void buffer_msg -( - struct ZoO_network net [const static 1] -) -{ - ssize_t in_count, i; - - if (net->buffer_remaining > 0) - { - in_count = net->buffer_remaining; - net->buffer_remaining = 0; - - goto PARSE_READ; - } - -READ_MORE: - in_count = read(net->connection, net->buffer, 512); - - if (in_count <= 0) - { - ZoO_ERROR("Could not read from network: %s", strerror(errno)); - - while (reconnect(net) < 0) - { - ZoO_S_DEBUG - ( - ZoO_DEBUG_NETWORK, - "Attempting new connection in 5s." - ); - sleep(5); - } - - goto READ_MORE; - } - -PARSE_READ: - for (i = 0; i < in_count; ++i) - { - net->in[net->buffer_index] = net->buffer[i]; - - if - ( - (net->buffer_index > 0) - && (net->in[net->buffer_index - 1] == '\r') - && (net->in[net->buffer_index] == '\n') - ) - { - net->buffer_remaining = (in_count - (i + 1)); - net->in_length = (net->buffer_index - 1); - net->buffer_index = 0; - - if (net->buffer_remaining > 0) - { - memmove - ( - (void *) net->buffer, - (const void *) (net->buffer + (i + 1)), - net->buffer_remaining - ); - } - - return; - } - - net->buffer_index += 1; - - if (net->buffer_index > 512) - { - ZoO_S_WARNING("Incoming message is too long. 
Discarded."); - - net->buffer_index = 0; - net->buffer_remaining = 0; - - break; - } - } - - goto READ_MORE; -} - -void handle_ping (struct ZoO_network net [const restrict static 1]) -{ - const int old_errno = errno; - - #if ZoO_RANDOMLY_IGNORE_PING == 1 - if ((rand() % 10) < 3) - { - ZoO_S_DEBUG(ZoO_DEBUG_NETWORK, "Ping request ignored."); - - return; - } - - #endif - - #if ZoO_DEBUG_NETWORK_PING == 1 - net->in[net->in_length] = '\0'; - - ZoO_DEBUG(ZoO_DEBUG_NETWORK, "[NET->in] %s", net->in); - - net->in[net->in_length] = '\r'; - #endif - - net->in[1] = 'O'; - - errno = 0; - - if (write(net->connection, net->in, (net->in_length + 2)) < 1) - { - ZoO_ERROR("Could not reply to PING request: %s", strerror(errno)); - - errno = old_errno; - - while (reconnect(net) < 0) - { - ZoO_S_DEBUG - ( - ZoO_DEBUG_NETWORK, - "Attempting new connection in 5s." - ); - sleep(5); - } - - return; - } - - errno = old_errno; - -#if ZoO_DEBUG_NETWORK_PING == 1 - net->in[net->in_length] = '\0'; - - ZoO_DEBUG(ZoO_DEBUG_NETWORK, "[NET->out] %s", net->in); -#endif - -} - -int ZoO_network_receive -( - struct ZoO_network net [const restrict static 1], - size_t msg_offset [const restrict static 1], - size_t msg_size [const restrict static 1], - enum ZoO_msg_type type [const restrict static 1] -) -{ - const int old_errno = errno; - ssize_t cmd, i; - -READ_NEW_MSG: - buffer_msg(net); - - net->in[net->in_length + 2] = '\0'; - - /* XXX: doesn't that prevent net [restrict]? 
*/ - if (ZoO_IS_PREFIX("PING", net->in)) - { - - handle_ping(net); - - goto READ_NEW_MSG; - } - - if (net->in_length == 0) - { - goto READ_NEW_MSG; - } - - net->in[net->in_length] = '\0'; - - ZoO_DEBUG(ZoO_DEBUG_NETWORK, "[NET->in] %s", net->in); - - if (net->in[0] == ':') - { - cmd = 0; - - for (i = 1; i < 512; i++) - { - if (net->in[i] == ' ') - { - cmd = (i + 1); - - break; - } - } - - if (ZoO_IS_PREFIX("001", (net->in + cmd))) - { - snprintf - ( - net->out, - 512, - "JOIN :%s\r\n", - net->channel - ); - - errno = 0; - - if (write(net->connection, net->out, strlen(net->out)) < 1) - { - ZoO_ERROR - ( - "Could not send JOIN request: %s", - strerror(errno) - ); - - errno = old_errno; - - if (reconnect(net) < 0) - { - return -1; - } - } - - errno = old_errno; - - ZoO_DEBUG(ZoO_DEBUG_NETWORK, "[NET->out] %s", net->out); - - goto READ_NEW_MSG; - } - - if (ZoO_IS_PREFIX("JOIN", (net->in + cmd))) - { - for (i = 1; (i < 512) && (net->in[i] != '!'); ++i) - { - } - - if ((i == 512) || (i == 1)) - { - ZoO_ERROR("Could not find JOIN username: %s", net->in); - - goto READ_NEW_MSG; - } - - *msg_offset = 1; - *msg_size = (i - 1); - net->in[i] = '\0'; - - *type = ZoO_JOIN; - - return 0; - } - - if (ZoO_IS_PREFIX("PRIVMSG", (net->in + cmd))) - { - - for (; i < 512; i++) - { - if (net->in[i] == ':') - { - cmd = (i + 1); - - break; - } - } - - *msg_offset = cmd; - *msg_size = (net->in_length - cmd); - - /*net->in[*msg_size - 1] = '\0'; */ - - *type = ZoO_PRIVMSG; - - return 0; - } - } - - if (ZoO_IS_PREFIX("ERROR", (net->in + cmd))) - { - while (reconnect(net) < 0) - { - ZoO_S_DEBUG - ( - ZoO_DEBUG_NETWORK, - "Attempting new connection in 5s." 
- ); - sleep(5); - } - } - - goto READ_NEW_MSG; -} - -int ZoO_network_send (struct ZoO_network net [const restrict static 1]) -{ - int const old_errno = errno; - - if (ZoO_IS_PREFIX("\001action", net->out)) - { - - net->out[1] = 'A'; - net->out[2] = 'C'; - net->out[3] = 'T'; - net->out[4] = 'I'; - net->out[5] = 'O'; - net->out[6] = 'N'; - - snprintf - ( - net->in, - 512, - "PRIVMSG %s :%s\001\r\n", - net->channel, - net->out - ); - } - else - { - snprintf - ( - net->in, - 512, - "PRIVMSG %s :%s\r\n", - net->channel, - net->out - ); - } - - errno = 0; - - if (write(net->connection, net->in, strlen(net->in)) < 1) - { - ZoO_ERROR - ( - "Could not send PRIVMSG: %s.", - strerror(errno) - ); - - errno = old_errno; - - if (reconnect(net) < 0) - { - return -2; - } - else - { - return -1; - } - } - - errno = old_errno; - - ZoO_DEBUG(ZoO_DEBUG_NETWORK, "[NET->out] %s", net->in); - - return 0; -} - -void ZoO_network_disconnect (struct ZoO_network net [const restrict static 1]) -{ - freeaddrinfo(net->addrinfo); - close(net->connection); -} - diff --git a/src/io/network.h b/src/io/network.h deleted file mode 100644 index 647b19c..0000000 --- a/src/io/network.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef _ZoO_IO_NETWORK_H_ -#define _ZoO_IO_NETWORK_H_ -#include "network_types.h" - -int ZoO_network_connect -( - struct ZoO_network net [const static 1], - const char host [const restrict static 1], - const char port [const restrict static 1], - const char channel [const restrict static 1], - const char user [const restrict static 1], - const char name [const restrict static 1], - const char nick [const restrict static 1] -); - -int ZoO_network_receive -( - struct ZoO_network net [const static 1], - size_t msg_offset [const restrict static 1], - size_t msg_size [const restrict static 1], - enum ZoO_msg_type type [const restrict static 1] -); - -int ZoO_network_send (struct ZoO_network net [const restrict static 1]); - -void ZoO_network_disconnect (struct ZoO_network net [const restrict 
static 1]); - -#endif diff --git a/src/io/network_types.h b/src/io/network_types.h deleted file mode 100644 index 9a328a7..0000000 --- a/src/io/network_types.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef _ZoO_IO_NETWORK_TYPES_H_ -#define _ZoO_IO_NETWORK_TYPES_H_ - -#define POSIX_C_SOURCE - -#include -#include -#include - -#include "../pervasive.h" - -enum ZoO_msg_type -{ - ZoO_PRIVMSG, - ZoO_JOIN -}; - -struct ZoO_network -{ - size_t buffer_index; - size_t buffer_remaining; - size_t in_length; - struct addrinfo * addrinfo; - ZoO_char buffer [513]; - ZoO_char in [513]; - ZoO_char out [513]; - int connection; - const char * restrict channel; - const char * restrict user; - const char * restrict name; - const char * restrict nick; -}; - -#endif diff --git a/src/io/parameters.c b/src/io/parameters.c deleted file mode 100644 index 77c33aa..0000000 --- a/src/io/parameters.c +++ /dev/null @@ -1,385 +0,0 @@ -#include -#include -#include -#include - -#include "../pervasive.h" - -#include "error.h" - -#include "parameters.h" - -static void load_default_parameters -( - struct ZoO_parameters param [const restrict static 1] -) -{ - param->data_filename = ZoO_DEFAULT_DATA_FILENAME; - param->new_data_filename = (char *) NULL; - - param->irc_server_addr = ZoO_DEFAULT_IRC_SERVER_ADDR; - param->irc_server_port = ZoO_DEFAULT_IRC_SERVER_PORT; - param->irc_server_channel = ZoO_DEFAULT_IRC_SERVER_CHANNEL; - param->irc_username = ZoO_DEFAULT_IRC_USERNAME; - param->irc_realname = ZoO_DEFAULT_IRC_REALNAME; - - param->reply_rate = ZoO_DEFAULT_REPLY_RATE; - - param->aliases_count = 0; - param->aliases = NULL; -} - -static void print_help (const char exec [const restrict static 1]) -{ - printf - ( - "Usage: %s [option_1 option_2 ...] NICKNAME [ALIAS_1 ALIAS_2 ...] 
\n" - "NICKNAME is used as the IRC nickname value.\n" - "If NICKNAME or any ALIAS is found in an event, the program will reply.\n" - "\nAvailable options:\n" - " [--data-filename | -df] FILENAME\n" - " Learn content from FILENAME before connecting.\n" - " Default: %s.\n" - " [--new-data-filename | -ndf] FILENAME\n" - " Store new data learned in FILENAME.\n" - " Default: value of the --data-filename param.\n" - " [--irc-server-addr | -isa] IRC_SERVER_ADDR\n" - " Connect to this server address.\n" - " Default: %s.\n" - " [--irc-server-port | -isp] IRC_SERVER_PORT\n" - " Connect to this server port.\n" - " Default: %s.\n" - " [--irc-server-channel | -isc] IRC_SERVER_CHANNEL\n" - " Connect to this server's channel.\n" - " Default: %s.\n" - " [--irc-username | -iu] USERNAME\n" - " Connect using this as 'username' (shown in WHOIS).\n" - " Default: %s.\n" - " [--irc-realname | -ir] REALNAME\n" - " Connect using this as 'realname' (shown in WHOIS).\n" - " Default: %s.\n" - " [--reply-rate | -rr] REPLY_RATE\n" - " Chance to reply to an event (integer, range [0, 100]).\n" - " Default: %d.\n", - exec, - ZoO_DEFAULT_DATA_FILENAME, - ZoO_DEFAULT_IRC_SERVER_ADDR, - ZoO_DEFAULT_IRC_SERVER_PORT, - ZoO_DEFAULT_IRC_SERVER_CHANNEL, - ZoO_DEFAULT_IRC_USERNAME, - ZoO_DEFAULT_IRC_REALNAME, - ZoO_DEFAULT_REPLY_RATE - ); -} - -static int parse_string_arg -( - const char * restrict dest [const restrict static 1], - int const i, - const char * restrict argv [const restrict static 1], - int const argc -) -{ - if (i == argc) - { - ZoO_FATAL - ( - "Missing value for parameter '%s'.", - /* Safe: i > 1 */ - argv[i - 1] - ); - - return -1; - } - - *dest = argv[i]; - - return 0; -} - -static int parse_integer_arg -( - int dest [const restrict static 1], - int const i, - const char * argv [const restrict static 1], - int const argc, - int const min_val, - int const max_val -) -{ - long int result; - char * endptr; - const int old_errno = errno; - - if (i == argc) - { - ZoO_FATAL - ( - "Missing 
value for parameter '%s'.", - /* Safe: i > 1 */ - argv[i - 1] - ); - - return -1; - } - - errno = 0; - - result = strtol(argv[i], &endptr, 10); - - if - ( - (errno != 0) - || ((*endptr) == '\n') - || (result < min_val) - || (result > max_val) - ) - { - ZoO_FATAL - ( - "Invalid or missing value for parameter '%s', accepted range is " - "[%d, %d] (integer).", - /* Safe: i > 1 */ - argv[i - 1], - min_val, - max_val - ); - - errno = old_errno; - - return -1; - } - - *dest = (int) result; - - errno = old_errno; - - return 0; -} - -int ZoO_parameters_initialize -( - struct ZoO_parameters param [const restrict static 1], - int const argc, - const char * argv [const restrict static argc] -) -{ - int i; - - load_default_parameters(param); - - for (i = 1; i < argc; ++i) - { - if - ( - (strcmp(argv[i], "--data-filename") == 0) - || (strcmp(argv[i], "-df") == 0) - ) - { - i += 1; - - if - ( - parse_string_arg - ( - &(param->data_filename), - i, - argv, - argc - ) < 0 - ) - { - return -1; - } - } - else if - ( - (strcmp(argv[i], "--new-data-filename") == 0) - || (strcmp(argv[i], "-ndf") == 0) - ) - { - i += 1; - - if - ( - parse_string_arg - ( - &(param->new_data_filename), - i, - argv, - argc - ) < 0 - ) - { - return -1; - } - } - else if - ( - (strcmp(argv[i], "--irc-server-addr") == 0) - || (strcmp(argv[i], "-isa") == 0) - ) - { - i += 1; - - if - ( - parse_string_arg - ( - &(param->irc_server_addr), - i, - argv, - argc - ) < 0 - ) - { - return -1; - } - } - else if - ( - (strcmp(argv[i], "--irc-server-port") == 0) - || (strcmp(argv[i], "-isp") == 0) - ) - { - i += 1; - - if - ( - parse_string_arg - ( - &(param->irc_server_port), - i, - argv, - argc - ) < 0 - ) - { - return -1; - } - } - else if - ( - (strcmp(argv[i], "--irc-server-channel") == 0) - || (strcmp(argv[i], "-isc") == 0) - ) - { - i += 1; - - if - ( - parse_string_arg - ( - &(param->irc_server_channel), - i, - argv, - argc - ) < 0 - ) - { - return -1; - } - } - else if - ( - (strcmp(argv[i], "--irc-username") == 
0) - || (strcmp(argv[i], "-iu") == 0) - ) - { - i += 1; - - if - ( - parse_string_arg - ( - &(param->irc_username), - i, - argv, - argc - ) < 0 - ) - { - return -1; - } - } - else if - ( - (strcmp(argv[i], "--irc-realname") == 0) - || (strcmp(argv[i], "-in") == 0) - ) - { - i += 1; - - if - ( - parse_string_arg - ( - &(param->irc_realname), - i, - argv, - argc - ) < 0 - ) - { - return -1; - } - } - else if - ( - (strcmp(argv[i], "--reply-rate") == 0) - || (strcmp(argv[i], "-rr") == 0) - ) - { - i += 1; - - if - ( - parse_integer_arg - ( - &(param->reply_rate), - i, - argv, - argc, - 0, - 100 - ) < 0 - ) - { - return -1; - } - } - else if - ( - (strcmp(argv[i], "--help") == 0) - || (strcmp(argv[i], "-h") == 0) - ) - { - print_help(argv[0]); - - return 0; - } - else - { - break; - } - } - - if (i == argc) - { - ZoO_S_FATAL("Missing argument: NICKNAME"); - - print_help(argv[0]); - - return -1; - } - - param->aliases_count = (argc - i); - param->aliases = (argv + i); - - if (param->new_data_filename == (char *) NULL) - { - param->new_data_filename = param->data_filename; - } - - return 1; -} diff --git a/src/io/parameters.h b/src/io/parameters.h deleted file mode 100644 index 1011e2b..0000000 --- a/src/io/parameters.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef _ZoO_IO_PARAMETERS_H_ -#define _ZoO_IO_PARAMETERS_H_ - -#include "parameters_types.h" - -int ZoO_parameters_initialize -( - struct ZoO_parameters param [const static 1], - int const argc, - const char * argv [const static argc] -); - -#endif diff --git a/src/io/parameters_types.h b/src/io/parameters_types.h deleted file mode 100644 index 92a9e30..0000000 --- a/src/io/parameters_types.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef _ZoO_IO_PARAMETERS_TYPES_H_ -#define _ZoO_IO_PARAMETERS_TYPES_H_ - -struct ZoO_parameters -{ - const char * restrict data_filename; - const char * restrict new_data_filename; - - const char * restrict irc_server_addr; - const char * restrict irc_server_port; - const char * restrict 
irc_server_channel; - const char * restrict irc_username; - const char * restrict irc_realname; - - int reply_rate; - - int aliases_count; - const char * restrict * restrict aliases; -}; - -#endif diff --git a/src/irc/network.c b/src/irc/network.c new file mode 100644 index 0000000..edafd4f --- /dev/null +++ b/src/irc/network.c @@ -0,0 +1,568 @@ +#include +#include +#include +#include + +/* "POSIX.1 does not require the inclusion of " */ +/* - man page for setsockopt */ +/* #include */ +#include +#include + +#include "error.h" + +#include "network.h" + +static int re_create_socket (struct ZoO_network net [const restrict static 1]) +{ + struct timeval timeout; + const int old_errno = errno; + + errno = 0; + timeout.tv_sec = ZoO_NETWORK_TIMEOUT; + timeout.tv_usec = 0; + + if (net->connection != -1) + { + close(net->connection); + } + + net->connection = + socket + ( + net->addrinfo->ai_family, + net->addrinfo->ai_socktype, + net->addrinfo->ai_protocol + ); + + if (net->connection == -1) + { + ZoO_ERROR("Could not create socket: %s.", strerror(errno)); + + goto RETURN_FAILED; + } + + if + ( + ( + setsockopt + ( + net->connection, + SOL_SOCKET, + SO_RCVTIMEO, + (const void *) &timeout, + (socklen_t) sizeof(struct timeval) + ) < 0 + ) + || + ( + setsockopt + ( + net->connection, + SOL_SOCKET, + SO_SNDTIMEO, + (const void *) &timeout, + (socklen_t) sizeof(struct timeval) + ) < 0 + ) + ) + { + ZoO_ERROR("Could not set timeout on network socket: %s", strerror(errno)); + + goto RETURN_FAILED; + } + + ZoO_S_DEBUG(ZoO_DEBUG_NETWORK, "(Re)connecting to network..."); + + if + ( + connect + ( + net->connection, + net->addrinfo->ai_addr, + net->addrinfo->ai_addrlen + ) != 0 + ) + { + ZoO_ERROR("Could not establish connection: %s", strerror(errno)); + + goto RETURN_FAILED; + } + + errno = old_errno; + + return 0; + +RETURN_FAILED: + errno = old_errno; + + return -1; +} + +static int reconnect (struct ZoO_network net [const restrict static 1]) +{ + const int old_errno = errno; + + 
memset(net->in, 0, (sizeof(ZoO_char) * 513)); + memset(net->out, 0, (sizeof(ZoO_char) * 513)); + memset(net->buffer, 0, (sizeof(ZoO_char) * 513)); + + net->buffer_index = 0; + net->buffer_remaining = 0; + + if (re_create_socket(net) < 0) + { + return -1; + } + + snprintf(net->out, 512, "USER %s 8 * :%s\r\n", net->user, net->name); + + if (write(net->connection, net->out, strlen(net->out)) < 1) + { + goto RETURN_WRITE_FAILED; + } + + snprintf(net->out, 512, "NICK %s\r\n", net->nick); + + if (write(net->connection, net->out, strlen(net->out)) < 1) + { + goto RETURN_WRITE_FAILED; + } + + net->buffer_remaining = 0; + net->buffer_index = 0; + + ZoO_S_DEBUG(ZoO_DEBUG_NETWORK, "(Re)connected."); + + errno = old_errno; + + return 0; + +RETURN_WRITE_FAILED: + ZoO_ERROR + ( + "Unable to write to the network: %s", + strerror(errno) + ); + + errno = old_errno; + + return -1; +} + +int ZoO_network_connect +( + struct ZoO_network net [const static 1], + const char host [const restrict static 1], + const char port [const restrict static 1], + const char channel [const restrict static 1], + const char user [const restrict static 1], + const char name [const restrict static 1], + const char nick [const restrict static 1] +) +{ + int error; + struct addrinfo hints; + const int old_errno = errno; + + net->connection = -1; + net->channel = channel; + net->user = user; + net->name = name; + net->nick = nick; + net->buffer_index = 0; + net->buffer_remaining = 0; + + memset(&hints, 0, sizeof(struct addrinfo)); + memset(net->in, 0, (sizeof(ZoO_char) * 513)); + memset(net->out, 0, (sizeof(ZoO_char) * 513)); + memset(net->buffer, 0, (sizeof(ZoO_char) * 513)); + + hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_STREAM; + + errno = 0; + + error = getaddrinfo(host, port, &hints, &(net->addrinfo)); + + if (error != 0) + { + if (error == EAI_SYSTEM) + { + ZoO_ERROR + ( + "Could not retrieve server information: %s.", + strerror(errno) + ); + } + else + { + ZoO_FATAL + ( + "Could not 
retrieve server information: %s.", + gai_strerror(error) + ); + } + + errno = old_errno; + + return -1; + } + + errno = old_errno; + + reconnect(net); + + return 0; +} + +static void buffer_msg +( + struct ZoO_network net [const static 1] +) +{ + ssize_t in_count, i; + + if (net->buffer_remaining > 0) + { + in_count = net->buffer_remaining; + net->buffer_remaining = 0; + + goto PARSE_READ; + } + +READ_MORE: + in_count = read(net->connection, net->buffer, 512); + + if (in_count <= 0) + { + ZoO_ERROR("Could not read from network: %s", strerror(errno)); + + while (reconnect(net) < 0) + { + ZoO_S_DEBUG + ( + ZoO_DEBUG_NETWORK, + "Attempting new connection in 5s." + ); + sleep(5); + } + + goto READ_MORE; + } + +PARSE_READ: + for (i = 0; i < in_count; ++i) + { + net->in[net->buffer_index] = net->buffer[i]; + + if + ( + (net->buffer_index > 0) + && (net->in[net->buffer_index - 1] == '\r') + && (net->in[net->buffer_index] == '\n') + ) + { + net->buffer_remaining = (in_count - (i + 1)); + net->in_length = (net->buffer_index - 1); + net->buffer_index = 0; + + if (net->buffer_remaining > 0) + { + memmove + ( + (void *) net->buffer, + (const void *) (net->buffer + (i + 1)), + net->buffer_remaining + ); + } + + return; + } + + net->buffer_index += 1; + + if (net->buffer_index > 512) + { + ZoO_S_WARNING("Incoming message is too long. 
Discarded."); + + net->buffer_index = 0; + net->buffer_remaining = 0; + + break; + } + } + + goto READ_MORE; +} + +void handle_ping (struct ZoO_network net [const restrict static 1]) +{ + const int old_errno = errno; + + #if ZoO_RANDOMLY_IGNORE_PING == 1 + if ((rand() % 10) < 3) + { + ZoO_S_DEBUG(ZoO_DEBUG_NETWORK, "Ping request ignored."); + + return; + } + + #endif + + #if ZoO_DEBUG_NETWORK_PING == 1 + net->in[net->in_length] = '\0'; + + ZoO_DEBUG(ZoO_DEBUG_NETWORK, "[NET->in] %s", net->in); + + net->in[net->in_length] = '\r'; + #endif + + net->in[1] = 'O'; + + errno = 0; + + if (write(net->connection, net->in, (net->in_length + 2)) < 1) + { + ZoO_ERROR("Could not reply to PING request: %s", strerror(errno)); + + errno = old_errno; + + while (reconnect(net) < 0) + { + ZoO_S_DEBUG + ( + ZoO_DEBUG_NETWORK, + "Attempting new connection in 5s." + ); + sleep(5); + } + + return; + } + + errno = old_errno; + +#if ZoO_DEBUG_NETWORK_PING == 1 + net->in[net->in_length] = '\0'; + + ZoO_DEBUG(ZoO_DEBUG_NETWORK, "[NET->out] %s", net->in); +#endif + +} + +int ZoO_network_receive +( + struct ZoO_network net [const restrict static 1], + size_t msg_offset [const restrict static 1], + size_t msg_size [const restrict static 1], + enum ZoO_msg_type type [const restrict static 1] +) +{ + const int old_errno = errno; + ssize_t cmd, i; + +READ_NEW_MSG: + buffer_msg(net); + + net->in[net->in_length + 2] = '\0'; + + /* XXX: doesn't that prevent net [restrict]? 
*/ + if (ZoO_IS_PREFIX("PING", net->in)) + { + + handle_ping(net); + + goto READ_NEW_MSG; + } + + if (net->in_length == 0) + { + goto READ_NEW_MSG; + } + + net->in[net->in_length] = '\0'; + + ZoO_DEBUG(ZoO_DEBUG_NETWORK, "[NET->in] %s", net->in); + + if (net->in[0] == ':') + { + cmd = 0; + + for (i = 1; i < 512; i++) + { + if (net->in[i] == ' ') + { + cmd = (i + 1); + + break; + } + } + + if (ZoO_IS_PREFIX("001", (net->in + cmd))) + { + snprintf + ( + net->out, + 512, + "JOIN :%s\r\n", + net->channel + ); + + errno = 0; + + if (write(net->connection, net->out, strlen(net->out)) < 1) + { + ZoO_ERROR + ( + "Could not send JOIN request: %s", + strerror(errno) + ); + + errno = old_errno; + + if (reconnect(net) < 0) + { + return -1; + } + } + + errno = old_errno; + + ZoO_DEBUG(ZoO_DEBUG_NETWORK, "[NET->out] %s", net->out); + + goto READ_NEW_MSG; + } + + if (ZoO_IS_PREFIX("JOIN", (net->in + cmd))) + { + for (i = 1; (i < 512) && (net->in[i] != '!'); ++i) + { + } + + if ((i == 512) || (i == 1)) + { + ZoO_ERROR("Could not find JOIN username: %s", net->in); + + goto READ_NEW_MSG; + } + + *msg_offset = 1; + *msg_size = (i - 1); + net->in[i] = '\0'; + + *type = ZoO_JOIN; + + return 0; + } + + if (ZoO_IS_PREFIX("PRIVMSG", (net->in + cmd))) + { + + for (; i < 512; i++) + { + if (net->in[i] == ':') + { + cmd = (i + 1); + + break; + } + } + + *msg_offset = cmd; + *msg_size = (net->in_length - cmd); + + /*net->in[*msg_size - 1] = '\0'; */ + + *type = ZoO_PRIVMSG; + + return 0; + } + } + + if (ZoO_IS_PREFIX("ERROR", (net->in + cmd))) + { + while (reconnect(net) < 0) + { + ZoO_S_DEBUG + ( + ZoO_DEBUG_NETWORK, + "Attempting new connection in 5s." 
+ ); + sleep(5); + } + } + + goto READ_NEW_MSG; +} + +int ZoO_network_send (struct ZoO_network net [const restrict static 1]) +{ + int const old_errno = errno; + + if (ZoO_IS_PREFIX("\001action", net->out)) + { + + net->out[1] = 'A'; + net->out[2] = 'C'; + net->out[3] = 'T'; + net->out[4] = 'I'; + net->out[5] = 'O'; + net->out[6] = 'N'; + + snprintf + ( + net->in, + 512, + "PRIVMSG %s :%s\001\r\n", + net->channel, + net->out + ); + } + else + { + snprintf + ( + net->in, + 512, + "PRIVMSG %s :%s\r\n", + net->channel, + net->out + ); + } + + errno = 0; + + if (write(net->connection, net->in, strlen(net->in)) < 1) + { + ZoO_ERROR + ( + "Could not send PRIVMSG: %s.", + strerror(errno) + ); + + errno = old_errno; + + if (reconnect(net) < 0) + { + return -2; + } + else + { + return -1; + } + } + + errno = old_errno; + + ZoO_DEBUG(ZoO_DEBUG_NETWORK, "[NET->out] %s", net->in); + + return 0; +} + +void ZoO_network_disconnect (struct ZoO_network net [const restrict static 1]) +{ + freeaddrinfo(net->addrinfo); + close(net->connection); +} + diff --git a/src/irc/network.h b/src/irc/network.h new file mode 100644 index 0000000..647b19c --- /dev/null +++ b/src/irc/network.h @@ -0,0 +1,28 @@ +#ifndef _ZoO_IO_NETWORK_H_ +#define _ZoO_IO_NETWORK_H_ +#include "network_types.h" + +int ZoO_network_connect +( + struct ZoO_network net [const static 1], + const char host [const restrict static 1], + const char port [const restrict static 1], + const char channel [const restrict static 1], + const char user [const restrict static 1], + const char name [const restrict static 1], + const char nick [const restrict static 1] +); + +int ZoO_network_receive +( + struct ZoO_network net [const static 1], + size_t msg_offset [const restrict static 1], + size_t msg_size [const restrict static 1], + enum ZoO_msg_type type [const restrict static 1] +); + +int ZoO_network_send (struct ZoO_network net [const restrict static 1]); + +void ZoO_network_disconnect (struct ZoO_network net [const restrict 
static 1]); + +#endif diff --git a/src/irc/network_types.h b/src/irc/network_types.h new file mode 100644 index 0000000..9a328a7 --- /dev/null +++ b/src/irc/network_types.h @@ -0,0 +1,34 @@ +#ifndef _ZoO_IO_NETWORK_TYPES_H_ +#define _ZoO_IO_NETWORK_TYPES_H_ + +#define POSIX_C_SOURCE + +#include +#include +#include + +#include "../pervasive.h" + +enum ZoO_msg_type +{ + ZoO_PRIVMSG, + ZoO_JOIN +}; + +struct ZoO_network +{ + size_t buffer_index; + size_t buffer_remaining; + size_t in_length; + struct addrinfo * addrinfo; + ZoO_char buffer [513]; + ZoO_char in [513]; + ZoO_char out [513]; + int connection; + const char * restrict channel; + const char * restrict user; + const char * restrict name; + const char * restrict nick; +}; + +#endif diff --git a/src/knowledge/CMakeLists.txt b/src/knowledge/CMakeLists.txt new file mode 100644 index 0000000..1245321 --- /dev/null +++ b/src/knowledge/CMakeLists.txt @@ -0,0 +1,11 @@ +set( + SRC_FILES ${SRC_FILES} + ${CMAKE_CURRENT_SOURCE_DIR}/knowledge.c + ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_finalize.c + ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_learn_sequence.c + ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_learn_word.c + ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_search.c +) + +set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) + diff --git a/src/knowledge/knowledge.c b/src/knowledge/knowledge.c new file mode 100644 index 0000000..a72969e --- /dev/null +++ b/src/knowledge/knowledge.c @@ -0,0 +1,21 @@ +#include +#include +#include /* defines SIZE_MAX */ + +#include "../cli/cli.h" + +#include "knowledge.h" + +/** Basic functions of the ZoO_knowledge structure ****************************/ + +/* See: "knowledge.h" */ +void ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1]) +{ + k->words = (struct ZoO_knowledge_word *) NULL; + k->words_length = 0; + k->words_sorted = (ZoO_index *) NULL; + + k->sequences = (ZoO_index **) NULL; + k->sequences_length = 0; + k->sequences_sorted = (ZoO_index *) NULL; +} diff --git a/src/knowledge/knowledge.h 
b/src/knowledge/knowledge.h new file mode 100644 index 0000000..51d94c4 --- /dev/null +++ b/src/knowledge/knowledge.h @@ -0,0 +1,110 @@ +#ifndef _ZoO_KNOWLEDGE_KNOWLEDGE_H_ +#define _ZoO_KNOWLEDGE_KNOWLEDGE_H_ + +#include "../core/char_types.h" +#include "../core/index_types.h" + +#include "knowledge_types.h" + +void ZoO_knowledge_initialize (struct ZoO_knowledge k [const restrict static 1]); + +void ZoO_knowledge_finalize (struct ZoO_knowledge k [const restrict static 1]); + +/* + * When returning 0: + * {word} was added to {k}, or was already there. + * {*result} indicates where {word} is in {k->words}. + * + * When returning -1: + * Something went wrong when adding the occurrence of {word} to {k}. + * {k} remains semantically unchanged. + * {*result} may or may not have been altered. + */ +int ZoO_knowledge_learn_word +( + struct ZoO_knowledge k [const static 1], + const ZoO_char word [const restrict static 1], + const ZoO_index word_length, + ZoO_index result [const restrict static 1] +); + +int ZoO_knowledge_learn_sequence +( + struct ZoO_knowledge k [const restrict static 1], + const ZoO_index sequence [const restrict static 1], + const ZoO_index sequence_length, + const ZoO_index markov_order +); + +int ZoO_knowledge_learn_markov_sequence +( + struct ZoO_knowledge k [const restrict static 1], + const ZoO_index sequence [const restrict static 1], + const ZoO_index sequence_length, + const ZoO_index markov_order +); + +int ZoO_knowledge_get_following_sequences_ref +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index initial_word, + const ZoO_index * restrict following_sequences_ref [const restrict static 1], + const ZoO_index * restrict following_sequences_weights [const restrict static 1], + ZoO_index following_sequences_weights_sum [const static 1] +); + +int ZoO_knowledge_get_sequence +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequences_ref, + const ZoO_index * restrict sequence [const restrict static 1] +); + 
+int ZoO_knowledge_get_word +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index word_ref, + const ZoO_char * word [const restrict static 1], + size_t word_size [const restrict static 1] +); + +/* + * When returning 0: + * {word} is in {k}. + * {word} is located at {k->words[*result]}. + * + * When returning -1: + * {word} is not in {k}. + * {*result} is where {word} was expected to be found in + * {k->sorted_indices}. + */ +int ZoO_knowledge_find_word_id +( + const struct ZoO_knowledge k [const restrict static 1], + const ZoO_char word [const restrict static 1], + const size_t word_size, + ZoO_index result [const restrict static 1] +); + +int ZoO_knowledge_find_preceding_words +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequence [const restrict], + const ZoO_index markov_order, + const ZoO_index * restrict preceding_words [const restrict static 1], + const ZoO_index * restrict preceding_words_weights [const restrict static 1], + ZoO_index preceding_words_weights_sum [const restrict static 1] +); + +int ZoO_knowledge_find_following_words +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequence [const restrict], + const ZoO_index sequence_length, + const ZoO_index markov_order, + const ZoO_index * restrict following_words [const restrict static 1], + const ZoO_index * restrict following_words_weights [const restrict static 1], + ZoO_index following_words_weights_sum [const restrict static 1] +); + +#endif diff --git a/src/knowledge/knowledge_finalize.c b/src/knowledge/knowledge_finalize.c new file mode 100644 index 0000000..36a7406 --- /dev/null +++ b/src/knowledge/knowledge_finalize.c @@ -0,0 +1,122 @@ +#include +#include +#include /* defines SIZE_MAX */ + +#include "../cli/cli.h" + +#include "knowledge.h" + +static void knowledge_sequence_collection_finalize +( + struct ZoO_knowledge_sequence_collection c [const restrict static 1] +) +{ + ZoO_index i; + + if (c->sequences_ref != (ZoO_index *) NULL) + 
{ + free((void *) c->sequences_ref); + c->sequences_ref = (ZoO_index *) NULL; + } + + if (c->sequences_ref_sorted != (ZoO_index *) NULL) + { + free((void *) c->sequences_ref_sorted); + c->sequences_ref_sorted = (ZoO_index *) NULL; + } + + if (c->occurrences != (ZoO_index *) NULL) + { + free((void *) c->occurrences); + c->occurrences = (ZoO_index *) NULL; + } + + for (i = 0; i < c->sequences_ref_length; ++i) + { + free((void *) c->targets[i]); + free((void *) c->targets_occurrences[i]); + } + + c->sequences_ref_length = 0; + + if (c->targets != (ZoO_index **) NULL) + { + free((void *) c->targets); + c->targets != (ZoO_index **) NULL; + } + + free((void *) c->targets_length); + + if (c->targets_occurrences != (ZoO_index **) NULL) + { + free((void *) c->targets_occurrences); + c->targets_occurrences != (ZoO_index **) NULL; + } +} + +static void knowledge_word_finalize +( + struct ZoO_knowledge_word w [const restrict static 1] +) +{ + w->word_size = 0; + w->occurrences = 0; + + if (w->word != (ZoO_char *) NULL) + { + free((void *) w->word); + + w->word = (ZoO_char *) NULL; + } + + knowledge_sequence_collection_finalize(&(w->followed)); + knowledge_sequence_collection_finalize(&(w->preceded)); +} + +/* See: "knowledge.h" */ +void ZoO_knowledge_finalize (struct ZoO_knowledge k [const restrict static 1]) +{ + ZoO_index i; + + for (i = 0; i < k->words_length; ++i) + { + knowledge_word_finalize(k->words + i); + } + + k->words_length = 0; + + if (k->words != (struct ZoO_knowledge_word *) NULL) + { + free((void *) k->words); + + k->words = (struct ZoO_knowledge_word *) NULL; + } + + if (k->words_sorted != (ZoO_index *) NULL) + { + free((void *) k->words_sorted); + + k->words_sorted = (ZoO_index *) NULL; + } + + for (i = 0; i < k->sequences_length; ++i) + { + free((void *) k->sequences[i]); + } + + k->sequences_length = 0; + + if (k->sequences != (ZoO_index **) NULL) + { + free((void *) k->sequences); + + k->sequences = (ZoO_index **) NULL; + } + + if (k->sequences_sorted != 
(ZoO_index *) NULL) + { + free((void *) k->sequences_sorted); + + k->sequences_sorted = (ZoO_index *) NULL; + } +} diff --git a/src/knowledge/knowledge_learn_sequence.c b/src/knowledge/knowledge_learn_sequence.c new file mode 100644 index 0000000..23a5ca7 --- /dev/null +++ b/src/knowledge/knowledge_learn_sequence.c @@ -0,0 +1,324 @@ +#include +#include +#include /* defines SIZE_MAX */ + +#include "../core/sequence.h" + +#include "../cli/cli.h" + +#include "knowledge.h" + +/******************************************************************************/ +/** INITIALIZE ****************************************************************/ +/******************************************************************************/ +static void set_nth_sequence +( + struct ZoO_knowledge k [const restrict static 1], + const ZoO_index sorted_sequence_id, + const ZoO_index sequence_id +) +{ + /* Safe: (> k->sequences_length 1) */ + if (sorted_sequence_id < (k->sequences_length - 1)) + { + memmove + ( + /* Safe: (=< (+ sorted_sequence_id 1) k->sequences_length) */ + (void *) (k->sequences_sorted + (sorted_sequence_id + 1)), + (const void *) (k->sequences_sorted + sorted_sequence_id), + ((k->sequences_length - 1) - sorted_sequence_id) + ); + } + + k->sequences_sorted[sorted_sequence_id] = sequence_id; +} + +/******************************************************************************/ +/** ALLOCATING MEMORY *********************************************************/ +/******************************************************************************/ +static int reallocate_sequences_list +( + struct ZoO_knowledge k [const restrict static 1] +) +{ + ZoO_index ** new_sequences; + + if ((SIZE_MAX / sizeof(ZoO_index *)) > (size_t) k->sequences_length) + { + ZoO_S_ERROR + ( + "Unable to store the size of the sequences list, as it would overflow" + "size_t variables." 
+ ); + + return -1; + } + + new_sequences = + (ZoO_index **) realloc + ( + (void *) k->sequences, + (((size_t) k->sequences_length) * sizeof(ZoO_index *)) + ); + + if (new_sequences == (ZoO_index **) NULL) + { + ZoO_S_ERROR + ( + "Unable to allocate the memory required for the new sequence list." + ); + + return -1; + } + + k->sequences = new_sequences; + + return 0; +} + +static int reallocate_sequences_sorted_list +( + struct ZoO_knowledge k [const restrict static 1] +) +{ + ZoO_index * new_sequences_sorted; + + if ((SIZE_MAX / sizeof(ZoO_index)) > (size_t) k->sequences_length) + { + ZoO_S_ERROR + ( + "Unable to store the size of the sorted sequences list, as it would" + " overflow size_t variables." + ); + + return -1; + } + + new_sequences_sorted = + (ZoO_index *) realloc + ( + (void *) k->sequences_sorted, + ((size_t) k->sequences_length) * sizeof(ZoO_index) + ); + + if (new_sequences_sorted == (ZoO_index *) NULL) + { + ZoO_S_ERROR + ( + "Unable to allocate the memory required for the new sorted sequences" + " list." + ); + + return -1; + } + + k->sequences_sorted = new_sequences_sorted; + + return 0; +} + +/* Pre: (=< ZoO_INDEX_MAX SIZE_MAX) */ +static ZoO_index * copy_sequence +( + const ZoO_index base [const restrict static 1], + const ZoO_index base_length, + const ZoO_index markov_order +) +{ + ZoO_index * result; + + result = (ZoO_index *) calloc((size_t) base_length, sizeof(ZoO_index)); + + if (result == (ZoO_index *) NULL) + { + ZoO_S_ERROR + ( + "Unable to allocate the memory required to store a new sequence." 
+ ); + + return (ZoO_index *) NULL; + } + + memcpy + ( + (void *) result, + (const void *) base, + (((size_t) base_length) * sizeof(ZoO_index)) + ); + + return result; +} + +static int add_sequence +( + struct ZoO_knowledge k [const restrict static 1], + const ZoO_index sequence [const restrict static 1], + const ZoO_index sequence_length, + const ZoO_index markov_order, /* Pre (> markov_order 1) */ + const ZoO_index sequence_id, + const ZoO_index sorted_sequence_id +) +{ + ZoO_index * stored_sequence; + + if (k->sequences_length == ZoO_INDEX_MAX) + { + ZoO_S_ERROR + ( + "Unable to add sequence: the variable that stores the number of known " + "sequences would overflow." + ); + + return -1; + } + + stored_sequence = copy_sequence(sequence, sequence_length, markov_order); + + if (stored_sequence == (ZoO_index *) NULL) + { + return -1; + } + + k->sequences_length += 1; + + if (reallocate_sequences_list(k) < 0) + { + k->sequences_length -= 1; + + return -1; + } + + k->sequences[sequence_id] = stored_sequence; + + if (reallocate_sequences_sorted_list(k) < 0) + { + k->sequences_length -= 1; + + return -1; + } + + set_nth_sequence(k, sorted_sequence_id, sequence_id); + + return -1; +} + +/******************************************************************************/ +/** SEARCH ********************************************************************/ +/******************************************************************************/ + +static int find_sequence +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequence [const restrict static 1], + const ZoO_index sequence_length, + const ZoO_index markov_order, /* Pre: (> 1) */ + ZoO_index sequence_id [const restrict static 1] +) +{ + /* This is a binary search */ + int cmp; + ZoO_index i, current_min, current_max; + const ZoO_index markov_sequence_length = (markov_order - 1); + + /* Handles the case where the list is empty ********************************/ + current_max = k->sequences_length; + + if 
(current_max == 0) + { + *sequence_id = 0; + + return -1; + } + /***************************************************************************/ + + current_min = 0; + current_max -= 1; + + for (;;) + { + i = (current_min + ((current_max - current_min) / 2)); + + cmp = + ZoO_sequence_cmp + ( + k->sequences[k->sequences_sorted[i]], + markov_sequence_length, + sequence, + sequence_length + ); + + if (cmp > 0) + { + current_min = (i + 1); + + if (current_min > current_max) + { + *sequence_id = current_min; + + return -1; + } + } + else if (cmp < 0) + { + if ((current_min > current_max) || (i == 0)) + { + *sequence_id = i; + + return -1; + } + + current_max = (i - 1); + } + else + { + *sequence_id = k->sequences_sorted[i]; + + return 0; + } + } +} + +/******************************************************************************/ +/** EXPORTED ******************************************************************/ +/******************************************************************************/ + +int ZoO_knowledge_learn_markov_sequence +( + struct ZoO_knowledge k [const restrict static 1], + const ZoO_index sequence [const restrict static 1], + const ZoO_index sequence_length, + const ZoO_index markov_order, /* Pre (> markov_order 1) */ + ZoO_index sequence_id [const restrict static 1] +) +{ + ZoO_index sorted_id; + + if + ( + find_sequence + ( + k, + sequence, + sequence_length, + markov_order, + sequence_id + ) == 0 + ) + { + return 0; + } + + sorted_id = *sequence_id; + *sequence_id = k->sequences_length; + + return + add_sequence + ( + k, + sequence, + sequence_length, + markov_order, + *sequence_id, + sorted_id + ); +} diff --git a/src/knowledge/knowledge_learn_word.c b/src/knowledge/knowledge_learn_word.c new file mode 100644 index 0000000..f55ac5b --- /dev/null +++ b/src/knowledge/knowledge_learn_word.c @@ -0,0 +1,276 @@ +#include +#include +#include /* defines SIZE_MAX */ + +#include "../cli/cli.h" + +#include "knowledge.h" + 
+/******************************************************************************/ +/** INITIALIZING STRUCTURES ***************************************************/ +/******************************************************************************/ + +static void initialize_sequence_collection +( + struct ZoO_knowledge_sequence_collection c [const restrict static 1] +) +{ + c->sequences_ref = (ZoO_index *) NULL; + c->sequences_ref_length = 0; + c->sequences_ref_sorted = (ZoO_index *) NULL; + c->occurrences = (ZoO_index *) NULL; + c->targets = (ZoO_index **) NULL; + c->targets_length = (ZoO_index *) NULL; + c->targets_occurrences = (ZoO_index **) NULL; +} + +static void initialize_word +( + struct ZoO_knowledge_word w [const restrict static 1] +) +{ + w->word = (const ZoO_char *) NULL; + w->word_size = 0; + w->occurrences = 0; + + initialize_sequence_collection(&(w->followed)); + initialize_sequence_collection(&(w->preceded)); +} + +/******************************************************************************/ +/** ALLOCATING MEMORY *********************************************************/ +/******************************************************************************/ +static ZoO_char * copy_word +( + const ZoO_char original [const restrict static 1], + const ZoO_index original_length +) +{ + ZoO_char * result; + + result = + (ZoO_char *) + calloc + ( + (size_t) (original_length + 1), + sizeof(ZoO_char) + ); + + if (result == (ZoO_char *) NULL) + { + ZoO_S_ERROR("Unable to allocate memory to store new word."); + + return (ZoO_char *) NULL; + } + + memcpy + ( + (void *) result, + (const void *) original, + (((size_t) original_length) * sizeof(ZoO_char)) + ); + + result[original_length] = '\0'; + + return 0; +} + +static int reallocate_words_list +( + struct ZoO_knowledge k [const restrict static 1] +) +{ + struct ZoO_knowledge_word * new_words; + + if + ( + (SIZE_MAX / sizeof(struct ZoO_knowledge_word)) > (size_t) k->words_length + ) + { + ZoO_S_ERROR + ( + "Unable 
to store the size of the words list, as it would overflow" + "size_t variables." + ); + + return -1; + } + + new_words = + (struct ZoO_knowledge_word *) realloc + ( + (void *) k->words, + (((size_t) k->words_length) * sizeof(struct ZoO_knowledge_word)) + ); + + if (new_words == (struct ZoO_knowledge_word *) NULL) + { + ZoO_S_ERROR + ( + "Unable to allocate the memory required for the new words list." + ); + + return -1; + } + + k->words = new_words; + + return 0; +} + +static int reallocate_words_sorted_list +( + struct ZoO_knowledge k [const restrict static 1] +) +{ + ZoO_index * new_words_sorted; + + /* + * This has already been tested previously for a struct ZoO_knowledge_word, + * whose size is bigger than a ZoO_index. + * */ + /* + if ((SIZE_MAX / sizeof(ZoO_index)) > (size_t) k->words_length) + { + ZoO_S_ERROR + ( + "Unable to store the size of the sorted words list, as it would" + " overflow size_t variables." + ); + + return -1; + } + */ + + new_words_sorted = + (ZoO_index *) realloc + ( + (void *) k->words_sorted, + (((size_t) k->words_length) * sizeof(ZoO_index)) + ); + + if (new_words_sorted == (ZoO_index *) NULL) + { + ZoO_S_ERROR + ( + "Unable to allocate the memory required for the new sorted words list." 
+ ); + + return -1; + } + + k->words_sorted = new_words_sorted; + + return 0; +} + +static void set_nth_word +( + struct ZoO_knowledge k [const restrict static 1], + const ZoO_index sorted_word_id, + const ZoO_index word_id +) +{ + /* Safe: (> k->words_length 1) */ + if (sorted_word_id < (k->words_length - 1)) + { + memmove + ( + /* Safe: (=< (+ sorted_word_id 1) k->words_length) */ + (void *) (k->words_sorted + (sorted_word_id + 1)), + (const void *) (k->words_sorted + sorted_word_id), + ((k->words_length - 1) - sorted_word_id) + ); + } + + k->words_sorted[sorted_word_id] = word_id; +} + +static int add_word +( + struct ZoO_knowledge k [const restrict static 1], + const ZoO_char word [const restrict static 1], + const ZoO_index word_length, + const ZoO_index word_id, + const ZoO_index sorted_word_id +) +{ + ZoO_char * stored_word; + + if (k->words_length == ZoO_INDEX_MAX) + { + ZoO_S_ERROR + ( + "Unable to add word: the variable that stores the number of known " + "words would overflow." 
+ ); + + return -1; + } + + stored_word = copy_word(word, word_length); + + if (stored_word == (ZoO_char *) NULL) + { + return -1; + } + + k->words_length += 1; + + if (reallocate_words_list(k) < 0) + { + k->words_length -= 1; + + return -1; + } + + initialize_word(k->words + word_id); + + k->words[word_id].word = stored_word; + k->words[word_id].word_size = ((word_length + 1) * sizeof(ZoO_char)); + + if (reallocate_words_sorted_list(k) < 0) + { + k->words_length -= 1; + + return -1; + } + + set_nth_word(k, sorted_word_id, word_id); + + return -1; +} + +/******************************************************************************/ +/** EXPORTED ******************************************************************/ +/******************************************************************************/ + +int ZoO_knowledge_learn_word +( + struct ZoO_knowledge k [const restrict static 1], + const ZoO_char word [const restrict static 1], + const ZoO_index word_length, + ZoO_index word_id [const restrict static 1] +) +{ + ZoO_index sorted_id; + + if + ( + ZoO_knowledge_find_word_id + ( + k, + word, + (word_length * sizeof(ZoO_char)), + word_id + ) == 0 + ) + { + return 0; + } + + sorted_id = *word_id; + *word_id = k->words_length; + + return add_word(k, word, word_length, *word_id, sorted_id); +} diff --git a/src/knowledge/knowledge_search.c b/src/knowledge/knowledge_search.c new file mode 100644 index 0000000..a48585b --- /dev/null +++ b/src/knowledge/knowledge_search.c @@ -0,0 +1,336 @@ +#include + +#include "../core/char.h" +#include "../core/index.h" +#include "../core/sequence.h" + +#include "../cli/cli.h" + +#include "knowledge.h" + +/* See "knowledge.h". 
*/ +int ZoO_knowledge_find_word_id +( + const struct ZoO_knowledge k [const restrict static 1], + const ZoO_char word [const restrict static 1], + const size_t word_size, + ZoO_index result [const restrict static 1] +) +{ + /* This is a binary search */ + int cmp; + ZoO_index i, current_min, current_max; + ZoO_index candidate_id; + + /* Handles the case where the list is empty ********************************/ + current_max = k->words_length; + + if (current_max == 0) + { + *result = 0; + + return -1; + } + /***************************************************************************/ + + current_min = 0; + current_max -= 1; + + for (;;) + { + i = (current_min + ((current_max - current_min) / 2)); + + cmp = ZoO_word_cmp(word, word_size, k->words[k->words_sorted[i]].word); + + if (cmp > 0) + { + current_min = (i + 1); + + if (current_min > current_max) + { + *result = current_min; + + return -1; + } + } + else if (cmp < 0) + { + if ((current_min > current_max) || (i == 0)) + { + *result = current_min; + + return -1; + } + + current_max = (i - 1); + } + else + { + *result = k->words_sorted[i]; + + return 0; + } + } +} + +int ZoO_knowledge_find_preceding_words +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequence [const restrict], + const ZoO_index markov_order, /* Pre: (> 0) */ + const ZoO_index * restrict preceding_words [const restrict static 1], + const ZoO_index * restrict preceding_words_weights [const restrict static 1], + ZoO_index preceding_words_weights_sum [const restrict static 1] +) +{ + /* This is a binary search */ + int cmp; + ZoO_index i, current_min, current_max, local_sequence; + const ZoO_index * restrict candidate; + const ZoO_index markov_sequence_length = (markov_order - 1); + const ZoO_index word = sequence[markov_sequence_length]; + + if (word >= k->words_length) + { + ZoO_S_ERROR + ( + "Attempting to find the preceding words of an unknown word." 
+ ); + + *preceding_words = (const ZoO_index *) NULL; + *preceding_words_weights = (const ZoO_index *) NULL; + *preceding_words_weights_sum = 0; + + return -1; + } + + + if (markov_order == 1) + { + /* Special case: empty sequences. */ + *preceding_words = (const ZoO_index *) k->words[word].preceded.targets; + + *preceding_words_weights = + (const ZoO_index *) k->words[word].preceded.targets_occurrences; + + *preceding_words_weights_sum = k->words[word].occurrences; + + return 0; + } + + /* Handles the case where the list is empty ********************************/ + current_max = k->words[word].preceded.sequences_ref_length; + + if (current_max == 0) + { + *preceding_words = (const ZoO_index *) NULL; + *preceding_words_weights = (const ZoO_index *) NULL; + *preceding_words_weights_sum = 0; + + ZoO_S_ERROR + ( + "Attempting to find the preceding words of a sequence that never had " + "any." + ); + + return -2; + } + /***************************************************************************/ + + current_min = 0; + current_max -= 1; + + for (;;) + { + i = (current_min + ((current_max - current_min) / 2)); + + local_sequence = k->words[word].preceded.sequences_ref_sorted[i]; + + (void) ZoO_knowledge_get_sequence + ( + k, + k->words[word].preceded.sequences_ref[local_sequence], + &candidate + ); + + cmp = + ZoO_sequence_cmp + ( + sequence, + markov_sequence_length, + candidate, + markov_sequence_length + ); + + if (cmp > 0) + { + current_min = (i + 1); + + if (current_min > current_max) + { + *preceding_words = (const ZoO_index *) NULL; + *preceding_words_weights = (const ZoO_index *) NULL; + *preceding_words_weights_sum = 0; + + return -2; + } + } + else if (cmp < 0) + { + if ((current_min > current_max) || (i == 0)) + { + *preceding_words = (const ZoO_index *) NULL; + *preceding_words_weights = (const ZoO_index *) NULL; + *preceding_words_weights_sum = 0; + + return -2; + } + + current_max = (i - 1); + } + else + { + *preceding_words = 
k->words[word].preceded.targets[local_sequence]; + + *preceding_words_weights = + k->words[word].preceded.targets_occurrences[local_sequence]; + + *preceding_words_weights_sum = + k->words[word].preceded.occurrences[local_sequence]; + + return 0; + } + } +} + +int ZoO_knowledge_find_following_words +( + const struct ZoO_knowledge k [const static 1], + const ZoO_index sequence [const restrict], + const ZoO_index sequence_length, + const ZoO_index markov_order, + const ZoO_index * restrict following_words [const restrict static 1], + const ZoO_index * restrict following_words_weights [const restrict static 1], + ZoO_index following_words_weights_sum [const restrict static 1] +) +{ + /* This is a binary search */ + int cmp; + ZoO_index i, current_min, current_max, local_sequence; + const ZoO_index * restrict candidate; + const ZoO_index markov_sequence_length = (markov_order - 1); + const ZoO_index sequence_offset = + ((sequence_length - markov_sequence_length) - 1); + const ZoO_index word = sequence[sequence_offset]; + + if (word >= k->words_length) + { + ZoO_S_ERROR + ( + "Attempting to find the following words of an unknown word." + ); + + *following_words = (const ZoO_index *) NULL; + *following_words_weights = (const ZoO_index *) NULL; + *following_words_weights_sum = 0; + + return -1; + } + + if (markov_order == 1) + { + /* Special case: empty sequences. 
*/ + *following_words = (const ZoO_index *) k->words[word].preceded.targets; + + *following_words_weights = + (const ZoO_index *) k->words[word].preceded.targets_occurrences; + + *following_words_weights_sum = k->words[word].occurrences; + + return 0; + } + + /* Handles the case where the list is empty ********************************/ + current_max = k->words[word].preceded.sequences_ref_length; + + if (current_max == 0) + { + *following_words = (const ZoO_index *) NULL; + *following_words_weights = (const ZoO_index *) NULL; + *following_words_weights_sum = 0; + + ZoO_S_WARNING + ( + "Attempting to find the following words of a sequence that never had " + "any." + ); + + return -2; + } + /***************************************************************************/ + + current_min = 0; + current_max -= 1; + + for (;;) + { + i = (current_min + ((current_max - current_min) / 2)); + + local_sequence = k->words[word].followed.sequences_ref_sorted[i]; + + (void) ZoO_knowledge_get_sequence + ( + k, + k->words[word].followed.sequences_ref[local_sequence], + &candidate + ); + + cmp = + ZoO_sequence_cmp + ( + (sequence + sequence_offset), + markov_sequence_length, + candidate, + markov_sequence_length + ); + + if (cmp > 0) + { + current_min = (i + 1); + + if (current_min > current_max) + { + *following_words = (const ZoO_index *) NULL; + *following_words_weights = (const ZoO_index *) NULL; + *following_words_weights_sum = 0; + + return -2; + } + } + else if (cmp < 0) + { + if ((current_min > current_max) || (i == 0)) + { + *following_words = (const ZoO_index *) NULL; + *following_words_weights = (const ZoO_index *) NULL; + *following_words_weights_sum = 0; + + return -2; + } + + current_max = (i - 1); + } + else + { + *following_words = k->words[word].followed.targets[local_sequence]; + + *following_words_weights = + k->words[word].followed.targets_occurrences[local_sequence]; + + *following_words_weights_sum = + k->words[word].followed.occurrences[local_sequence]; + + 
return 0;
+      }
+   }
+}
diff --git a/src/knowledge/knowledge_types.h b/src/knowledge/knowledge_types.h
new file mode 100644
index 0000000..7eafc8b
--- /dev/null
+++ b/src/knowledge/knowledge_types.h
@@ -0,0 +1,46 @@
+#ifndef _ZoO_KNOWLEDGE_KNOWLEDGE_TYPES_H_
+#define _ZoO_KNOWLEDGE_KNOWLEDGE_TYPES_H_
+
+#include <stddef.h> /* size_t */
+
+#include "../core/index_types.h"
+#include "../core/char_types.h"
+
+/*
+ * Sequence-related bookkeeping attached to a word.
+ * NOTE(review): the role of each field is not visible from this header;
+ * document them once knowledge_learn_sequence.c settles.
+ */
+struct ZoO_knowledge_sequence_collection
+{
+   ZoO_index * sequences_ref;
+   ZoO_index sequences_ref_length;
+   ZoO_index * sequences_ref_sorted;
+   ZoO_index * occurrences;
+   ZoO_index ** targets;
+   ZoO_index * targets_length;
+   ZoO_index ** targets_occurrences;
+};
+
+/* A word, its size, its occurrence count and its two sequence collections. */
+struct ZoO_knowledge_word
+{
+   const ZoO_char * word;
+   size_t word_size;
+   ZoO_index occurrences;
+   struct ZoO_knowledge_sequence_collection followed;
+   struct ZoO_knowledge_sequence_collection preceded;
+};
+
+/* The whole knowledge base: the words and the sequences. */
+struct ZoO_knowledge
+{
+   struct ZoO_knowledge_word * words;
+   ZoO_index words_length;
+   ZoO_index * words_sorted;
+   ZoO_index ** sequences;
+   ZoO_index sequences_length;
+   ZoO_index * sequences_sorted;
+};
+
+#endif
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..bb4ae23
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,466 @@
+/*
+ * NOTE(review): the names of the five system headers below were lost when
+ * this patch was extracted. They are reconstructed from what this file uses
+ * (SIGINT/SIGTERM, srand/rand/free, strcpy, time) - confirm against the tree.
+ */
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+/*
+ * NOTE(review): these relative paths predate this commit's directory moves
+ * (src/io/error.h, for one, is deleted by it) - to be updated.
+ */
+#include "../tool/strings.h"
+
+#include "../io/error.h"
+#include "../io/parameters.h"
+#include "../io/data_input.h"
+#include "../io/data_output.h"
+#include "../io/network.h"
+
+#include "knowledge.h"
+
+#include "state_types.h"
+
+/*
+ * Cleared by request_termination() to make main_loop() stop. It is written
+ * from a signal handler, hence 'volatile sig_atomic_t' rather than 'int'.
+ */
+static volatile sig_atomic_t run = 1;
+
+/*
+ * Signal handler: asks the main loop to stop on SIGINT or SIGTERM.
+ * NOTE(review): nothing in this file installs it (there is no signal() or
+ * sigaction() call), so 'run' never changes as things stand - confirm.
+ */
+static void request_termination (int const signo)
+{
+   if ((signo == SIGINT) || (signo == SIGTERM))
+   {
+      run = 0;
+   }
+}
+
+/*
+ * Seeds the PRNG, then initializes the knowledge base and the parameters.
+ * Returns 0 on success, -1 on failure (the knowledge base is finalized again
+ * if the parameters could not be initialized).
+ */
+static int initialize
+(
+   struct ZoO_state s [const static 1],
+   int const argc,
+   const char * argv [const static argc]
+)
+{
+   ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Zero of One is initializing...");
+
+   srand(time(NULL));
+
+   /* prevents s [restrict] */
+   if (ZoO_knowledge_initialize(&(s->knowledge)) < 0)
+   {
+      return -1;
+   }
+
+   if (ZoO_parameters_initialize(&(s->param), argc, argv) < 1)
+   {
+      ZoO_knowledge_finalize(&(s->knowledge));
+
+      return -1;
+   }
+
+   return 0;
+}
+
+/*
+ * Feeds every line read from s->param.data_filename to the knowledge base.
+ * Returns -1 if the file cannot be opened, 0 otherwise: a line that cannot be
+ * assimilated is deliberately skipped, not treated as an error.
+ */
+static int load_data_file (struct ZoO_state s [const static 1])
+{
+   struct ZoO_data_input input;
+
+   if (ZoO_data_input_open(&input, s->param.data_filename) < 0)
+   {
+      return -1;
+   }
+
+   while
+   (
+      ZoO_data_input_read_line
+      (
+         &input,
+         ZoO_knowledge_punctuation_chars_count,
+         ZoO_knowledge_punctuation_chars
+      ) == 0
+   )
+   {
+      (void) ZoO_knowledge_assimilate
+      (
+         &(s->knowledge),
+         &(input.string),
+         s->param.aliases_count,
+         s->param.aliases
+      );
+   }
+
+   ZoO_data_input_close(&input);
+
+   return 0;
+}
+
+/* Releases the knowledge base. Always returns 0. */
+static int finalize (struct ZoO_state s [const static 1])
+{
+   ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Zero of One is finalizing...");
+
+   /* prevents s [restrict] */
+   ZoO_knowledge_finalize(&(s->knowledge));
+
+   return 0;
+}
+
+/* Opens the network connection described by s->param; forwards the result. */
+static int network_connect (struct ZoO_state s [const static 1])
+{
+   return
+      ZoO_network_connect
+      (
+         &(s->network),
+         s->param.irc_server_addr,
+         s->param.irc_server_port,
+         s->param.irc_server_channel,
+         s->param.irc_username,
+         s->param.irc_realname,
+         s->param.aliases[0]
+      );
+}
+
+/*
+ * Decides whether the message in 'string' gets a reply (non-zero result) and
+ * whether it should be learned from (*should_learn).
+ *  - an alias ZoO_IS_PREFIX-matches the first word: reply, do not learn;
+ *  - an alias ZoO_IS_PREFIX-matches a later word: reply and learn;
+ *  - otherwise: learn, and reply at random according to param->reply_rate.
+ * 'string' must hold at least one word (string->words[0] is read).
+ */
+static int should_reply
+(
+   struct ZoO_parameters param [const restrict static 1],
+   struct ZoO_strings string [const restrict static 1],
+   int should_learn [const restrict static 1]
+)
+{
+   ZoO_index i, j;
+
+   for (i = 0; i < param->aliases_count; ++i)
+   {
+      if (ZoO_IS_PREFIX(param->aliases[i], string->words[0]))
+      {
+         *should_learn = 0;
+
+         return 1;
+      }
+
+      for (j = 1; j < string->words_count; ++j)
+      {
+         if (ZoO_IS_PREFIX(param->aliases[i], string->words[j]))
+         {
+            *should_learn = 1;
+
+            return 1;
+         }
+      }
+   }
+
+   *should_learn = 1;
+
+   return (param->reply_rate >= (rand() % 100));
+}
+
+/*
+ * Copies 'line', minus one leading space if it has one, to the network output
+ * buffer, frees 'line', then sends the buffer.
+ * NOTE(review): strcpy is unbounded and the capacity of s->network.out is not
+ * visible from this file - confirm that 'line' always fits.
+ */
+static void send_and_free_line
+(
+   struct ZoO_state s [const static 1],
+   ZoO_char line [const static 1]
+)
+{
+   strcpy((s->network.out), ((line[0] == ' ') ? (line + 1) : line));
+
+   free((void *) line);
+
+   ZoO_network_send(&(s->network));
+}
+
+/*
+ * Greets a user who joined, subject to the same random reply rate as regular
+ * messages. The parsed name seeds the sentence when its first word is known
+ * with more than 3 links in each direction; otherwise no seed is given.
+ */
+static void handle_user_join
+(
+   struct ZoO_state s [const static 1],
+   struct ZoO_strings string [const restrict static 1],
+   ssize_t const msg_offset,
+   ssize_t const msg_size
+)
+{
+   ZoO_char * line;
+   ZoO_index loc;
+   struct ZoO_strings * seed;
+
+   if (s->param.reply_rate < (rand() % 100))
+   {
+      return;
+   }
+
+   if
+   (
+      ZoO_strings_parse
+      (
+         string,
+         (size_t) msg_size,
+         (s->network.in + msg_offset),
+         ZoO_knowledge_punctuation_chars_count,
+         ZoO_knowledge_punctuation_chars
+      ) < 0
+   )
+   {
+      ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Could not dissect join username.");
+
+      return;
+   }
+
+   /*
+    * NOTE(review): backward_links_count and forward_links_count are not
+    * members of struct ZoO_knowledge_word as declared by this commit's
+    * knowledge/knowledge_types.h - this still targets the previous layout.
+    */
+   if
+   (
+      /* No word at all: string->words[0] must not be read. */
+      (string->words_count == 0)
+      || (ZoO_knowledge_find(&(s->knowledge), string->words[0], &loc) < 0)
+      || (s->knowledge.words[loc].backward_links_count <= 3)
+      || (s->knowledge.words[loc].forward_links_count <= 3)
+   )
+   {
+      seed = (struct ZoO_strings *) NULL;
+   }
+   else
+   {
+      seed = string;
+   }
+
+   if
+   (
+      ZoO_knowledge_extend
+      (
+         &(s->knowledge),
+         seed,
+         0,
+         (const char **) NULL,
+         &line
+      ) == 0
+   )
+   {
+      send_and_free_line(s, line);
+   }
+}
+
+/*
+ * Handles a regular message: as decided by should_reply(), writes it to
+ * s->param.new_data_filename, replies to it, and learns from it.
+ */
+static void handle_message
+(
+   struct ZoO_state s [const static 1],
+   struct ZoO_strings string [const restrict static 1],
+   ssize_t const msg_offset,
+   /* FIXME: somehow we end up using (msg_size + 1), meaning there's a mixup
+    * between size and length.
+    */
+   ssize_t const msg_size
+)
+{
+   ZoO_char * line;
+   int reply, learn;
+
+   if
+   (
+      ZoO_strings_parse
+      (
+         string,
+         (size_t) msg_size,
+         (s->network.in + msg_offset),
+         ZoO_knowledge_punctuation_chars_count,
+         ZoO_knowledge_punctuation_chars
+      ) < 0
+   )
+   {
+      ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Could not dissect msg.");
+
+      return;
+   }
+
+   if (string->words_count == 0)
+   {
+      return;
+   }
+
+   reply = should_reply(&(s->param), string, &learn);
+
+   if (learn)
+   {
+      /*
+       * It would be best to do that after replying, but by then we no longer
+       * have the string in 's->network.in'.
+       */
+      (void) ZoO_data_output_write_line
+      (
+         s->param.new_data_filename,
+         (s->network.in + msg_offset),
+         (size_t) (msg_size + 1)
+      );
+   }
+
+   if
+   (
+      reply
+      &&
+      (
+         ZoO_knowledge_extend
+         (
+            &(s->knowledge),
+            string,
+            s->param.aliases_count,
+            s->param.aliases,
+            &line
+         ) == 0
+      )
+   )
+   {
+      send_and_free_line(s, line);
+   }
+
+   if (learn)
+   {
+      (void) ZoO_knowledge_assimilate
+      (
+         &(s->knowledge),
+         string,
+         s->param.aliases_count,
+         s->param.aliases
+      );
+   }
+}
+
+/*
+ * Receives and dispatches messages until 'run' is cleared, then disconnects.
+ * Always returns 0.
+ */
+static int main_loop (struct ZoO_state s [const static 1])
+{
+   struct ZoO_strings string;
+   ssize_t msg_offset, msg_size;
+   enum ZoO_msg_type msg_type;
+
+   msg_offset = 0;
+   msg_size = 0;
+
+   ZoO_strings_initialize(&string);
+
+   while (run)
+   {
+      if
+      (
+         ZoO_network_receive
+         (
+            &(s->network),
+            &msg_offset,
+            &msg_size,
+            &msg_type
+         ) == 0
+      )
+      {
+         switch (msg_type)
+         {
+            case ZoO_JOIN:
+               handle_user_join(s, &string, msg_offset, msg_size);
+               break;
+
+            case ZoO_PRIVMSG:
+               handle_message(s, &string, msg_offset, msg_size);
+               break;
+
+            default:
+               /* Any other kind of message is ignored. */
+               break;
+         }
+      }
+   }
+
+   ZoO_strings_finalize(&string);
+
+   ZoO_network_disconnect(&(s->network));
+
+   return 0;
+}
+
+/*
+ * Initializes, loads the data file, connects, then runs the main loop.
+ * Returns 0 on normal termination, -1 if any of those steps failed.
+ */
+int main (int const argc, const char * argv [const static argc])
+{
+   struct ZoO_state s;
+
+   if (initialize(&s, argc, argv) < 0)
+   {
+      return -1;
+   }
+
+   if (load_data_file(&s) < 0)
+   {
+      goto CRASH;
+   }
+
+   if (network_connect(&s) < 0)
+   {
+      goto CRASH;
+   }
+
+   if (main_loop(&s) < 0)
+   {
+      goto CRASH;
+   }
+
+   (void) finalize(&s);
+
+   ZoO_S_DEBUG(ZoO_DEBUG_PROGRAM_FLOW, "Zero of One terminated normally.");
+
+   return 0;
+
+   CRASH:
+   {
+      (void) finalize(&s);
+
+      ZoO_S_DEBUG
+      (
+         ZoO_DEBUG_PROGRAM_FLOW,
+         "Zero of One terminated by crashing."
+      );
+
+      return -1;
+   }
+}
diff --git a/src/pervasive.h b/src/pervasive.h
index b830326..c7c53a2 100644
--- a/src/pervasive.h
+++ b/src/pervasive.h
@@ -3,6 +3,12 @@
 
 #include
 
+#define ZoO_DEBUG_ALL 1
+
+#ifndef ZoO_DEBUG_ALL
+   #define ZoO_DEBUG_ALL 0
+#endif
+
 #ifndef ZoO_NETWORK_TIMEOUT
    #define ZoO_NETWORK_TIMEOUT 200
 #endif
@@ -11,34 +17,6 @@
    #define ZoO_MAX_REPLY_WORDS 64
 #endif
 
-#ifndef ZoO_DEFAULT_DATA_FILENAME
-   #define ZoO_DEFAULT_DATA_FILENAME "./memory.txt"
-#endif
-
-#ifndef ZoO_DEFAULT_IRC_SERVER_ADDR
-   #define ZoO_DEFAULT_IRC_SERVER_ADDR "irc.foonetic.net"
-#endif
-
-#ifndef ZoO_DEFAULT_IRC_SERVER_PORT
-   #define ZoO_DEFAULT_IRC_SERVER_PORT "6667"
-#endif
-
-#ifndef ZoO_DEFAULT_IRC_SERVER_CHANNEL
-   #define ZoO_DEFAULT_IRC_SERVER_CHANNEL "#theborghivemind"
-#endif
-
-#ifndef ZoO_DEFAULT_IRC_USERNAME
-   #define ZoO_DEFAULT_IRC_USERNAME "zeroofone"
-#endif
-
-#ifndef ZoO_DEFAULT_IRC_REALNAME
-   #define ZoO_DEFAULT_IRC_REALNAME "Zero of One (bot)"
-#endif
-
-#ifndef ZoO_DEFAULT_REPLY_RATE
-   #define ZoO_DEFAULT_REPLY_RATE 8
-#endif
-
 #define ZoO__TO_STRING(x) #x
 #define ZoO_TO_STRING(x) ZoO__TO_STRING(x)
 #define ZoO_ISOLATE(a) do {a} while (0)
-- 
cgit v1.2.3-70-g09d2