summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/CMakeLists.txt10
-rw-r--r--src/cli/CMakeLists.txt6
-rw-r--r--src/cli/cli.h (renamed from src/io/error.h)13
-rw-r--r--src/cli/parameters.c (renamed from src/io/parameters.c)0
-rw-r--r--src/cli/parameters.h (renamed from src/io/parameters.h)0
-rw-r--r--src/cli/parameters_types.h67
-rw-r--r--src/core/CMakeLists.txt9
-rw-r--r--src/core/assimilate.c281
-rw-r--r--src/core/char.c18
-rw-r--r--src/core/char.h35
-rw-r--r--src/core/char_types.h6
-rw-r--r--src/core/index.c61
-rw-r--r--src/core/index.h11
-rw-r--r--src/core/index_types.h2
-rw-r--r--src/core/sequence.c84
-rw-r--r--src/core/sequence.h20
-rw-r--r--src/core/sequence_creation.c9
-rw-r--r--src/core/sequence_from_string.c315
-rw-r--r--src/core/sequence_types.h3
-rw-r--r--src/file/data_input.c (renamed from src/io/data_input.c)0
-rw-r--r--src/file/data_input.h (renamed from src/io/data_input.h)0
-rw-r--r--src/file/data_input_types.h (renamed from src/io/data_input_types.h)0
-rw-r--r--src/file/data_output.c (renamed from src/io/data_output.c)1
-rw-r--r--src/file/data_output.h (renamed from src/io/data_output.h)0
-rw-r--r--src/io/CMakeLists.txt9
-rw-r--r--src/io/parameters_types.h21
-rw-r--r--src/irc/network.c (renamed from src/io/network.c)0
-rw-r--r--src/irc/network.h (renamed from src/io/network.h)0
-rw-r--r--src/irc/network_types.h (renamed from src/io/network_types.h)0
-rw-r--r--src/knowledge/CMakeLists.txt11
-rw-r--r--src/knowledge/knowledge.c (renamed from src/core/knowledge.c)8
-rw-r--r--src/knowledge/knowledge.h (renamed from src/core/knowledge.h)27
-rw-r--r--src/knowledge/knowledge_finalize.c (renamed from src/core/knowledge_finalize.c)7
-rw-r--r--src/knowledge/knowledge_learn_sequence.c324
-rw-r--r--src/knowledge/knowledge_learn_word.c276
-rw-r--r--src/knowledge/knowledge_search.c (renamed from src/core/knowledge_search.c)7
-rw-r--r--src/knowledge/knowledge_types.h (renamed from src/core/knowledge_types.h)5
-rw-r--r--src/main.c (renamed from src/core/main.c)0
-rw-r--r--src/pervasive.h34
39 files changed, 1267 insertions, 413 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 76a73ed..594b14f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,5 +1,13 @@
+add_subdirectory(cli)
add_subdirectory(core)
-add_subdirectory(io)
+add_subdirectory(file)
+add_subdirectory(irc)
+add_subdirectory(knowledge)
add_subdirectory(tool)
+set(
+ SRC_FILES ${SRC_FILES}
+ ${CMAKE_CURRENT_SOURCE_DIR}/main.c
+)
+
set(SRC_FILES ${SRC_FILES} PARENT_SCOPE)
diff --git a/src/cli/CMakeLists.txt b/src/cli/CMakeLists.txt
new file mode 100644
index 0000000..94e6337
--- /dev/null
+++ b/src/cli/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(
+ SRC_FILES ${SRC_FILES}
+ ${CMAKE_CURRENT_SOURCE_DIR}/parameters.c
+)
+set(SRC_FILES ${SRC_FILES} PARENT_SCOPE)
+
diff --git a/src/io/error.h b/src/cli/cli.h
index be7359f..5aec25b 100644
--- a/src/io/error.h
+++ b/src/cli/cli.h
@@ -1,15 +1,10 @@
-#ifndef _ZoO_IO_ERROR_H_
-#define _ZoO_IO_ERROR_H_
+#ifndef _ZoO_CLI_CLI_H_
+#define _ZoO_CLI_CLI_H_
#include <stdio.h>
#include "../pervasive.h"
-#define ZoO_DEBUG_ALL 1
-
-#ifndef ZoO_DEBUG_ALL
- #define ZoO_DEBUG_ALL 0
-#endif
#ifndef ZoO_DEBUG_PROGRAM_FLOW
#define ZoO_DEBUG_PROGRAM_FLOW (0 || ZoO_DEBUG_ALL)
@@ -23,7 +18,9 @@
#define ZoO_DEBUG_LEARNING (0 || ZoO_DEBUG_ALL)
#endif
-#define ZoO_DEBUG_NETWORK 1
+#ifndef ZoO_DEBUG_NETWORK
+ #define ZoO_DEBUG_NETWORK 1
+#endif
#ifndef ZoO_DEBUG_NETWORK
#define ZoO_DEBUG_NETWORK (0 || ZoO_DEBUG_ALL)
diff --git a/src/io/parameters.c b/src/cli/parameters.c
index 77c33aa..77c33aa 100644
--- a/src/io/parameters.c
+++ b/src/cli/parameters.c
diff --git a/src/io/parameters.h b/src/cli/parameters.h
index 1011e2b..1011e2b 100644
--- a/src/io/parameters.h
+++ b/src/cli/parameters.h
diff --git a/src/cli/parameters_types.h b/src/cli/parameters_types.h
new file mode 100644
index 0000000..15b5254
--- /dev/null
+++ b/src/cli/parameters_types.h
@@ -0,0 +1,67 @@
+#ifndef _ZoO_CLI_PARAMETERS_TYPES_H_
+#define _ZoO_CLI_PARAMETERS_TYPES_H_
+
+#include "../pervasive.h"
+
+/******************************************************************************/
+/** DEFAULT VALUES ************************************************************/
+/******************************************************************************/
+
+/*
+ * Every default below is only defined when the macro does not exist yet, so
+ * any of them can be overridden from the build command line.
+ */
+
+#ifndef ZoO_DEFAULT_DATA_FILENAME
+   #define ZoO_DEFAULT_DATA_FILENAME "./memory.txt"
+#endif
+
+#ifndef ZoO_DEFAULT_IRC_SERVER_ADDR
+   #define ZoO_DEFAULT_IRC_SERVER_ADDR "irc.foonetic.net"
+#endif
+
+#ifndef ZoO_DEFAULT_IRC_SERVER_PORT
+   #define ZoO_DEFAULT_IRC_SERVER_PORT "6667"
+#endif
+
+#ifndef ZoO_DEFAULT_IRC_SERVER_CHANNEL
+   #define ZoO_DEFAULT_IRC_SERVER_CHANNEL "#theborghivemind"
+#endif
+
+#ifndef ZoO_DEFAULT_IRC_USERNAME
+   #define ZoO_DEFAULT_IRC_USERNAME "zeroofone"
+#endif
+
+#ifndef ZoO_DEFAULT_IRC_REALNAME
+   #define ZoO_DEFAULT_IRC_REALNAME "Zero of One (bot)"
+#endif
+
+#ifndef ZoO_DEFAULT_REPLY_RATE
+   #define ZoO_DEFAULT_REPLY_RATE 8
+#endif
+
+/******************************************************************************/
+/** DEBUG LEVELS **************************************************************/
+/******************************************************************************/
+
+/*
+ * Follows ZoO_DEBUG_ALL unless set explicitly. ZoO_DEBUG_ALL is not defined in
+ * this header; it is presumably provided by "../pervasive.h" (TODO: confirm).
+ */
+#ifndef ZoO_DEBUG_PARAMETERS
+   #define ZoO_DEBUG_PARAMETERS (0 || ZoO_DEBUG_ALL)
+#endif
+
+/******************************************************************************/
+/** TYPES *********************************************************************/
+/******************************************************************************/
+
+/*
+ * Settings for one run of the program. Only the layout is defined here; the
+ * code filling it in is not visible from this header (presumably the parser
+ * of "parameters.c" -- TODO: confirm). None of the strings can be modified
+ * through this structure.
+ */
+struct ZoO_parameters
+{
+   /* Presumably the data file to read and the one to write -- confirm. */
+   const char * restrict data_filename;
+   const char * restrict new_data_filename;
+
+   /* IRC settings, all kept as strings (the port included). */
+   const char * restrict irc_server_addr;
+   const char * restrict irc_server_port;
+   const char * restrict irc_server_channel;
+   const char * restrict irc_username;
+   const char * restrict irc_realname;
+
+   /* NOTE(review): unit unknown from here; see ZoO_DEFAULT_REPLY_RATE. */
+   int reply_rate;
+
+   /* Presumably the number of entries in {aliases} -- confirm. */
+   int aliases_count;
+   const char * restrict * restrict aliases;
+};
+
+#endif
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index fe28080..1e1daa8 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -1,13 +1,10 @@
set(
SRC_FILES ${SRC_FILES}
${CMAKE_CURRENT_SOURCE_DIR}/char.c
- ${CMAKE_CURRENT_SOURCE_DIR}/main.c
- ${CMAKE_CURRENT_SOURCE_DIR}/knowledge.c
- ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_search.c
- ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_finalize.c
- ${CMAKE_CURRENT_SOURCE_DIR}/assimilate.c
- ${CMAKE_CURRENT_SOURCE_DIR}/sequence_creation.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/index.c
${CMAKE_CURRENT_SOURCE_DIR}/sequence.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/sequence_creation.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/sequence_from_string.c
)
set(SRC_FILES ${SRC_FILES} PARENT_SCOPE)
diff --git a/src/core/assimilate.c b/src/core/assimilate.c
deleted file mode 100644
index 7f03e1b..0000000
--- a/src/core/assimilate.c
+++ /dev/null
@@ -1,281 +0,0 @@
-#include <stdlib.h>
-#include <string.h>
-
-#include "../io/error.h"
-
-#include "knowledge.h"
-
-/** Functions to assimilate sentences using a ZoO_knowledge structure *********/
-
-
-static int add_sequence
-(
- ZoO_index links_count [const],
- struct ZoO_knowledge_link * links [const],
- ZoO_index const sequence [const restrict static ZoO_MARKOV_ORDER],
- ZoO_index const target_i,
- ZoO_index const offset
-)
-{
- ZoO_index link_index, i;
- struct ZoO_knowledge_link * link;
- ZoO_index * new_p;
-
- if
- (
- ZoO_knowledge_get_link
- (
- links_count,
- links,
- (sequence + offset),
- &link_index
- ) < 0
- )
- {
- return -1;
- }
-
- link = (*links + link_index);
- link->occurrences += 1;
-
- for (i = 0; i < link->targets_count; ++i)
- {
- if (link->targets[i] == sequence[target_i])
- {
- link->targets_occurrences[i] += 1;
-
- return 0;
- }
- }
-
- link->targets_count += 1;
-
- new_p =
- (ZoO_index *) realloc
- (
- (void *) link->targets,
- (sizeof(ZoO_index) * link->targets_count)
- );
-
- if (new_p == (ZoO_index *) NULL)
- {
- link->targets_count -= 1;
-
- /* TODO: err. */
- return -1;
- }
-
- link->targets = new_p;
- link->targets[link->targets_count - 1] = sequence[target_i];
-
- new_p =
- (ZoO_index *) realloc
- (
- (void *) link->targets_occurrences,
- (sizeof(ZoO_index) * link->targets_count)
- );
-
- if (new_p == (ZoO_index *) NULL)
- {
- link->targets_count -= 1;
-
- /* TODO: err. */
- return -1;
- }
-
- link->targets_occurrences = new_p;
- link->targets_occurrences[link->targets_count - 1] = 1;
-
- return 0;
-}
-
-static int add_word_occurrence
-(
- struct ZoO_knowledge k [const restrict static 1],
- ZoO_index const sequence [const static ((ZoO_MARKOV_ORDER * 2) + 1)]
-)
-{
- ZoO_index w;
- int error;
-
- w = sequence[ZoO_MARKOV_ORDER];
-
- error =
- add_sequence
- (
- &(k->words[w].forward_links_count),
- &(k->words[w].forward_links),
- sequence + (ZoO_MARKOV_ORDER + 1),
- (ZoO_MARKOV_ORDER - 1),
- 0
- );
-
- error =
- (
- add_sequence
- (
- &(k->words[w].backward_links_count),
- &(k->words[w].backward_links),
- sequence,
- 0,
- 1
- )
- | error
- );
-
- return error;
-}
-
-static int should_assimilate
-(
- struct ZoO_strings string [const restrict static 1],
- ZoO_index const aliases_count,
- const char * restrict aliases [const restrict static aliases_count]
-)
-{
- ZoO_index i;
-
- /* Don't assimilate empty strings. */
- if (string->words_count == 0)
- {
- return 0;
- }
-
- /* Don't assimilate things that start with our name. */
- for (i = 0; i < aliases_count; ++i)
- {
- if (ZoO_IS_PREFIX(aliases[i], string->words[0]))
- {
- return 0;
- }
- }
-
- return 1;
-}
-
-static int init_sequence
-(
- struct ZoO_knowledge k [const static 1],
- struct ZoO_strings string [const restrict static 1],
- ZoO_index sequence [const restrict static ((ZoO_MARKOV_ORDER * 2) + 1)]
-)
-{
- ZoO_index i;
-
- /* We are going to link this sequence to ZoO_WORD_START_OF_LINE */
- sequence[ZoO_MARKOV_ORDER] = ZoO_WORD_START_OF_LINE;
-
- for (i = 1; i <= ZoO_MARKOV_ORDER; ++i)
- {
- sequence[ZoO_MARKOV_ORDER - i] = ZoO_WORD_START_OF_LINE;
-
- if (i <= string->words_count)
- {
- if
- (
- ZoO_knowledge_learn
- (
- k,
- string->words[i - 1],
- (sequence + (ZoO_MARKOV_ORDER + i))
- ) < 0
- )
- {
- return -1;
- }
- }
- else
- {
- sequence[ZoO_MARKOV_ORDER + i] = ZoO_WORD_END_OF_LINE;
- }
- }
-
- return 0;
-}
-
-int ZoO_knowledge_assimilate
-(
- struct ZoO_knowledge k [const static 1],
- struct ZoO_strings string [const restrict static 1],
- ZoO_index const aliases_count,
- const char * restrict aliases [const restrict static aliases_count]
-)
-{
- int error;
- ZoO_index sequence[(ZoO_MARKOV_ORDER * 2) + 1];
- ZoO_index next_word, new_word, new_word_id;
-
- if (!should_assimilate(string, aliases_count, aliases))
- {
- return 0;
- }
-
- if (init_sequence(k, string, sequence) < 0)
- {
- return -1;
- }
-
- if (add_word_occurrence(k, sequence) < 0)
- {
- error = -1;
-
- /* There's a pun... */
- ZoO_S_WARNING("Could not add a link between words.");
-
- return -1;
- }
-
- error = 0;
-
- next_word = 0;
- new_word = ZoO_MARKOV_ORDER;
-
- while (next_word <= (string->words_count + ZoO_MARKOV_ORDER))
- {
- if (new_word < string->words_count)
- {
- /* prevents words [restrict], k [restrict] */
- if (ZoO_knowledge_learn(k, string->words[new_word], &new_word_id) < 0)
- {
- return -1;
- }
- }
- else
- {
- new_word_id = ZoO_WORD_END_OF_LINE;
- }
-
- memmove
- (
- (void *) sequence,
- (const void *) (sequence + 1),
- /* Accepts 0. */
- (sizeof(ZoO_index) * (ZoO_MARKOV_ORDER * 2))
- );
-
- sequence[ZoO_MARKOV_ORDER * 2] = new_word_id;
-
- if (add_word_occurrence(k, sequence) < 0)
- {
- error = -1;
-
- /* There's a pun... */
- ZoO_S_WARNING("Could not add a link between words.");
-
- return -1;
- }
-
- /*
- * Safe:
- * - next_word < words_count
- * - words_count =< ZoO_INDEX_MAX
- * ----
- * next_word < ZoO_INDEX_MAX
- */
- next_word += 1;
- new_word += 1;
- }
-
- return error;
-}
-
diff --git a/src/core/char.c b/src/core/char.c
index 39ca72e..9297643 100644
--- a/src/core/char.c
+++ b/src/core/char.c
@@ -2,6 +2,18 @@
#include "char.h"
+/* See: "char.h" */
+ZoO_char ZoO_char_to_lowercase (const ZoO_char c)
+{
+   /* Anything outside of ['A', 'Z'] is handed back untouched. */
+   if ((c < 'A') || (c > 'Z'))
+   {
+      return c;
+   }
+
+   /* The result is as far from 'z' as {c} is from 'Z'. */
+   return 'z' - ('Z' - c);
+}
+
+/* See: "char.h" */
int ZoO_char_is_banned (const ZoO_char c)
{
switch (c)
@@ -21,6 +33,7 @@ int ZoO_char_is_banned (const ZoO_char c)
}
}
+/* See: "char.h" */
int ZoO_char_is_punctuation (const ZoO_char c)
{
switch (c)
@@ -38,11 +51,14 @@ int ZoO_char_is_punctuation (const ZoO_char c)
}
}
+/*
+ * See: "char.h"
+ * Compares at most {word_a_size} chars of {word_a} against {word_b} and
+ * returns -1, 0 or 1, as promised by the contract written in "char.h".
+ */
int ZoO_word_cmp
(
const ZoO_char word_a [const static 1],
+ const size_t word_a_size,
const ZoO_char word_b [const static 1]
)
{
- return strcmp((const char *) word_a, (const char *) word_b);
+ int order;
+
+ order = strncmp((const char *) word_a, (const char *) word_b, word_a_size);
+
+ /* strncmp only guarantees the sign of its result, so normalize it. */
+ return ((order > 0) - (order < 0));
}
+
diff --git a/src/core/char.h b/src/core/char.h
index 772a3a2..2b4a355 100644
--- a/src/core/char.h
+++ b/src/core/char.h
@@ -3,19 +3,42 @@
#include "char_types.h"
-enum ZoO_word_property ZoO_get_word_property
-(
- const ZoO_char word [const restrict],
- size_t word_size
-);
-
+/* Compares two words. {word_a} does not have to be null terminated. */
+/*@
+ @ requires null_terminated_string(word_b);
+ @ requires ((length(word_a) * sizeof(ZoO_char)) == word_a_size);
+ @ ensures ((\result == 1) || (\result == 0) || (\result == -1));
+ @*/
int ZoO_word_cmp
(
const ZoO_char word_a [const static 1],
+ const size_t word_a_size,
const ZoO_char word_b [const static 1]
);
+/*
+ * Returns the lowercase equivalent of ZoO_char that are included in ['A','Z'].
+ * Other ZoO_char are returned untouched.
+ */
+ZoO_char ZoO_char_to_lowercase (const ZoO_char c);
+
+/*
+ * Returns '1' iff {c} should be considered as a punctuation character, '0'
+ * otherwise.
+ */
+/*@
+ @ ensures ((\result == 1) || (\result == 0));
+ @*/
int ZoO_char_is_punctuation (const ZoO_char c);
+
+/*
+ * Returns '1' iff containing {c} means the word should not be learned. '0'
+ * otherwise.
+ */
+/*@
+ @ ensures ((\result == 1) || (\result == 0));
+ @*/
int ZoO_word_char_is_banned (const ZoO_char c);
#endif
+
diff --git a/src/core/char_types.h b/src/core/char_types.h
index 67b5294..a2a736c 100644
--- a/src/core/char_types.h
+++ b/src/core/char_types.h
@@ -1,12 +1,16 @@
#ifndef _ZoO_CORE_CHAR_TYPES_H_
#define _ZoO_CORE_CHAR_TYPES_H_
-
+/*
+ * FIXME: Does not belong here.
+ */
+/*
enum ZoO_word_property
{
ZoO_WORD_NO_PROPERTY,
ZoO_WORD_HAS_NO_LEFT_SEPARATOR,
ZoO_WORD_HAS_NO_RIGHT_SEPARATOR
};
+*/
/* ZoO_char = UTF-8 char */
typedef char ZoO_char;
diff --git a/src/core/index.c b/src/core/index.c
new file mode 100644
index 0000000..375e0ad
--- /dev/null
+++ b/src/core/index.c
@@ -0,0 +1,61 @@
+#include <limits.h>
+#include <stdlib.h>
+
+#include "index.h"
+
+#if (RAND_MAX < UCHAR_MAX)
+ #error "RAND_MAX < UCHAR_MAX, unable to generate random numbers."
+#endif
+
+#if (RAND_MAX == 0)
+ #error "RAND_MAX is included in [0, 0]. What are you even doing?"
+#endif
+
+/*
+ * Returns a random unsigned char taken from [0, UCHAR_MAX], built from a
+ * single call to rand().
+ *
+ * The range of rand() is cut into (UCHAR_MAX + 1) buckets of equal width and
+ * the index of the bucket is returned. Unlike scaling a float ratio and
+ * truncating it, this lets UCHAR_MAX come out as often as the other values,
+ * and it does not depend on floating point precision. When (RAND_MAX + 1) is
+ * not a multiple of (UCHAR_MAX + 1), the last bucket is slightly narrower.
+ */
+static unsigned char random_uchar (void)
+{
+   /*
+    * Strictly greater than (RAND_MAX / (UCHAR_MAX + 1)), so the division
+    * below cannot produce more than UCHAR_MAX.
+    */
+   const unsigned long bucket_width =
+      (
+         (((unsigned long) RAND_MAX) / (((unsigned long) UCHAR_MAX) + 1UL))
+         + 1UL
+      );
+
+   return (unsigned char) (((unsigned long) rand()) / bucket_width);
+}
+
+/* See: "index.h" */
+ZoO_index ZoO_index_random (void)
+{
+   ZoO_index result;
+   unsigned char * const bytes = (unsigned char *) &result;
+   size_t remaining;
+
+   /* Every byte of the result gets its own random value, first byte first. */
+   for (remaining = sizeof(ZoO_index); remaining > 0; --remaining)
+   {
+      bytes[sizeof(ZoO_index) - remaining] = random_uchar();
+   }
+
+   return result;
+}
+
+/*
+ * See: "index.h"
+ * Returns a random ZoO_index included in [0, max].
+ *
+ * Integer arithmetic is used on purpose: a float cannot hold every ZoO_index
+ * value, so scaling through floats could round above {max}, or even above
+ * ZoO_INDEX_MAX, whose conversion back to an unsigned type is undefined.
+ * The modulo introduces a small bias toward low values whenever (max + 1)
+ * does not divide the number of ZoO_index values.
+ */
+ZoO_index ZoO_index_random_up_to (const ZoO_index max)
+{
+   if (max == ZoO_INDEX_MAX)
+   {
+      /* (max + 1) would wrap around to 0: the whole range is allowed. */
+      return ZoO_index_random();
+   }
+
+   return (ZoO_index) (ZoO_index_random() % (max + 1));
+}
diff --git a/src/core/index.h b/src/core/index.h
index 76e3507..1417662 100644
--- a/src/core/index.h
+++ b/src/core/index.h
@@ -3,6 +3,17 @@
#include "index_types.h"
+/*
+ * Returns a random ZoO_index.
+ */
+ZoO_index ZoO_index_random (void);
+
+/*
+ * Returns a random ZoO_index, included in [0, limit]
+ */
+/*@
+ @ ensures (\result <= limit);
+ @*/
ZoO_index ZoO_index_random_up_to (const ZoO_index limit);
#endif
diff --git a/src/core/index_types.h b/src/core/index_types.h
index 2d769ca..ad56d52 100644
--- a/src/core/index_types.h
+++ b/src/core/index_types.h
@@ -1,8 +1,10 @@
#ifndef _ZoO_CORE_INDEX_TYPES_H_
#define _ZoO_CORE_INDEX_TYPES_H_
+/* Must be unsigned. */
typedef unsigned int ZoO_index;
+/* Must be > 0. */
#define ZoO_INDEX_MAX UINT_MAX
#endif
diff --git a/src/core/sequence.c b/src/core/sequence.c
index 9e370a3..d7ff9d0 100644
--- a/src/core/sequence.c
+++ b/src/core/sequence.c
@@ -5,18 +5,56 @@
#include "sequence.h"
+/*
+ * Skips the redundant ZoO_START_OF_SEQUENCE_ID found at the start of a
+ * sequence: {*sequence_offset} is set to the index of the last element of the
+ * leading run of ZoO_START_OF_SEQUENCE_ID, or to 0 if there is no such run.
+ */
+/* ensures (*sequence_offset <= sequence_length) */
+static void bypass_redundant_sos
+(
+   const ZoO_index sequence [const restrict],
+   const ZoO_index sequence_length,
+   ZoO_index sequence_offset [const restrict static 1]
+)
+{
+   ZoO_index cursor;
+
+   *sequence_offset = 0;
+   cursor = 0;
+
+   /* Stops on the first element that is not a start of sequence marker. */
+   while
+   (
+      (cursor < sequence_length)
+      && (sequence[cursor] == ZoO_START_OF_SEQUENCE_ID)
+   )
+   {
+      *sequence_offset = cursor;
+      cursor += 1;
+   }
+}
+
+
/* See "sequence.h" */
int ZoO_sequence_cmp
(
const ZoO_index sequence_a [const],
- const ZoO_index sequence_a_length,
+ ZoO_index sequence_a_length,
const ZoO_index sequence_b [const],
- const ZoO_index sequence_b_length
+ ZoO_index sequence_b_length
)
{
- ZoO_index min_length;
+ ZoO_index min_length, a, b;
+ ZoO_index a_offset, b_offset;
ZoO_index i;
+ bypass_redundant_sos(sequence_a, sequence_a_length, &a_offset);
+ bypass_redundant_sos(sequence_b, sequence_b_length, &b_offset);
+
+ /*@ requires (*a_offset <= sequence_a_length) @*/
+ sequence_a_length -= a_offset;
+ /*@ requires (*b_offset <= sequence_b_length) @*/
+ sequence_b_length -= b_offset;
+
if (sequence_a_length < sequence_b_length)
{
min_length = sequence_a_length;
@@ -26,47 +64,37 @@ int ZoO_sequence_cmp
min_length = sequence_b_length;
}
+ /*@ ensures (min_length <= sequence_a_length) @*/
+ /*@ ensures (min_length <= sequence_b_length) @*/
+
for (i = 0; i < min_length; ++i)
{
- if (sequence_a[i] < sequence_b[i])
+ /*@ requires ((i + a_offset) < sequence_a_length) @*/
+ a = sequence_a[i + a_offset];
+ /*@ requires ((i + b_offset) < sequence_b_length) @*/
+ b = sequence_b[i + b_offset];
+
+ /* First difference decides: -1 if {a} is smaller, 1 if it is greater. */
+ if (a < b)
{
return -1;
}
- else if (sequence_b[i] > sequence_b[i])
+ else if (a > b)
{
return 1;
}
- else if
- (
- (sequence_a[i] == ZoO_END_OF_SEQUENCE_ID)
- && (sequence_b[i] == ZoO_END_OF_SEQUENCE_ID)
- )
+ else if ((a == ZoO_END_OF_SEQUENCE_ID) && (b == ZoO_END_OF_SEQUENCE_ID))
{
return 0;
}
}
- if (sequence_a_length < sequence_b_length)
+ if (sequence_a_length > sequence_b_length)
{
- if (sequence_b[i] == ZoO_END_OF_SEQUENCE_ID)
- {
- return 0;
- }
- else
- {
- return -1;
- }
+ return 1;
}
- else if (sequence_a_length > sequence_b_length)
+ else if (sequence_a_length < sequence_b_length)
{
- if (sequence_a[i] == ZoO_END_OF_SEQUENCE_ID)
- {
- return 0;
- }
- else
- {
- return 1;
- }
+ return -1;
}
else
{
diff --git a/src/core/sequence.h b/src/core/sequence.h
index e609b4d..77ecd6c 100644
--- a/src/core/sequence.h
+++ b/src/core/sequence.h
@@ -1,11 +1,21 @@
#ifndef _ZoO_CORE_SEQUENCE_H_
#define _ZoO_CORE_SEQUENCE_H_
+#include "../core/char_types.h"
#include "../core/index_types.h"
-#include "../core/knowledge_types.h"
+#include "../knowledge/knowledge_types.h"
#include "sequence_types.h"
+int ZoO_sequence_from_undercase_string
+(
+ const ZoO_char string [const restrict],
+ const ZoO_index string_length,
+ struct ZoO_knowledge k [const restrict static 1],
+ ZoO_index * sequence [const restrict static 1],
+ ZoO_index sequence_length [const restrict static 1]
+);
+
/*
* Creates a sequence containing {initial_word}. The remaining elements of
* sequence are added according to what is known to {k} as being possible.
@@ -42,7 +52,13 @@ int ZoO_sequence_create_from
* ZoO_END_OF_SEQUENCE marks the ending of a sequence, regardless of indicated
* sequence length, meaning that [10][ZoO_END_OF_SEQUENCE][9] and
* [10][ZoO_END_OF_SEQUENCE][8] are considered equal. Sequences do not have to
- * contain ZoO_END_OF_SEQUENCE.
+ * contain ZoO_END_OF_SEQUENCE. [10][ZoO_END_OF_SEQUENCE] and [10] are
+ * considered different, [10][ZoO_END_OF_SEQUENCE]
+ * and [10][ZoO_END_OF_SEQUENCE][ZoO_END_OF_SEQUENCE] are considered equal.
+ * Same logic is applied for ZoO_START_OF_SEQUENCE:
+ * [START_OF_SEQUENCE][10] is not [10], but
+ * [START_OF_SEQUENCE][START_OF_SEQUENCE][10] and [START_OF_SEQUENCE][10] are
+ * the same.
* Return:
* 1 iff {sequence_a} should be considered being more than {sequence_b}
* 0 iff {sequence_a} should be considered being equal to {sequence_b}
diff --git a/src/core/sequence_creation.c b/src/core/sequence_creation.c
index 1133be9..f460629 100644
--- a/src/core/sequence_creation.c
+++ b/src/core/sequence_creation.c
@@ -19,6 +19,11 @@
* (> weights_sum 0).
* (= (sum weights) weights_sum).
*/
+/*@
+ @ requires (weights_sum > 0);
+ @ requires \valid(weights);
+ @ requires (\sum(0, (\length(weights) - 1), weights) = weights_sum);
+@*/
static ZoO_index weighted_random_pick
(
const ZoO_index weights [const restrict static 1],
@@ -29,12 +34,12 @@ static ZoO_index weighted_random_pick
accumulator = 0;
- /* Safe: Included in [0, weights_sum]. */
random_number = ZoO_index_random_up_to(weights_sum);
+ /*@ ensures (0 <= random_number <= weights_sum); @*/
for (result = 0; accumulator < random_number; ++result)
{
- /* Safe: (= (sum weights) weights_sum) */
+ /*@ requires (\sum(0, (\length(weights) - 1), weights) = weights_sum); @*/
accumulator += weights[result];
}
diff --git a/src/core/sequence_from_string.c b/src/core/sequence_from_string.c
new file mode 100644
index 0000000..51d7049
--- /dev/null
+++ b/src/core/sequence_from_string.c
@@ -0,0 +1,315 @@
+#define _POSIX_C_SOURCE 200809L
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h> /* defines SIZE_MAX */
+
+#include "../core/char.h"
+#include "../core/index.h"
+
+#include "../cli/cli.h"
+
+#include "../knowledge/knowledge.h"
+
+#include "sequence.h"
+
+/*
+ * Appends {word_id} at the end of {*sequence}, growing the array by one
+ * element.
+ *
+ * When returning 0:
+ *    {*sequence} points to the (possibly moved) array, whose last element is
+ *    {word_id}, and {*sequence_length} has been increased by one.
+ *
+ * When returning -1:
+ *    The array could not be grown. {*sequence} and {*sequence_length} are
+ *    left untouched, so the caller still owns (and can free) {*sequence}.
+ */
+static int add_word_id_to_sequence
+(
+   const ZoO_index word_id,
+   ZoO_index * sequence [const restrict static 1],
+   ZoO_index sequence_length [const restrict static 1]
+)
+{
+   ZoO_index new_length;
+   ZoO_index * new_sequence;
+
+   /* ZoO_index is unsigned: wrapping around to 0 means it cannot grow. */
+   new_length = (ZoO_index) (*sequence_length + 1);
+
+   if
+   (
+      (new_length == 0)
+      || (((size_t) new_length) > (SIZE_MAX / sizeof(ZoO_index)))
+   )
+   {
+      ZoO_S_ERROR("Unable to reallocate a sequence to add word ids to it.");
+
+      return -1;
+   }
+
+   new_sequence =
+      (ZoO_index *) realloc
+      (
+         (void *) *sequence,
+         (((size_t) new_length) * sizeof(ZoO_index))
+      );
+
+   if (new_sequence == (ZoO_index *) NULL)
+   {
+      /* realloc leaves the original array valid on failure. */
+      ZoO_S_ERROR("Unable to reallocate a sequence to add word ids to it.");
+
+      return -1;
+   }
+
+   new_sequence[new_length - 1] = word_id;
+
+   *sequence = new_sequence;
+   *sequence_length = new_length;
+
+   return 0;
+}
+
+/******************************************************************************/
+/** HANDLING PUNCTUATION ******************************************************/
+/******************************************************************************/
+/*
+ * Looks up {punctuation}, seen as a one character word, in {k} and appends
+ * its id to {*sequence}. Returns 0 on success, -1 if {k} does not know that
+ * word or if the id could not be appended. {string} is not used.
+ */
+static int add_punctuation_to_sequence
+(
+   const ZoO_char string [const restrict static 1],
+   const ZoO_char punctuation,
+   ZoO_index * sequence [const restrict static 1],
+   ZoO_index sequence_length [const restrict static 1],
+   const struct ZoO_knowledge k [const restrict static 1]
+)
+{
+   ZoO_index punctuation_id;
+   ZoO_char punctuation_word[2];
+
+   punctuation_word[1] = '\0';
+   punctuation_word[0] = punctuation;
+
+   if (ZoO_knowledge_find_word_id(k, punctuation_word, 2, &punctuation_id) >= 0)
+   {
+      return
+         (
+            (
+               add_word_id_to_sequence
+               (
+                  punctuation_id,
+                  sequence,
+                  sequence_length
+               ) < 0
+            )
+            ? -1
+            : 0
+         );
+   }
+
+   ZoO_PROG_ERROR
+   (
+      "'%s' was defined as a punctuation, was found in a string, yet is not"
+      " defined in the knowledge database.",
+      punctuation_word
+   );
+
+   return -1;
+}
+
+/*
+ * Tells whether the word made of the {word_length} chars of {string} starting
+ * at index {word_start} ends with a punctuation character.
+ * Returns the result of ZoO_char_is_punctuation on the last char of the word,
+ * or 0 if the word is empty.
+ */
+static int word_is_punctuation_terminated
+(
+   const ZoO_char string [const restrict static 1],
+   const ZoO_index word_start,
+   const ZoO_index word_length
+)
+{
+   if (word_length == 0)
+   {
+      /* No last char to look at. */
+      return 0;
+   }
+
+   /* Same char as the one the caller stores as punctuation. */
+   return ZoO_char_is_punctuation(string[word_start + (word_length - 1)]);
+}
+
+/******************************************************************************/
+/** HANDLING WORDS ************************************************************/
+/******************************************************************************/
+/*
+ * Makes {k} learn the word made of the {word_length} chars of {string}
+ * starting at index {word_start}, then appends its id to {*sequence}.
+ * An empty word is silently ignored.
+ * Returns 0 on success (or if there was nothing to add), -1 if the word could
+ * not be learned or its id could not be appended.
+ */
+static int add_word_to_sequence
+(
+   const ZoO_char string [const restrict static 1],
+   const ZoO_index word_start,
+   const ZoO_index word_length,
+   ZoO_index * sequence [const restrict static 1],
+   ZoO_index sequence_length [const restrict static 1],
+   struct ZoO_knowledge k [const restrict static 1]
+)
+{
+   ZoO_index word_id;
+
+   if (word_length == 0)
+   {
+      return 0;
+   }
+
+   if
+   (
+      ZoO_knowledge_learn_word
+      (
+         k,
+         (string + word_start),
+         word_length,
+         &word_id
+      ) < 0
+   )
+   {
+      return -1;
+   }
+
+   if (add_word_id_to_sequence(word_id, sequence, sequence_length) < 0)
+   {
+      return -1;
+   }
+
+   return 0;
+}
+
+/*
+ * Adds the word found at [{word_start}, {word_start} + {word_length}) in
+ * {string} to {*sequence}. If the word is reported as punctuation terminated,
+ * its last char is left out of the word and added afterwards as a word of
+ * its own.
+ * Returns 0 on success, -1 as soon as one of the additions fails.
+ */
+static int add_finding_to_sequence
+(
+   const ZoO_char string [const restrict static 1],
+   const ZoO_index word_start,
+   const ZoO_index word_length,
+   ZoO_index * sequence [const restrict static 1],
+   ZoO_index sequence_length [const restrict static 1],
+   struct ZoO_knowledge k [const restrict static 1]
+)
+{
+   /* Number of chars (0 or 1) taken away from the end of the word. */
+   ZoO_index trailing_punctuation;
+
+   trailing_punctuation =
+      (word_is_punctuation_terminated(string, word_start, word_length) ? 1 : 0);
+
+   if
+   (
+      add_word_to_sequence
+      (
+         string,
+         word_start,
+         (word_length - trailing_punctuation),
+         sequence,
+         sequence_length,
+         k
+      ) < 0
+   )
+   {
+      return -1;
+   }
+
+   if (trailing_punctuation != 1)
+   {
+      /* Plain word, nothing else to add. */
+      return 0;
+   }
+
+   if
+   (
+      add_punctuation_to_sequence
+      (
+         string,
+         string[word_start + word_length - 1],
+         sequence,
+         sequence_length,
+         k
+      ) < 0
+   )
+   {
+      return -1;
+   }
+
+   return 0;
+}
+
+/*
+ * Finds the first word of {string} located at or after index {offset}. Words
+ * are runs of chars other than ' '; {string} does not have to be null
+ * terminated, only its first {string_length} chars are read.
+ *
+ * When returning 0:
+ *    {*word_start} is the index of the first char of the word and
+ *    {*word_length} (at least 1) its number of chars. The word may end
+ *    exactly at the end of the string.
+ *
+ * When returning -1:
+ *    There were only spaces (or nothing) from {offset} onward. {*word_start}
+ *    and {*word_length} have not been modified.
+ */
+static int find_word
+(
+   const ZoO_char string [const restrict static 1],
+   const ZoO_index string_length,
+   const ZoO_index offset,
+   ZoO_index word_start [const restrict static 1],
+   ZoO_index word_length [const restrict static 1]
+)
+{
+   ZoO_index i;
+
+   i = offset;
+
+   /* The bound is tested first so that string[string_length] is never read. */
+   while ((i < string_length) && (string[i] == ' '))
+   {
+      i += 1;
+   }
+
+   if (i >= string_length)
+   {
+      return -1;
+   }
+
+   *word_start = i;
+
+   while ((i < string_length) && (string[i] != ' '))
+   {
+      i += 1;
+   }
+
+   /* Reaching the end of the string simply ends the last word. */
+   *word_length = (i - *word_start);
+
+   return 0;
+}
+
+/******************************************************************************/
+/** EXPORTED ******************************************************************/
+/******************************************************************************/
+/*
+ * Converts the first {string_length} chars of {string} into a sequence of
+ * word ids: ZoO_START_OF_SEQUENCE_ID, then one id per word (a trailing
+ * punctuation char gets an id of its own), then ZoO_END_OF_SEQUENCE_ID.
+ * Words are added to {k} through ZoO_knowledge_learn_word.
+ *
+ * When returning 0:
+ *    {*sequence} is a newly allocated array of {*sequence_length} ids. It was
+ *    obtained through realloc, so the caller releases it with free.
+ *
+ * When returning -1:
+ *    {*sequence} is NULL, {*sequence_length} is 0, and nothing has to be
+ *    freed. Words learned before the failure remain in {k}.
+ */
+int ZoO_sequence_from_undercase_string
+(
+   const ZoO_char string [const restrict],
+   const ZoO_index string_length,
+   struct ZoO_knowledge k [const restrict static 1],
+   ZoO_index * sequence [const restrict static 1],
+   ZoO_index sequence_length [const restrict static 1]
+)
+{
+   ZoO_index word_start, word_length;
+   ZoO_index i;
+
+   i = 0;
+
+   *sequence = (ZoO_index *) NULL;
+   *sequence_length = 0;
+
+   if
+   (
+      add_word_id_to_sequence
+      (
+         ZoO_START_OF_SEQUENCE_ID,
+         sequence,
+         sequence_length
+      ) < 0
+   )
+   {
+      goto on_failure;
+   }
+
+   while (i < string_length)
+   {
+      /* Argument order: the length of the string, then where to start. */
+      if (find_word(string, string_length, i, &word_start, &word_length) < 0)
+      {
+         /* Only spaces are left. */
+         break;
+      }
+
+      if
+      (
+         add_finding_to_sequence
+         (
+            string,
+            word_start,
+            word_length,
+            sequence,
+            sequence_length,
+            k
+         ) < 0
+      )
+      {
+         goto on_failure;
+      }
+
+      i = (word_start + word_length);
+   }
+
+   if
+   (
+      add_word_id_to_sequence
+      (
+         ZoO_END_OF_SEQUENCE_ID,
+         sequence,
+         sequence_length
+      ) < 0
+   )
+   {
+      goto on_failure;
+   }
+
+   return 0;
+
+on_failure:
+   /* free(NULL) is a no-op, so this is safe whatever was allocated. */
+   free((void *) *sequence);
+
+   *sequence = (ZoO_index *) NULL;
+   *sequence_length = 0;
+
+   return -1;
+}
diff --git a/src/core/sequence_types.h b/src/core/sequence_types.h
index 717d418..c260a8a 100644
--- a/src/core/sequence_types.h
+++ b/src/core/sequence_types.h
@@ -3,7 +3,8 @@
#define ZoO_START_OF_SEQUENCE_ID 0
#define ZoO_END_OF_SEQUENCE_ID 1
+#define ZoO_ACTION_SEQUENCE_ID 2
-#define ZoO_RESERVED_IDS_COUNT 2
+#define ZoO_RESERVED_IDS_COUNT 3
#endif
diff --git a/src/io/data_input.c b/src/file/data_input.c
index e31d33b..e31d33b 100644
--- a/src/io/data_input.c
+++ b/src/file/data_input.c
diff --git a/src/io/data_input.h b/src/file/data_input.h
index a2f004b..a2f004b 100644
--- a/src/io/data_input.h
+++ b/src/file/data_input.h
diff --git a/src/io/data_input_types.h b/src/file/data_input_types.h
index bd2709b..bd2709b 100644
--- a/src/io/data_input_types.h
+++ b/src/file/data_input_types.h
diff --git a/src/io/data_output.c b/src/file/data_output.c
index 796d3d0..04e3964 100644
--- a/src/io/data_output.c
+++ b/src/file/data_output.c
@@ -1,4 +1,5 @@
#define _POSIX_C_SOURCE 200809L
+
#include <stdlib.h>
#include <string.h>
#include <errno.h>
diff --git a/src/io/data_output.h b/src/file/data_output.h
index ef963a0..ef963a0 100644
--- a/src/io/data_output.h
+++ b/src/file/data_output.h
diff --git a/src/io/CMakeLists.txt b/src/io/CMakeLists.txt
deleted file mode 100644
index c36413a..0000000
--- a/src/io/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-set(
- SRC_FILES ${SRC_FILES}
- ${CMAKE_CURRENT_SOURCE_DIR}/parameters.c
- ${CMAKE_CURRENT_SOURCE_DIR}/network.c
- ${CMAKE_CURRENT_SOURCE_DIR}/data_input.c
- ${CMAKE_CURRENT_SOURCE_DIR}/data_output.c
-)
-set(SRC_FILES ${SRC_FILES} PARENT_SCOPE)
-
diff --git a/src/io/parameters_types.h b/src/io/parameters_types.h
deleted file mode 100644
index 92a9e30..0000000
--- a/src/io/parameters_types.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef _ZoO_IO_PARAMETERS_TYPES_H_
-#define _ZoO_IO_PARAMETERS_TYPES_H_
-
-struct ZoO_parameters
-{
- const char * restrict data_filename;
- const char * restrict new_data_filename;
-
- const char * restrict irc_server_addr;
- const char * restrict irc_server_port;
- const char * restrict irc_server_channel;
- const char * restrict irc_username;
- const char * restrict irc_realname;
-
- int reply_rate;
-
- int aliases_count;
- const char * restrict * restrict aliases;
-};
-
-#endif
diff --git a/src/io/network.c b/src/irc/network.c
index edafd4f..edafd4f 100644
--- a/src/io/network.c
+++ b/src/irc/network.c
diff --git a/src/io/network.h b/src/irc/network.h
index 647b19c..647b19c 100644
--- a/src/io/network.h
+++ b/src/irc/network.h
diff --git a/src/io/network_types.h b/src/irc/network_types.h
index 9a328a7..9a328a7 100644
--- a/src/io/network_types.h
+++ b/src/irc/network_types.h
diff --git a/src/knowledge/CMakeLists.txt b/src/knowledge/CMakeLists.txt
new file mode 100644
index 0000000..1245321
--- /dev/null
+++ b/src/knowledge/CMakeLists.txt
@@ -0,0 +1,11 @@
+set(
+ SRC_FILES ${SRC_FILES}
+ ${CMAKE_CURRENT_SOURCE_DIR}/knowledge.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_finalize.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_learn_sequence.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_learn_word.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/knowledge_search.c
+)
+
+set(SRC_FILES ${SRC_FILES} PARENT_SCOPE)
+
diff --git a/src/core/knowledge.c b/src/knowledge/knowledge.c
index 94d76cd..a72969e 100644
--- a/src/core/knowledge.c
+++ b/src/knowledge/knowledge.c
@@ -2,14 +2,20 @@
#include <string.h>
#include <stdint.h> /* defines SIZE_MAX */
-#include "../io/error.h"
+#include "../cli/cli.h"
#include "knowledge.h"
/** Basic functions of the ZoO_knowledge structure ****************************/
+
+/* See: "knowledge.h" */
void ZoO_knowledge_initialize (struct ZoO_knowledge k [const static 1])
{
k->words = (struct ZoO_knowledge_word *) NULL;
k->words_length = 0;
k->words_sorted = (ZoO_index *) NULL;
+
+ k->sequences = (ZoO_index **) NULL;
+ k->sequences_length = 0;
+ k->sequences_sorted = (ZoO_index *) NULL;
}
diff --git a/src/core/knowledge.h b/src/knowledge/knowledge.h
index 057e436..51d94c4 100644
--- a/src/core/knowledge.h
+++ b/src/knowledge/knowledge.h
@@ -1,5 +1,5 @@
-#ifndef _ZoO_CORE_KNOWLEDGE_H_
-#define _ZoO_CORE_KNOWLEDGE_H_
+#ifndef _ZoO_KNOWLEDGE_KNOWLEDGE_H_
+#define _ZoO_KNOWLEDGE_KNOWLEDGE_H_
#include "../core/char_types.h"
#include "../core/index_types.h"
@@ -10,11 +10,9 @@ void ZoO_knowledge_initialize (struct ZoO_knowledge k [const restrict static 1])
void ZoO_knowledge_finalize (struct ZoO_knowledge k [const restrict static 1]);
-
/*
* When returning 0:
- * {word} was either added to {k} or its representation in {k} has its
- * occurrences count increased.
+ * {word} was added to {k}, or was already there.
* {*result} indicates where {word} is in {k->words}.
*
* When returning -1:
@@ -22,18 +20,28 @@ void ZoO_knowledge_finalize (struct ZoO_knowledge k [const restrict static 1]);
* {k} remains semantically unchanged.
* {*result} may or may not have been altered.
*/
-int ZoO_knowledge_learn
+int ZoO_knowledge_learn_word
(
struct ZoO_knowledge k [const static 1],
const ZoO_char word [const restrict static 1],
+ const ZoO_index word_length,
ZoO_index result [const restrict static 1]
);
int ZoO_knowledge_learn_sequence
(
- struct ZoO_knowledge k [const static 1],
- const ZoO_index sequence [const restrict],
- const ZoO_index sequence_length
+ struct ZoO_knowledge k [const restrict static 1],
+ const ZoO_index sequence [const restrict static 1],
+ const ZoO_index sequence_length,
+ const ZoO_index markov_order
+);
+
+/*
+ * Looks {sequence} up in {k}, and adds it if it is not already there.
+ * {*sequence_id} receives the index of {sequence} in {k->sequences}.
+ *
+ * The parameter list must match the definition found in
+ * "knowledge_learn_sequence.c", which takes {sequence_id} as its last
+ * parameter.
+ *
+ * Pre: (> markov_order 1)
+ */
+int ZoO_knowledge_learn_markov_sequence
+(
+   struct ZoO_knowledge k [const restrict static 1],
+   const ZoO_index sequence [const restrict static 1],
+   const ZoO_index sequence_length,
+   const ZoO_index markov_order,
+   ZoO_index sequence_id [const restrict static 1]
);
int ZoO_knowledge_get_following_sequences_ref
@@ -74,6 +82,7 @@ int ZoO_knowledge_find_word_id
(
const struct ZoO_knowledge k [const restrict static 1],
const ZoO_char word [const restrict static 1],
+ const size_t word_size,
ZoO_index result [const restrict static 1]
);
diff --git a/src/core/knowledge_finalize.c b/src/knowledge/knowledge_finalize.c
index e4deda6..36a7406 100644
--- a/src/core/knowledge_finalize.c
+++ b/src/knowledge/knowledge_finalize.c
@@ -2,11 +2,11 @@
#include <string.h>
#include <stdint.h> /* defines SIZE_MAX */
-#include "../io/error.h"
+#include "../cli/cli.h"
#include "knowledge.h"
-void knowledge_sequence_collection_finalize
+static void knowledge_sequence_collection_finalize
(
struct ZoO_knowledge_sequence_collection c [const restrict static 1]
)
@@ -54,7 +54,7 @@ void knowledge_sequence_collection_finalize
}
}
-void knowledge_word_finalize
+static void knowledge_word_finalize
(
struct ZoO_knowledge_word w [const restrict static 1]
)
@@ -73,6 +73,7 @@ void knowledge_word_finalize
knowledge_sequence_collection_finalize(&(w->preceded));
}
+/* See: "knowledge.h" */
void ZoO_knowledge_finalize (struct ZoO_knowledge k [const restrict static 1])
{
ZoO_index i;
diff --git a/src/knowledge/knowledge_learn_sequence.c b/src/knowledge/knowledge_learn_sequence.c
new file mode 100644
index 0000000..23a5ca7
--- /dev/null
+++ b/src/knowledge/knowledge_learn_sequence.c
@@ -0,0 +1,324 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h> /* defines SIZE_MAX */
+
+#include "../core/sequence.h"
+
+#include "../cli/cli.h"
+
+#include "knowledge.h"
+
+/******************************************************************************/
+/** INITIALIZE ****************************************************************/
+/******************************************************************************/
+/*
+ * Stores {sequence_id} at position {sorted_sequence_id} of
+ * {k->sequences_sorted}, first shifting every entry from that position
+ * onward one slot to the right.
+ *
+ * Pre:
+ *    {k->sequences_length} already counts the entry being inserted, and
+ *    {k->sequences_sorted} can hold that many elements.
+ *    (< sorted_sequence_id k->sequences_length)
+ */
+static void set_nth_sequence
+(
+   struct ZoO_knowledge k [const restrict static 1],
+   const ZoO_index sorted_sequence_id,
+   const ZoO_index sequence_id
+)
+{
+   /* Safe: (>= k->sequences_length 1) */
+   if (sorted_sequence_id < (k->sequences_length - 1))
+   {
+      memmove
+      (
+         /* Safe: (< (+ sorted_sequence_id 1) k->sequences_length) */
+         (void *) (k->sequences_sorted + (sorted_sequence_id + 1)),
+         (const void *) (k->sequences_sorted + sorted_sequence_id),
+         /* memmove expects a size in bytes, not a number of elements. */
+         (
+            ((size_t) ((k->sequences_length - 1) - sorted_sequence_id))
+            * sizeof(ZoO_index)
+         )
+      );
+   }
+
+   k->sequences_sorted[sorted_sequence_id] = sequence_id;
+}
+
+/******************************************************************************/
+/** ALLOCATING MEMORY *********************************************************/
+/******************************************************************************/
+/*
+ * Resizes {k->sequences} so that it holds {k->sequences_length} elements.
+ *
+ * When returning 0:
+ *    {k->sequences} points to the resized list.
+ *
+ * When returning -1:
+ *    An error was reported and {k->sequences} is left untouched.
+ */
+static int reallocate_sequences_list
+(
+   struct ZoO_knowledge k [const restrict static 1]
+)
+{
+   ZoO_index ** new_sequences;
+
+   /*
+    * The size in bytes only fits in a size_t if the number of elements does
+    * not exceed (SIZE_MAX / element size).
+    */
+   if (((size_t) k->sequences_length) > (SIZE_MAX / sizeof(ZoO_index *)))
+   {
+      ZoO_S_ERROR
+      (
+         "Unable to store the size of the sequences list, as it would overflow"
+         " size_t variables."
+      );
+
+      return -1;
+   }
+
+   /* The result goes in a temporary so the old list survives a failure. */
+   new_sequences =
+      (ZoO_index **) realloc
+      (
+         (void *) k->sequences,
+         (((size_t) k->sequences_length) * sizeof(ZoO_index *))
+      );
+
+   if (new_sequences == (ZoO_index **) NULL)
+   {
+      ZoO_S_ERROR
+      (
+         "Unable to allocate the memory required for the new sequence list."
+      );
+
+      return -1;
+   }
+
+   k->sequences = new_sequences;
+
+   return 0;
+}
+
+/*
+ * Resizes {k->sequences_sorted} so that it holds {k->sequences_length}
+ * elements.
+ *
+ * When returning 0:
+ *    {k->sequences_sorted} points to the resized list.
+ *
+ * When returning -1:
+ *    An error was reported and {k->sequences_sorted} is left untouched.
+ */
+static int reallocate_sequences_sorted_list
+(
+   struct ZoO_knowledge k [const restrict static 1]
+)
+{
+   ZoO_index * new_sequences_sorted;
+
+   /*
+    * The size in bytes only fits in a size_t if the number of elements does
+    * not exceed (SIZE_MAX / element size).
+    */
+   if (((size_t) k->sequences_length) > (SIZE_MAX / sizeof(ZoO_index)))
+   {
+      ZoO_S_ERROR
+      (
+         "Unable to store the size of the sorted sequences list, as it would"
+         " overflow size_t variables."
+      );
+
+      return -1;
+   }
+
+   /* The result goes in a temporary so the old list survives a failure. */
+   new_sequences_sorted =
+      (ZoO_index *) realloc
+      (
+         (void *) k->sequences_sorted,
+         (((size_t) k->sequences_length) * sizeof(ZoO_index))
+      );
+
+   if (new_sequences_sorted == (ZoO_index *) NULL)
+   {
+      ZoO_S_ERROR
+      (
+         "Unable to allocate the memory required for the new sorted sequences"
+         " list."
+      );
+
+      return -1;
+   }
+
+   k->sequences_sorted = new_sequences_sorted;
+
+   return 0;
+}
+
+/*
+ * Returns a freshly allocated copy of the {base_length} first elements of
+ * {base}, or NULL (after reporting an error) if the allocation failed.
+ * {markov_order} is currently not used.
+ *
+ * Pre: (=< ZoO_INDEX_MAX SIZE_MAX)
+ */
+static ZoO_index * copy_sequence
+(
+   const ZoO_index base [const restrict static 1],
+   const ZoO_index base_length,
+   const ZoO_index markov_order
+)
+{
+   const size_t element_count = (size_t) base_length;
+   ZoO_index * const duplicate = calloc(element_count, sizeof(ZoO_index));
+
+   if (duplicate != NULL)
+   {
+      memcpy(duplicate, base, (element_count * sizeof(ZoO_index)));
+
+      return duplicate;
+   }
+
+   ZoO_S_ERROR
+   (
+      "Unable to allocate the memory required to store a new sequence."
+   );
+
+   return NULL;
+}
+
+/*
+ * Stores a copy of {sequence} at {k->sequences[sequence_id]} and registers
+ * it at position {sorted_sequence_id} of {k->sequences_sorted}.
+ *
+ * When returning 0:
+ *    The sequence was added, {k->sequences_length} was increased by one.
+ *
+ * When returning -1:
+ *    An error was reported, {k->sequences_length} is unchanged and the copy
+ *    of {sequence} has been released.
+ *
+ * Pre:
+ *    (> markov_order 1)
+ *    (= sequence_id k->sequences_length)
+ *    (=< sorted_sequence_id k->sequences_length)
+ */
+static int add_sequence
+(
+   struct ZoO_knowledge k [const restrict static 1],
+   const ZoO_index sequence [const restrict static 1],
+   const ZoO_index sequence_length,
+   const ZoO_index markov_order, /* Pre (> markov_order 1) */
+   const ZoO_index sequence_id,
+   const ZoO_index sorted_sequence_id
+)
+{
+   ZoO_index * stored_sequence;
+
+   if (k->sequences_length == ZoO_INDEX_MAX)
+   {
+      ZoO_S_ERROR
+      (
+         "Unable to add sequence: the variable that stores the number of known "
+         "sequences would overflow."
+      );
+
+      return -1;
+   }
+
+   stored_sequence = copy_sequence(sequence, sequence_length, markov_order);
+
+   if (stored_sequence == (ZoO_index *) NULL)
+   {
+      return -1;
+   }
+
+   /* The reallocation functions size the lists using this counter. */
+   k->sequences_length += 1;
+
+   if (reallocate_sequences_list(k) < 0)
+   {
+      k->sequences_length -= 1;
+
+      /* Nothing references the copy yet: release it to avoid a leak. */
+      free((void *) stored_sequence);
+
+      return -1;
+   }
+
+   k->sequences[sequence_id] = stored_sequence;
+
+   if (reallocate_sequences_sorted_list(k) < 0)
+   {
+      k->sequences_length -= 1;
+
+      /* The slot is no longer counted: do not leave a dangling pointer. */
+      k->sequences[sequence_id] = (ZoO_index *) NULL;
+      free((void *) stored_sequence);
+
+      return -1;
+   }
+
+   set_nth_sequence(k, sorted_sequence_id, sequence_id);
+
+   /* Success: the search/learn functions use 0 for this. */
+   return 0;
+}
+
+/******************************************************************************/
+/** SEARCH ********************************************************************/
+/******************************************************************************/
+
+/*
+ * Binary search for {sequence} among the sequences of {k}, going through
+ * {k->sequences_sorted}. Stored sequences are compared using a length of
+ * (markov_order - 1).
+ *
+ * When returning 0:
+ *    {sequence} was found, {*sequence_id} is its index in {k->sequences}.
+ *
+ * When returning -1:
+ *    {sequence} was not found, {*sequence_id} is the position in
+ *    {k->sequences_sorted} where the search stopped (the caller uses it as
+ *    insertion point).
+ *
+ * NOTE(review): ZoO_knowledge_find_word_id passes the searched key as first
+ * operand of its comparison, whereas the stored entry comes first here.
+ * Confirm that the sign convention of ZoO_sequence_cmp matches the branches
+ * below.
+ */
+static int find_sequence
+(
+   const struct ZoO_knowledge k [const static 1],
+   const ZoO_index sequence [const restrict static 1],
+   const ZoO_index sequence_length,
+   const ZoO_index markov_order, /* Pre: (> 1) */
+   ZoO_index sequence_id [const restrict static 1]
+)
+{
+   int cmp;
+   ZoO_index i, current_min, current_max;
+   const ZoO_index markov_sequence_length = (markov_order - 1);
+
+   /* Handles the case where the list is empty ********************************/
+   current_max = k->sequences_length;
+
+   if (current_max == 0)
+   {
+      *sequence_id = 0;
+
+      return -1;
+   }
+   /***************************************************************************/
+
+   current_min = 0;
+   current_max -= 1;
+
+   /* Invariant: (=< current_min current_max) at the start of each iteration. */
+   for (;;)
+   {
+      i = (current_min + ((current_max - current_min) / 2));
+
+      cmp =
+         ZoO_sequence_cmp
+         (
+            k->sequences[k->sequences_sorted[i]],
+            markov_sequence_length,
+            sequence,
+            sequence_length
+         );
+
+      if (cmp > 0)
+      {
+         current_min = (i + 1);
+
+         if (current_min > current_max)
+         {
+            *sequence_id = current_min;
+
+            return -1;
+         }
+      }
+      else if (cmp < 0)
+      {
+         /*
+          * There is nothing left below {i} once it has reached {current_min}
+          * (this includes (i == 0)). This has to be tested before lowering
+          * {current_max}: otherwise the range becomes inverted and the next
+          * middle point is computed from (current_max - current_min) < 0,
+          * which either wraps to an out-of-bounds index or yields the same
+          * {i} forever.
+          */
+         if (i == current_min)
+         {
+            *sequence_id = i;
+
+            return -1;
+         }
+
+         current_max = (i - 1);
+      }
+      else
+      {
+         *sequence_id = k->sequences_sorted[i];
+
+         return 0;
+      }
+   }
+}
+
+/******************************************************************************/
+/** EXPORTED ******************************************************************/
+/******************************************************************************/
+
+/*
+ * Looks {sequence} up in {k}; if the search does not find it, hands it to
+ * add_sequence and returns the latter's result.
+ *
+ * Returns 0 right away when the search succeeds, with {*sequence_id} set by
+ * find_sequence. Otherwise {*sequence_id} is set to the value
+ * {k->sequences_length} had before the addition was attempted.
+ */
+int ZoO_knowledge_learn_markov_sequence
+(
+   struct ZoO_knowledge k [const restrict static 1],
+   const ZoO_index sequence [const restrict static 1],
+   const ZoO_index sequence_length,
+   const ZoO_index markov_order, /* Pre (> markov_order 1) */
+   ZoO_index sequence_id [const restrict static 1]
+)
+{
+   ZoO_index insertion_point;
+   const int lookup =
+      find_sequence(k, sequence, sequence_length, markov_order, sequence_id);
+
+   if (lookup != 0)
+   {
+      /* A failed search leaves the sorted position in {*sequence_id}. */
+      insertion_point = *sequence_id;
+
+      /* New sequences are appended at the end of {k->sequences}. */
+      *sequence_id = k->sequences_length;
+
+      return
+         add_sequence
+         (
+            k,
+            sequence,
+            sequence_length,
+            markov_order,
+            *sequence_id,
+            insertion_point
+         );
+   }
+
+   return 0;
+}
diff --git a/src/knowledge/knowledge_learn_word.c b/src/knowledge/knowledge_learn_word.c
new file mode 100644
index 0000000..f55ac5b
--- /dev/null
+++ b/src/knowledge/knowledge_learn_word.c
@@ -0,0 +1,276 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h> /* defines SIZE_MAX */
+
+#include "../cli/cli.h"
+
+#include "knowledge.h"
+
+/******************************************************************************/
+/** INITIALIZING STRUCTURES ***************************************************/
+/******************************************************************************/
+
+/* Puts {c} in the empty state: no references, every list pointer NULL. */
+static void initialize_sequence_collection
+(
+   struct ZoO_knowledge_sequence_collection c [const restrict static 1]
+)
+{
+   /* Nothing is referenced yet. */
+   c->sequences_ref_length = 0;
+
+   /* None of the lists has been allocated. */
+   c->sequences_ref = NULL;
+   c->sequences_ref_sorted = NULL;
+   c->occurrences = NULL;
+   c->targets = NULL;
+   c->targets_length = NULL;
+   c->targets_occurrences = NULL;
+}
+
+/*
+ * Puts {w} in the empty state: no text, no occurrences, and two empty
+ * sequence collections.
+ */
+static void initialize_word
+(
+   struct ZoO_knowledge_word w [const restrict static 1]
+)
+{
+   initialize_sequence_collection(&w->followed);
+   initialize_sequence_collection(&w->preceded);
+
+   w->occurrences = 0;
+   w->word_size = 0;
+   w->word = NULL;
+}
+
+/******************************************************************************/
+/** ALLOCATING MEMORY *********************************************************/
+/******************************************************************************/
+/*
+ * Returns a freshly allocated, '\0' terminated copy of the
+ * {original_length} first characters of {original}, or NULL (after
+ * reporting an error) if the allocation failed.
+ * The caller owns the returned memory.
+ */
+static ZoO_char * copy_word
+(
+   const ZoO_char original [const restrict static 1],
+   const ZoO_index original_length
+)
+{
+   ZoO_char * result;
+
+   result =
+      (ZoO_char *)
+      calloc
+      (
+         /* Converted before adding, so the sum is not done in ZoO_index. */
+         (((size_t) original_length) + 1),
+         sizeof(ZoO_char)
+      );
+
+   if (result == (ZoO_char *) NULL)
+   {
+      ZoO_S_ERROR("Unable to allocate memory to store new word.");
+
+      return (ZoO_char *) NULL;
+   }
+
+   memcpy
+   (
+      (void *) result,
+      (const void *) original,
+      (((size_t) original_length) * sizeof(ZoO_char))
+   );
+
+   result[original_length] = '\0';
+
+   /* Hand the copy over: returning 0 here would be NULL, and leak it. */
+   return result;
+}
+
+/*
+ * Resizes {k->words} so that it holds {k->words_length} elements.
+ *
+ * When returning 0:
+ *    {k->words} points to the resized list.
+ *
+ * When returning -1:
+ *    An error was reported and {k->words} is left untouched.
+ */
+static int reallocate_words_list
+(
+   struct ZoO_knowledge k [const restrict static 1]
+)
+{
+   struct ZoO_knowledge_word * new_words;
+
+   /*
+    * The size in bytes only fits in a size_t if the number of elements does
+    * not exceed (SIZE_MAX / element size).
+    */
+   if
+   (
+      ((size_t) k->words_length)
+      > (SIZE_MAX / sizeof(struct ZoO_knowledge_word))
+   )
+   {
+      ZoO_S_ERROR
+      (
+         "Unable to store the size of the words list, as it would overflow"
+         " size_t variables."
+      );
+
+      return -1;
+   }
+
+   /* The result goes in a temporary so the old list survives a failure. */
+   new_words =
+      (struct ZoO_knowledge_word *) realloc
+      (
+         (void *) k->words,
+         (((size_t) k->words_length) * sizeof(struct ZoO_knowledge_word))
+      );
+
+   if (new_words == (struct ZoO_knowledge_word *) NULL)
+   {
+      ZoO_S_ERROR
+      (
+         "Unable to allocate the memory required for the new words list."
+      );
+
+      return -1;
+   }
+
+   k->words = new_words;
+
+   return 0;
+}
+
+/*
+ * Resizes {k->words_sorted} so that it holds {k->words_length} elements.
+ * Returns 0 on success. Returns -1, after reporting an error and without
+ * touching {k->words_sorted}, if the reallocation failed.
+ *
+ * No size_t overflow test is made on the requested size: add_word resizes
+ * the words list first, using the same element count with the larger
+ * struct ZoO_knowledge_word elements.
+ */
+static int reallocate_words_sorted_list
+(
+   struct ZoO_knowledge k [const restrict static 1]
+)
+{
+   const size_t wanted_size = (((size_t) k->words_length) * sizeof(ZoO_index));
+   ZoO_index * const resized = realloc(k->words_sorted, wanted_size);
+
+   if (resized != NULL)
+   {
+      k->words_sorted = resized;
+
+      return 0;
+   }
+
+   ZoO_S_ERROR
+   (
+      "Unable to allocate the memory required for the new sorted words list."
+   );
+
+   return -1;
+}
+
+/*
+ * Stores {word_id} at position {sorted_word_id} of {k->words_sorted}, first
+ * shifting every entry from that position onward one slot to the right.
+ *
+ * Pre:
+ *    {k->words_length} already counts the entry being inserted, and
+ *    {k->words_sorted} can hold that many elements.
+ *    (< sorted_word_id k->words_length)
+ */
+static void set_nth_word
+(
+   struct ZoO_knowledge k [const restrict static 1],
+   const ZoO_index sorted_word_id,
+   const ZoO_index word_id
+)
+{
+   /* Safe: (>= k->words_length 1) */
+   if (sorted_word_id < (k->words_length - 1))
+   {
+      memmove
+      (
+         /* Safe: (< (+ sorted_word_id 1) k->words_length) */
+         (void *) (k->words_sorted + (sorted_word_id + 1)),
+         (const void *) (k->words_sorted + sorted_word_id),
+         /* memmove expects a size in bytes, not a number of elements. */
+         (
+            ((size_t) ((k->words_length - 1) - sorted_word_id))
+            * sizeof(ZoO_index)
+         )
+      );
+   }
+
+   k->words_sorted[sorted_word_id] = word_id;
+}
+
+/*
+ * Stores a '\0' terminated copy of {word} in {k->words[word_id]} and
+ * registers it at position {sorted_word_id} of {k->words_sorted}.
+ *
+ * When returning 0:
+ *    The word was added, {k->words_length} was increased by one.
+ *
+ * When returning -1:
+ *    An error was reported, {k->words_length} is unchanged and the copy of
+ *    {word} has been released.
+ *
+ * Pre:
+ *    (= word_id k->words_length)
+ *    (=< sorted_word_id k->words_length)
+ */
+static int add_word
+(
+   struct ZoO_knowledge k [const restrict static 1],
+   const ZoO_char word [const restrict static 1],
+   const ZoO_index word_length,
+   const ZoO_index word_id,
+   const ZoO_index sorted_word_id
+)
+{
+   ZoO_char * stored_word;
+
+   if (k->words_length == ZoO_INDEX_MAX)
+   {
+      ZoO_S_ERROR
+      (
+         "Unable to add word: the variable that stores the number of known "
+         "words would overflow."
+      );
+
+      return -1;
+   }
+
+   stored_word = copy_word(word, word_length);
+
+   if (stored_word == (ZoO_char *) NULL)
+   {
+      return -1;
+   }
+
+   /* The reallocation functions size the lists using this counter. */
+   k->words_length += 1;
+
+   if (reallocate_words_list(k) < 0)
+   {
+      k->words_length -= 1;
+
+      /* Nothing references the copy yet: release it to avoid a leak. */
+      free((void *) stored_word);
+
+      return -1;
+   }
+
+   initialize_word(k->words + word_id);
+
+   k->words[word_id].word = stored_word;
+   k->words[word_id].word_size = ((word_length + 1) * sizeof(ZoO_char));
+
+   if (reallocate_words_sorted_list(k) < 0)
+   {
+      k->words_length -= 1;
+
+      /* The slot is no longer counted: do not leave a dangling pointer. */
+      k->words[word_id].word = (const ZoO_char *) NULL;
+      k->words[word_id].word_size = 0;
+      free((void *) stored_word);
+
+      return -1;
+   }
+
+   set_nth_word(k, sorted_word_id, word_id);
+
+   /* Success: "knowledge.h" documents 0 as "the word was added". */
+   return 0;
+}
+
+/******************************************************************************/
+/** EXPORTED ******************************************************************/
+/******************************************************************************/
+
+/*
+ * See: "knowledge.h"
+ *
+ * Looks {word} up in {k}; if the search does not find it, hands it to
+ * add_word and returns the latter's result. In that case {*word_id} is set
+ * to the value {k->words_length} had before the addition was attempted.
+ */
+int ZoO_knowledge_learn_word
+(
+   struct ZoO_knowledge k [const restrict static 1],
+   const ZoO_char word [const restrict static 1],
+   const ZoO_index word_length,
+   ZoO_index word_id [const restrict static 1]
+)
+{
+   ZoO_index insertion_point;
+   const int lookup =
+      ZoO_knowledge_find_word_id
+      (
+         k,
+         word,
+         (word_length * sizeof(ZoO_char)),
+         word_id
+      );
+
+   if (lookup != 0)
+   {
+      /* A failed search leaves the sorted position in {*word_id}. */
+      insertion_point = *word_id;
+
+      /* New words are appended at the end of {k->words}. */
+      *word_id = k->words_length;
+
+      return add_word(k, word, word_length, *word_id, insertion_point);
+   }
+
+   return 0;
+}
diff --git a/src/core/knowledge_search.c b/src/knowledge/knowledge_search.c
index d0c61ef..a48585b 100644
--- a/src/core/knowledge_search.c
+++ b/src/knowledge/knowledge_search.c
@@ -4,7 +4,7 @@
#include "../core/index.h"
#include "../core/sequence.h"
-#include "../io/error.h"
+#include "../cli/cli.h"
#include "knowledge.h"
@@ -13,6 +13,7 @@ int ZoO_knowledge_find_word_id
(
const struct ZoO_knowledge k [const restrict static 1],
const ZoO_char word [const restrict static 1],
+ const size_t word_size,
ZoO_index result [const restrict static 1]
)
{
@@ -39,7 +40,7 @@ int ZoO_knowledge_find_word_id
{
i = (current_min + ((current_max - current_min) / 2));
- cmp = ZoO_word_cmp(word, k->words[k->words_sorted[i]].word);
+ cmp = ZoO_word_cmp(word, word_size, k->words[k->words_sorted[i]].word);
if (cmp > 0)
{
@@ -65,7 +66,7 @@ int ZoO_knowledge_find_word_id
}
else
{
- *result = i;
+ *result = k->words_sorted[i];
return 0;
}
diff --git a/src/core/knowledge_types.h b/src/knowledge/knowledge_types.h
index acd239f..7eafc8b 100644
--- a/src/core/knowledge_types.h
+++ b/src/knowledge/knowledge_types.h
@@ -1,5 +1,5 @@
-#ifndef _ZoO_CORE_KNOWLEDGE_TYPES_H_
-#define _ZoO_CORE_KNOWLEDGE_TYPES_H_
+#ifndef _ZoO_KNOWLEDGE_KNOWLEDGE_TYPES_H_
+#define _ZoO_KNOWLEDGE_KNOWLEDGE_TYPES_H_
#include "../core/index_types.h"
#include "../core/char_types.h"
@@ -32,6 +32,7 @@ struct ZoO_knowledge
ZoO_index ** sequences;
ZoO_index sequences_length;
ZoO_index * sequences_sorted;
+ ZoO_index sequences_length;
};
#endif
diff --git a/src/core/main.c b/src/main.c
index bb4ae23..bb4ae23 100644
--- a/src/core/main.c
+++ b/src/main.c
diff --git a/src/pervasive.h b/src/pervasive.h
index b830326..c7c53a2 100644
--- a/src/pervasive.h
+++ b/src/pervasive.h
@@ -3,6 +3,12 @@
#include <limits.h>
+#define ZoO_DEBUG_ALL 1
+
+#ifndef ZoO_DEBUG_ALL
+ #define ZoO_DEBUG_ALL 0
+#endif
+
#ifndef ZoO_NETWORK_TIMEOUT
#define ZoO_NETWORK_TIMEOUT 200
#endif
@@ -11,34 +17,6 @@
#define ZoO_MAX_REPLY_WORDS 64
#endif
-#ifndef ZoO_DEFAULT_DATA_FILENAME
- #define ZoO_DEFAULT_DATA_FILENAME "./memory.txt"
-#endif
-
-#ifndef ZoO_DEFAULT_IRC_SERVER_ADDR
- #define ZoO_DEFAULT_IRC_SERVER_ADDR "irc.foonetic.net"
-#endif
-
-#ifndef ZoO_DEFAULT_IRC_SERVER_PORT
- #define ZoO_DEFAULT_IRC_SERVER_PORT "6667"
-#endif
-
-#ifndef ZoO_DEFAULT_IRC_SERVER_CHANNEL
- #define ZoO_DEFAULT_IRC_SERVER_CHANNEL "#theborghivemind"
-#endif
-
-#ifndef ZoO_DEFAULT_IRC_USERNAME
- #define ZoO_DEFAULT_IRC_USERNAME "zeroofone"
-#endif
-
-#ifndef ZoO_DEFAULT_IRC_REALNAME
- #define ZoO_DEFAULT_IRC_REALNAME "Zero of One (bot)"
-#endif
-
-#ifndef ZoO_DEFAULT_REPLY_RATE
- #define ZoO_DEFAULT_REPLY_RATE 8
-#endif
-
#define ZoO__TO_STRING(x) #x
#define ZoO_TO_STRING(x) ZoO__TO_STRING(x)
#define ZoO_ISOLATE(a) do {a} while (0)