From 1373211465c34015ee900e097aa87fbffb401187 Mon Sep 17 00:00:00 2001 From: Nathanael Sensfelder Date: Sun, 29 Jan 2017 19:54:26 +0100 Subject: Trying out ACSL, continuing implementation. --- src/sequence/CMakeLists.txt | 11 + src/sequence/sequence.c | 170 ++++++++++ src/sequence/sequence.h | 208 ++++++++++++ src/sequence/sequence_append.c | 212 +++++++++++++ src/sequence/sequence_creation.c | 618 ++++++++++++++++++++++++++++++++++++ src/sequence/sequence_from_string.c | 214 +++++++++++++ src/sequence/sequence_to_string.c | 189 +++++++++++ src/sequence/sequence_types.h | 9 + 8 files changed, 1631 insertions(+) create mode 100644 src/sequence/CMakeLists.txt create mode 100644 src/sequence/sequence.c create mode 100644 src/sequence/sequence.h create mode 100644 src/sequence/sequence_append.c create mode 100644 src/sequence/sequence_creation.c create mode 100644 src/sequence/sequence_from_string.c create mode 100644 src/sequence/sequence_to_string.c create mode 100644 src/sequence/sequence_types.h (limited to 'src/sequence') diff --git a/src/sequence/CMakeLists.txt b/src/sequence/CMakeLists.txt new file mode 100644 index 0000000..1186557 --- /dev/null +++ b/src/sequence/CMakeLists.txt @@ -0,0 +1,11 @@ +set( + SRC_FILES ${SRC_FILES} + ${CMAKE_CURRENT_SOURCE_DIR}/sequence.c + ${CMAKE_CURRENT_SOURCE_DIR}/sequence_append.c + ${CMAKE_CURRENT_SOURCE_DIR}/sequence_creation.c + ${CMAKE_CURRENT_SOURCE_DIR}/sequence_from_string.c + ${CMAKE_CURRENT_SOURCE_DIR}/sequence_to_string.c +) + +set(SRC_FILES ${SRC_FILES} PARENT_SCOPE) + diff --git a/src/sequence/sequence.c b/src/sequence/sequence.c new file mode 100644 index 0000000..852b05c --- /dev/null +++ b/src/sequence/sequence.c @@ -0,0 +1,170 @@ +#include +#include + +#include "../core/index.h" + +#include "sequence.h" + +/* + * Bypass rendundant ZoO_START_OF_SEQUENCE_ID at the start of a sequence. + */ +/*@ + requires + ( + \valid(sequence+ (0 .. sequence_length)) + || (sequence_length == 0) + ); + requires \separated(sequence, sequence_offset); + + assigns (*sequence_offset); + + ensures (*sequence_offset < sequence_length); + + ensures + ( + (*sequence_offset == 0) + || (sequence[0 .. *sequence_offset] == ZoO_START_OF_SEQUENCE_ID) + ); + + ensures + ( + (*sequence_offset == sequence_length) + || (sequence[*sequence_offset + 1] != ZoO_START_OF_SEQUENCE_ID) + ); + +@*/ +static void bypass_redundant_sos +( + const ZoO_index sequence [const restrict], + const size_t sequence_length, + size_t sequence_offset [const restrict static 1] +) +{ + ZoO_index i; + + *sequence_offset = 0; + + /*@ + loop invariant 0 <= i <= sequence_length; + loop invariant (*sequence_offset <= i); + loop invariant + ( + (*sequence_offset == 0) + || (sequence[*sequence_offset] == ZoO_START_OF_SEQUENCE_ID) + ); + loop invariant + ( + (i == 0) || (sequence[0 .. (i - 1)] == ZoO_START_OF_SEQUENCE_ID) + ); + loop assigns i, *sequence_offset; + loop variant (sequence_length - i); + @*/ + for (i = 0; i < sequence_length; ++i) + { + if (sequence[i] != ZoO_START_OF_SEQUENCE_ID) + { + return; + } + else if (sequence[i] == ZoO_START_OF_SEQUENCE_ID) + { + *sequence_offset = i; + } + } +} + + +/* See "sequence.h" */ +/*@ + \requires + ( + \valid(sequence_a+ (0 .. sequence_a_length)) + || (sequence_a_length == 0) + ); + + \requires + ( + \valid(sequence_b+ (0 .. sequence_b_length)) + || (sequence_b_length == 0) + ); + + assigns \result; +@*/ +int ZoO_sequence_cmp +( + const ZoO_index sequence_a [const], + size_t sequence_a_length, + const ZoO_index sequence_b [const], + size_t sequence_b_length +) +{ + size_t min_length, a, b; + size_t a_offset, b_offset; + size_t i; + /*@ ghost size_t actual_a_length, actual_b_length; @*/ + + bypass_redundant_sos(sequence_a, sequence_a_length, &a_offset); + bypass_redundant_sos(sequence_b, sequence_b_length, &b_offset); + + /*@ ghost actual_a_length = sequence_a_length; @*/ + /*@ ghost actual_b_length = sequence_b_length; @*/ + + /*@ assert (a_offset <= sequence_a_length); @*/ + sequence_a_length -= a_offset; + /*@ assert (b_offset <= sequence_b_length); @*/ + sequence_b_length -= b_offset; + + if (sequence_a_length < sequence_b_length) + { + min_length = sequence_a_length; + } + else + { + min_length = sequence_b_length; + } + + /*@ assert (min_length <= sequence_a_length); @*/ + /*@ assert (min_length <= sequence_b_length); @*/ + + /*@ assert (min_length + a_offset <= actual_a_length); @*/ + /*@ assert (min_length + b_offset <= actual_b_length); @*/ + + /*@ + loop invariant 0 <= i <= min_length; + loop assigns i; + loop variant (min_length - i); + @*/ + for (i = 0; i < min_length; ++i) + { + /*@ assert ((i + a_offset) < actual_a_length); @*/ + a = sequence_a[i + a_offset]; + /*@ assert ((i + b_offset) < actual_b_length); @*/ + b = sequence_b[i + b_offset]; + + if (a < b) + { + return -1; + } + else if (b > a) + { + return 1; + } + else if ((a == ZoO_END_OF_SEQUENCE_ID) && (b == ZoO_END_OF_SEQUENCE_ID)) + { + return 0; + } + } + + if (sequence_a_length > sequence_b_length) + { + return 1; + } + else if (sequence_a_length < sequence_b_length) + { + return -1; + } + else + { + return 0; + } +} + diff --git a/src/sequence/sequence.h b/src/sequence/sequence.h new file mode 100644 index 0000000..d1ac16f --- /dev/null +++ b/src/sequence/sequence.h @@ -0,0 +1,208 @@ +#ifndef _ZoO_CORE_SEQUENCE_H_ +#define _ZoO_CORE_SEQUENCE_H_ + +/* Defines SIZE_MAX */ +#include + +#include "../core/char_types.h" +#include "../core/index_types.h" + +#include "../pipe/pipe.h" + +#include "../knowledge/knowledge_types.h" + +#include "sequence_types.h" + +/*@ + requires \valid(sequence); + requires \valid(*sequence); + requires \valid(sequence_capacity); + requires \valid(io); + requires ((sequence_required_capacity * sizeof(ZoO_index)) <= SIZE_MAX); + requires (*sequence_capacity >= 0); + requires (*sequence_capacity <= SIZE_MAX); + requires (((*sequence_capacity) * sizeof(ZoO_index)) <= SIZE_MAX); + requires \separated(sequence, sequence_capacity, io); + + ensures + ( + ( + (\result >= 0) + && (*sequence_capacity >= sequence_required_capacity) + && ((*sequence_capacity * sizeof(ZoO_index)) <= SIZE_MAX) + ) + || (\result < 0) + ); + + behavior successful_reallocation: + assigns (*sequence); + assigns (*sequence_capacity); + + ensures (\result > 0); + ensures ((*sequence_capacity) == sequence_required_capacity); + ensures \valid(sequence); + + behavior already_done: + assigns \nothing; + + ensures (\result == 0); + ensures ((*sequence_capacity) >= sequence_required_capacity); + + behavior failure: + assigns \nothing; + + ensures (\result < 0); + + complete behaviors; + disjoint behaviors; +@*/ +int ZoO_sequence_ensure_capacity +( + ZoO_index * sequence [const restrict static 1], + size_t sequence_capacity [const restrict static 1], + const size_t sequence_required_capacity, + const struct ZoO_pipe io [const restrict static 1] +); + +int ZoO_sequence_from_undercase_string +( + const ZoO_char string [const restrict], + const size_t string_length, + struct ZoO_knowledge k [const restrict static 1], + ZoO_index * sequence [const restrict static 1], + size_t sequence_capacity [const restrict static 1], + size_t sequence_length [const restrict static 1], + const struct ZoO_pipe io [const restrict static 1] +); + +/* + * Creates a sequence containing {initial_word}. The remaining elements of + * sequence are added according to what is known to {k} as being possible. + * The resulting sequence starts by ZoO_START_OF_SEQUENCE_ID, and ends by + * ZoO_END_OF_SEQUENCE_ID. The sequence is allocated by the function. If an + * error occur, it is unallocated and set to NULL ({sequence_size} is set + * accordingly). + * Return: + * 0 on success. + * -1 iff the allocating failed. + * -2 iff the sequence initialization failed. + * -3 iff an error occured when trying to add elements to the right of the + * sequence. + * -4 iff an error occured when trying to add elements to the left of the + * sequence. + * -5 iff the resulting sequence would have been empty. + * Pre: + * (> {markov_order} 0) + * (knows {k} {initial_word}) + * (initialized {k}) + */ +int ZoO_sequence_create_from +( + const ZoO_index initial_word, + size_t credits [const restrict], + struct ZoO_knowledge k [const restrict static 1], + const ZoO_index markov_order, + ZoO_index * sequence [const restrict static 1], + size_t sequence_capacity [const restrict static 1], + size_t sequence_length [const restrict static 1], + const struct ZoO_pipe io [const restrict static 1] +); + +/*@ + requires \valid(sequence_length); + requires \valid(io); + requires (((*sequence_length) * sizeof(ZoO_index)) <= SIZE_MAX); + requires (((*sequence_capacity) * sizeof(ZoO_index)) <= SIZE_MAX); + requires \separated(sequence, sequence_capacity, sequence_length, io); + + behavior success: + assigns (*sequence_length); + assigns (*sequence[0]); + assigns (*sequence_capacity); + + ensures (\result >= 0); + ensures ((*sequence_length > \old(*sequence_length))); + + behavior could_not_increase_length: + ensures (\result == -1); + + assigns (*sequence_length); + + behavior could_not_reallocate: + ensures (\result < -1); + + complete behaviors; + disjoint behaviors; +@*/ +int ZoO_sequence_append_left +( + const ZoO_index word_id, + ZoO_index * sequence [const restrict static 1], + size_t sequence_capacity [const restrict static 1], + size_t sequence_length [const restrict static 1], + const struct ZoO_pipe io [const restrict static 1] +); + +/*@ + requires \valid(*sequence); + requires \valid(sequence_capacity); + requires \valid(sequence_length); + requires \valid(io); + requires (((*sequence_length) * sizeof(ZoO_index)) <= SIZE_MAX); + requires (((*sequence_capacity) * sizeof(ZoO_index)) <= SIZE_MAX); + requires \separated(sequence, sequence_capacity, sequence_length, io); + + behavior success: + assigns (*sequence_length); + assigns (*sequence); + assigns (*sequence_capacity); + + ensures (\result >= 0); + ensures ((*sequence_length > \old(*sequence_length))); + + behavior could_not_increase_length: + ensures (\result == -1); + + assigns (*sequence_length); + + behavior could_not_reallocate: + ensures (\result < -1); + + complete behaviors; + disjoint behaviors; +@*/ +int ZoO_sequence_append_right +( + ZoO_index * sequence [const restrict static 1], + const ZoO_index word_id, + size_t sequence_capacity [const restrict static 1], + size_t sequence_length [const restrict static 1], + const struct ZoO_pipe io [const restrict static 1] +); + +/* + * Compares two sequences. + * ZoO_END_OF_SEQUENCE marks the ending of a sequence, regardless of indicated + * sequence length, meaning that [10][ZoO_END_OF_SEQUENCE][9] and + * [10][ZoO_END_OF_SEQUENCE][8] are considered equal. Sequences do not have to + * contain ZoO_END_OF_SEQUENCE. [10][ZoO_END_OF_SEQUENCE] and [10] are + * considered different, [10][ZoO_END_OF_SEQUENCE] + * and [10][ZoO_END_OF_SEQUENCE][ZoO_END_OF_SEQUENCE] are considered equal. + * Same logic is applyied for ZoO_START_OF_SEQUENCE: + * [START_OF_SEQUENCE][10] is not [10], but + * [START_OF_SEQUENCE][START_OF_SEQUENCE][10] and [START_OF_SEQUENCE][10] are + * the same. + * Return: + * 1 iff {sequence_a} should be considered being more than {sequence_b} + * 0 iff {sequence_a} should be considered being equal to {sequence_b} + * -1 iff {sequence_a} should be considered being less than {sequence_b} + */ +int ZoO_sequence_cmp +( + const ZoO_index sequence_a [const], + size_t sequence_a_length, + const ZoO_index sequence_b [const], + size_t sequence_b_length +); + +#endif diff --git a/src/sequence/sequence_append.c b/src/sequence/sequence_append.c new file mode 100644 index 0000000..a119d8b --- /dev/null +++ b/src/sequence/sequence_append.c @@ -0,0 +1,212 @@ +#include +#include +#include +#include /* defines SIZE_MAX */ + +#include "../core/index.h" + +#include "../pipe/pipe.h" + +#include "sequence.h" + +/******************************************************************************/ +/** MEMORY (RE)ALLOCATION *****************************************************/ +/******************************************************************************/ + +/*@ + requires \valid(required_capacity); + requires \valid(io); + requires (*required_capacity >= 0); + requires (*required_capacity <= SIZE_MAX); + requires \separated(required_capacity, io); + + assigns \result; + + ensures (*required_capacity >= 0); + + behavior success: + assigns (*required_capacity); + + ensures (\result >= 0); + ensures ((*required_capacity) == (\old(*required_capacity) + 1)); + ensures (((*required_capacity) * sizeof(ZoO_index)) <= SIZE_MAX); + + behavior failure: + assigns \nothing; + + ensures (\result < 0); + + complete behaviors; + disjoint behaviors; +@*/ +static int increment_required_capacity +( + size_t required_capacity [const restrict static 1], + const struct ZoO_pipe io [const restrict static 1] +) +{ + if + ( + (*required_capacity == SIZE_MAX) + || ((SIZE_MAX / sizeof(ZoO_index)) <= (*required_capacity + 1)) + ) + { + /*@ assert \valid(io); @*/ + /*@ ghost return -1; @*/ + ZoO_S_ERROR + ( + io, + "Sequence capacity increment aborted, as the new size would not fit" + " in a size_t variable." + ); + + return -1; + } + + /*@ assert ((*required_capacity) < SIZE_MAX); @*/ + /*@ assert \valid(required_capacity); @*/ + *required_capacity = (*required_capacity + 1); + + /* assert (((*required_capacity) * sizeof(ZoO_index)) <= SIZE_MAX); @*/ + + return 0; +} + +/******************************************************************************/ +/** EXPORTED ******************************************************************/ +/******************************************************************************/ + +int ZoO_sequence_ensure_capacity +( + ZoO_index * sequence [const restrict static 1], + size_t sequence_capacity [const restrict static 1], + const size_t sequence_required_capacity, + const struct ZoO_pipe io [const restrict static 1] +) +{ + ZoO_index * new_sequence; + + if (sequence_required_capacity <= *sequence_capacity) + { + return 0; + } + + /*@ + assert + ( + sequence_required_capacity + <= (SIZE_MAX / sizeof(ZoO_index)) + ); + @*/ + /*@ assert \valid(sequence); @*/ + /*@ assert \valid(*sequence); @*/ + + new_sequence = + (ZoO_index *) realloc + ( + (void *) *sequence, + (sequence_required_capacity * sizeof(ZoO_index)) + ); + + if (new_sequence == (ZoO_index *) NULL) + { + /*@ assert \valid(io); @*/ + /*@ ghost return -1; @*/ + + ZoO_S_ERROR + ( + io, + "Unable to reallocate memory to match sequence's required size." + ); + + return -1; + } + + *sequence_capacity = sequence_required_capacity; + *sequence = new_sequence; + + return 1; +} + +int ZoO_sequence_append_left +( + const ZoO_index word_id, + ZoO_index * sequence [const restrict static 1], + size_t sequence_capacity [const restrict static 1], + size_t sequence_length [const restrict static 1], + const struct ZoO_pipe io [const restrict static 1] +) +{ + if (increment_required_capacity(sequence_length, io) < 0) + { + return -1; + } + + /*@ assert (((*sequence_length) * sizeof(ZoO_index)) <= SIZE_MAX); @*/ + if + ( + /* Appears to make Frama-C forget everything about *sequence_length. */ + ZoO_sequence_ensure_capacity + ( + sequence, + sequence_capacity, + *sequence_length, + io + ) < 0 + ) + { + return -2; + } + /*@ assert *sequence_length >= 0; @*/ + + if (*sequence_length > 1) + { + /*@ assert(((*sequence_length) * sizeof(ZoO_index)) <= SIZE_MAX); @*/ + + memmove + ( + (void *) (*sequence + 1), + (const void *) sequence, + (((*sequence_length) - 1) * sizeof(ZoO_index)) + ); + } + + (*sequence)[0] = word_id; + + return 0; +} + +int ZoO_sequence_append_right +( + ZoO_index * sequence [const restrict static 1], + const ZoO_index word_id, + size_t sequence_capacity [const restrict static 1], + size_t sequence_length [const restrict static 1], + const struct ZoO_pipe io [const restrict static 1] +) +{ + if (increment_required_capacity(sequence_length, io) < 0) + { + return -1; + } + + /*@ assert ((*sequence_length * sizeof(ZoO_index)) < SIZE_MAX); @*/ + if + ( + ZoO_sequence_ensure_capacity + ( + sequence, + sequence_capacity, + *sequence_length, + io + ) < 0 + ) + { + return -1; + } + + /* assert (*sequence_length >= 1) */ + (*sequence)[*sequence_length - 1] = word_id; + + return 0; +} diff --git a/src/sequence/sequence_creation.c b/src/sequence/sequence_creation.c new file mode 100644 index 0000000..e8ae2a1 --- /dev/null +++ b/src/sequence/sequence_creation.c @@ -0,0 +1,618 @@ +#include +#include +#include +#include /* defines SIZE_MAX */ + +#include "../core/index.h" + +#include "../pipe/pipe.h" + +#include "../knowledge/knowledge.h" + +#include "sequence.h" + +/* + * Returns a randomly chosen index pointing to a element in {weights}. + * The values of {weights} are used as weights to guide the choice. + * Returns: + * Value in [0, (length weights)[. + * Pre: + * (> weights_sum 0). + * (= (sum weights) weights_sum). + */ +/*@ + @ requires (weights_sum > 0); + @ requires \valid(weights); + @ requires (\sum(0, (\length(weights) - 1), weights) = weights_sum); +@*/ +static ZoO_index weighted_random_pick +( + const ZoO_index weights [const restrict static 1], + const ZoO_index weights_sum +) +{ + ZoO_index result, accumulator, random_number; + + accumulator = 0; + + random_number = ZoO_index_random_up_to(weights_sum); + /*@ ensures (0 <= random_number <= weights_sum); @*/ + + for (result = 0; accumulator < random_number; ++result) + { + /*@ requires (\sum(0, (\length(weights) - 1), weights) = weights_sum); @*/ + accumulator += weights[result]; + } + + return result; +} + +/******************************************************************************/ +/** ADDING ELEMENTS TO THE LEFT ***********************************************/ +/******************************************************************************/ + +/* + * Adds an id to the left of the sequence, according to what is known as likely + * to fit there. + * This requires the reallocation of {sequence}. The freeing of the previous + * memory space is handled. If an error happened, {*sequence} remains untouched. + * Semaphore: + * Takes then releases access for {k}. + * Returns: + * 0 on success. + * -1 iff nothing fitting was found. + * -2 iff the addition of that id failed. + * Pre: + * (initialized {sequence}) + * (initialized {k}) + * (> {markov_order} 0) + * (initialized {*sequence[0..({markov_order} - 1)]}) + */ +static int extend_left +( + ZoO_index * sequence [const restrict static 1], + size_t sequence_capacity [const restrict static 1], + size_t sequence_length [const restrict static 1], + const ZoO_index markov_order, + struct ZoO_knowledge k [const restrict static 1], + const struct ZoO_pipe io [const restrict static 1] +) +{ + const ZoO_index * restrict preceding_words; + const ZoO_index * restrict preceding_words_weights; + ZoO_index preceding_words_weights_sum; + + (void) ZoO_knowledge_lock_access(k, io); + + if + ( + ZoO_knowledge_find_preceding_words + ( + k, + *sequence, + markov_order, + &preceding_words, + &preceding_words_weights, + &preceding_words_weights_sum, + io + ) < 0 + ) + { + (void) ZoO_knowledge_unlock_access(k, io); + + return -1; + } + + /* preceding_words_weights_sum > 0 */ + + if + ( + ZoO_sequence_append_left + ( + weighted_random_pick + ( + preceding_words_weights, + preceding_words_weights_sum + ), + sequence, + sequence_capacity, + sequence_length, + io + ) < 0 + ) + { + (void) ZoO_knowledge_unlock_access(k, io); + + return -3; + } + + (void) ZoO_knowledge_unlock_access(k, io); + + return 0; +} + +/* + * Continuously adds ids to the left of the sequence, according to what is known + * as likely to fit there. If {credits} is NULL, it will stop upon reaching + * the id indicating the start of a sequence, otherwise it will also limit to + * {*credits} words added (including the one indicating the start of a + * sequence). + * This requires the reallocation of {sequence}. The freeing of the previous + * memory space is handled. If an error happened, {sequence} remains unfreed. + * Returns: + * 0 on success. + * -1 iff we did not manage to have ZoO_START_OF_SEQUENCE_ID as a starting + * point. This cannot be caused by lack of {*credits}, but rather by a + * memory allocation problem or a more important issue in {k}. Indeed, it + * could mean we do not know any word preceding {*sequence[0]}, not even + * ZoO_START_OF_SEQUENCE_ID. + * Pre: + * (initialized {sequence}) + * (initialized {sequence_size}) + * (initialized {k}) + * (> {markov_order} 0) + * (initialized {*sequence[0..(MARKOV_ORDER - 1)]}) + */ +static int complete_left_part_of_sequence +( + ZoO_index * sequence [restrict static 1], + size_t sequence_capacity [const restrict static 1], + size_t sequence_length [const restrict static 1], + const ZoO_index markov_order, + size_t credits [const restrict], + struct ZoO_knowledge k [const restrict static 1], + const struct ZoO_pipe io [const restrict static 1] +) +{ + for (;;) + { + if ((credits == (size_t *) NULL) || (*credits > 0)) + { + if + ( + extend_left + ( + sequence, + sequence_capacity, + sequence_length, + markov_order, + k, + io + ) < 0 + ) + { + /* We are sure *sequence[0] is defined. */ + if (*sequence[0] == ZoO_START_OF_SEQUENCE_ID) + { + /* + * We failed to add a word, but it was because none should have + * been added. + */ + return 0; + } + else + { + return -1; + } + } + } + else + { + /* No more credits available, the sequence will have to start here. */ + *sequence[0] = ZoO_START_OF_SEQUENCE_ID; + + return 0; + } + + if (credits != (size_t *) NULL) + { + *credits -= 1; + } + + /* We are sure *sequence[0] is defined. */ + switch (*sequence[0]) + { + case ZoO_END_OF_SEQUENCE_ID: + ZoO_S_WARNING + ( + io, + "END OF LINE was added at the left part of an sequence." + ); + + *sequence[0] = ZoO_START_OF_SEQUENCE_ID; + return 0; + + case ZoO_START_OF_SEQUENCE_ID: + return 0; + + default: + break; + } + } +} + +/******************************************************************************/ +/** ADDING ELEMENTS TO THE RIGHT **********************************************/ +/******************************************************************************/ + + +/* + * Adds an id to the right of the sequence, according to what is known as likely + * to fit there. + * This requires the reallocation of {sequence}. The freeing of the previous + * memory space is handled. If an error happened, {*sequence} remains untouched. + * Semaphore: + * Takes then releases access for {k}. + * Returns: + * 0 on success. + * -1 iff nothing fitting was found. + * -2 iff the addition of that id failed. + * Pre: + * (initialized {sequence}) + * (initialized {k}) + * (> {markov_order} 0) + * (initialized {*sequence[0..(MARKOV_ORDER - 1)]}) + */ +static int extend_right +( + ZoO_index * sequence [const restrict static 1], + size_t sequence_capacity [const restrict static 1], + size_t sequence_length [const restrict static 1], + const ZoO_index markov_order, + struct ZoO_knowledge k [const restrict static 1], + const struct ZoO_pipe io [const restrict static 1] +) +{ + const ZoO_index * restrict following_words; + const ZoO_index * restrict following_words_weights; + + ZoO_index following_words_weights_sum; + + (void) ZoO_knowledge_lock_access(k, io); + + if + ( + ZoO_knowledge_find_following_words + ( + k, + *sequence, + *sequence_length, + markov_order, + &following_words, + &following_words_weights, + &following_words_weights_sum, + io + ) < 0 + ) + { + (void) ZoO_knowledge_unlock_access(k, io); + + return -1; + } + + /* following_words_weights_sum > 0 */ + + if + ( + ZoO_sequence_append_right + ( + sequence, + weighted_random_pick + ( + following_words_weights, + following_words_weights_sum + ), + sequence_capacity, + sequence_length, + io + ) < 0 + ) + { + (void) ZoO_knowledge_unlock_access(k, io); + + return -3; + } + + (void) ZoO_knowledge_unlock_access(k, io); + + return 0; +} + +/* + * Continuously adds ids to the right of the sequence, according to what is + * known as likely to fit there. If {credits} is NULL, it will stop upon + * reaching the id indicating the end of a sequence, otherwise it will also + * limit to {*credits} words added (including the one indicating the end of a + * sequence). + * This requires the reallocation of {sequence}. The freeing of the previous + * memory space is handled. If an error happened, {sequence} remain untouched. + * Returns: + * 0 on success. + * -1 iff we did not manage to have ZoO_END_OF_SEQUENCE_ID as a stopping + * point. This cannot be caused by lack of {*credits}, but rather by a + * memory allocation problem or a more important issue in {k}. Indeed, it + * could mean we do not know any word following {*sequence[0]}, not even + * ZoO_END_OF_SEQUENCE_ID. + * Pre: + * (initialized {sequence}) + * (initialized {*sequence_size}) + * (initialized {k}) + * (> {markov_order} 0) + * (initialized {*sequence[0..(MARKOV_ORDER - 1)]}) + */ +static int complete_right_part_of_sequence +( + ZoO_index * sequence [const restrict static 1], + size_t sequence_capacity [const restrict static 1], + size_t sequence_length [const restrict static 1], + const ZoO_index markov_order, + size_t credits [const restrict], + struct ZoO_knowledge k [const restrict static 1], + const struct ZoO_pipe io [const restrict static 1] +) +{ + for (;;) + { + if ((credits == (size_t *) NULL) || (*credits > 0)) + { + if + ( + extend_right + ( + sequence, + sequence_capacity, + sequence_length, + markov_order, + k, + io + ) < 0 + ) + { + /* Safe: (> sequence_length 1) */ + if (*sequence[(*sequence_length - 1)] == ZoO_END_OF_SEQUENCE_ID) + { + /* + * We failed to add a word, but it was because none should have + * been added. + */ + return 0; + } + else + { + return -1; + } + } + } + else + { + /* No more credits available, we end the sequence. */ + *sequence[(*sequence_length - 1)] = ZoO_END_OF_SEQUENCE_ID; + + return 0; + } + + if (credits != (size_t *) NULL) + { + *credits -= 1; + } + + /* Safe: (> sequence_length 1) */ + switch (*sequence[(*sequence_length - 1)]) + { + case ZoO_START_OF_SEQUENCE_ID: + ZoO_S_WARNING + ( + io, + "END OF LINE was added at the right part of an sequence." + ); + + *sequence[(*sequence_length - 1)] = ZoO_END_OF_SEQUENCE_ID; + return 0; + + case ZoO_END_OF_SEQUENCE_ID: + return 0; + + default: + break; + } + } +} + +/******************************************************************************/ +/** INITIALIZING SEQUENCE *****************************************************/ +/******************************************************************************/ + +/* + * Initializes an pre-allocated sequence by filling it with {initial_word} + * followed by a sequence of ({markov_order} - 1) words that is known to have + * followed {initial_word} at least once. This sequence is chosen depending on + * how often {k} indicates it has followed {initial_word}. Note that if + * {markov_order} is 1, there is no sequence added, simply {initial_word}. + * Returns: + * 0 on success. + * -1 if no such sequence was found. + * Pre: + * (size (= {sequence} {markov_order})) + * (initialized {k}) + * (> markov_order 0) + * Post: + * (initialized {sequence[0..(markov_order - 1)]}) + */ +static int initialize_sequence +( + ZoO_index sequence [const restrict static 1], + const ZoO_index initial_word, + const ZoO_index markov_order, + struct ZoO_knowledge k [const static 1], + const struct ZoO_pipe io [const restrict static 1] +) +{ + const ZoO_index * restrict following_sequences_ref; + const ZoO_index * restrict chosen_sequence; + const ZoO_index * restrict following_sequences_weights; + ZoO_index following_sequences_weights_sum; + + sequence[0] = initial_word; + + if (markov_order == 1) + { + return 0; + } + + (void) ZoO_knowledge_lock_access(k, io); + + if + ( + ZoO_knowledge_get_following_sequences_ref + ( + k, + initial_word, + &following_sequences_ref, + &following_sequences_weights, + &following_sequences_weights_sum, + io + ) < 0 + ) + { + (void) ZoO_knowledge_unlock_access(k, io); + + ZoO_S_ERROR + ( + io, + "Unable to find any sequence that would precede the initial word." + ); + + return -1; + } + + /* following_sequences_ref contains only valid references. */ + (void) ZoO_knowledge_get_sequence + ( + k, + following_sequences_ref + [ + weighted_random_pick + ( + following_sequences_weights, + following_sequences_weights_sum + ) + ], + &chosen_sequence, + io + ); + + /* Safe if 'allocate_initial_sequence' succeeded. */ + memcpy + ( + (void *) (sequence + 1), + (const void *) chosen_sequence, + ((((size_t) markov_order) - 1) * sizeof(ZoO_index)) + ); + + (void) ZoO_knowledge_unlock_access(k, io); + + return 0; +} + +/******************************************************************************/ +/** EXPORTED ******************************************************************/ +/******************************************************************************/ + +/* See "sequence.h" */ +int ZoO_sequence_create_from +( + const ZoO_index initial_word, + size_t credits [const restrict], + struct ZoO_knowledge k [const restrict static 1], + const ZoO_index markov_order, + ZoO_index * sequence [const restrict static 1], + size_t sequence_capacity [const restrict static 1], + size_t sequence_length [const restrict static 1], + const struct ZoO_pipe io [const restrict static 1] +) +{ + if + ( + ZoO_sequence_ensure_capacity + ( + sequence, + sequence_capacity, + markov_order, + io + ) < 0 + ) + { + *sequence_length = 0; + + return -1; + } + + if + ( + initialize_sequence + ( + *sequence, + initial_word, + markov_order, + k, + io + ) < 0 + ) + { + *sequence_length = 0; + + return -2; + } + + *sequence_length = markov_order; + + if + ( + complete_right_part_of_sequence + ( + sequence, + sequence_capacity, + sequence_length, + markov_order, + credits, + k, + io + ) < 0 + ) + { + *sequence_length = 0; + + return -3; + } + + if + ( + complete_left_part_of_sequence + ( + sequence, + sequence_capacity, + sequence_length, + markov_order, + credits, + k, + io + ) < 0 + ) + { + *sequence_length = 0; + + return -4; + } + + if (*sequence_length < 3) + { + /* 2 elements, for start and stop. */ + ZoO_S_ERROR(io, "Created sequence was empty."); + + *sequence_length = 0; + + return -5; + } + + return 0; +} diff --git a/src/sequence/sequence_from_string.c b/src/sequence/sequence_from_string.c new file mode 100644 index 0000000..cd04d70 --- /dev/null +++ b/src/sequence/sequence_from_string.c @@ -0,0 +1,214 @@ +#define _POSIX_C_SOURCE 200809L +#include +#include +#include /* defines SIZE_MAX */ + +#include "../core/char.h" +#include "../core/index.h" + +#include "../pipe/pipe.h" + +#include "../knowledge/knowledge.h" + +#include "sequence.h" + +/******************************************************************************/ +/** HANDLING WORDS ************************************************************/ +/******************************************************************************/ + +/* + * Semaphore: + * Takes then releases access for {k}. + */ +static int add_word_to_sequence +( + const ZoO_char string [const restrict static 1], + const size_t word_start, + const size_t word_length, + ZoO_index * sequence [const restrict static 1], + size_t sequence_capacity [const restrict static 1], + size_t sequence_length [const restrict static 1], + struct ZoO_knowledge k [const restrict static 1], + const struct ZoO_pipe io [const restrict static 1] +) +{ + ZoO_index word_id; + ZoO_char * stored_word; + + if (word_length == 0) + { + return 0; + } + + (void) ZoO_knowledge_lock_access(k, io); + + if + ( + ZoO_knowledge_learn_word + ( + k, + (string + word_start), + word_length, + &word_id, + io + ) < 0 + ) + { + (void) ZoO_knowledge_unlock_access(k, io); + + return -1; + } + + (void) ZoO_knowledge_unlock_access(k, io); + + if + ( + ZoO_sequence_append_right + ( + sequence, + word_id, + sequence_capacity, + sequence_length, + io + ) < 0 + ) + { + return -1; + } + + return 0; +} + +static int find_word +( + const ZoO_char string [const restrict static 1], + const size_t string_length, + const size_t offset, + size_t word_start [const restrict static 1], + size_t word_length [const restrict static 1] +) +{ + size_t i; + + i = offset; + + while ((string[i] == ' ') && (i < string_length)) + { + i += 1; + } + + if (i >= string_length) + { + return -1; + } + + *word_start = i; + + while ((string[i] != ' ') && (i < string_length)) + { + i += 1; + } + + if (i >= string_length) + { + return -1; + } + + *word_length = (i - *word_start); + + return 0; +} + +/******************************************************************************/ +/** EXPORTED ******************************************************************/ +/******************************************************************************/ + +/* See: "sequence.h" */ +int ZoO_sequence_from_undercase_string +( + const ZoO_char string [const restrict], + const size_t string_length, + struct ZoO_knowledge k [const restrict static 1], + ZoO_index * sequence [const restrict static 1], + size_t sequence_capacity [const restrict static 1], + size_t sequence_length [const restrict static 1], + const struct ZoO_pipe io [const restrict static 1] +) +{ + size_t word_start, word_length; + size_t i; + + i = 0; + + *sequence = (ZoO_index *) NULL; + *sequence_length = 0; + + if + ( + ZoO_sequence_append_right + ( + sequence, + ZoO_START_OF_SEQUENCE_ID, + sequence_capacity, + sequence_length, + io + ) < 0 + ) + { + return -1; + } + + while (i < string_length) + { + if (find_word(string, i, string_length, &word_start, &word_length) < 0) + { + break; + } + + if + ( + add_word_to_sequence + ( + string, + word_start, + word_length, + sequence, + sequence_capacity, + sequence_length, + k, + io + ) < 0 + ) + { + free((void *) *sequence); + *sequence = (ZoO_index *) NULL; + *sequence_length = 0; + + return -1; + } + + i = (word_start + word_length); + } + + if + ( + ZoO_sequence_append_right + ( + sequence, + ZoO_END_OF_SEQUENCE_ID, + sequence_capacity, + sequence_length, + io + ) < 0 + ) + { + free((void *) *sequence); + + *sequence = (ZoO_index *) NULL; + *sequence_length = 0; + + return -1; + } + + return 0; +} diff --git a/src/sequence/sequence_to_string.c b/src/sequence/sequence_to_string.c new file mode 100644 index 0000000..cec3af8 --- /dev/null +++ b/src/sequence/sequence_to_string.c @@ -0,0 +1,189 @@ +#define _POSIX_C_SOURCE 200809L +#include +#include +#include /* defines SIZE_MAX */ + +#include "../core/char.h" +#include "../core/index.h" + +#include "../pipe/pipe.h" + +#include "../knowledge/knowledge.h" + +#include "sequence.h" + +/******************************************************************************/ +/** MEMORY ALLOCATION *********************************************************/ +/******************************************************************************/ +static int ensure_string_capacity +( + ZoO_char * string [const restrict static 1], + size_t string_capacity [const restrict static 1], + const size_t string_required_capacity, + const struct ZoO_pipe io [const restrict static 1] +) +{ + ZoO_char * new_string; + + if (string_required_capacity <= *string_capacity) + { + return 0; + } + + new_string = + (ZoO_char *) realloc + ( + (void *) *string, + ((size_t) string_required_capacity) * sizeof(ZoO_char) + ); + + if (new_string== (ZoO_char *) NULL) + { + ZoO_S_ERROR + ( + io, + "Unable to reallocate memory to match string's required size." + ); + + return -1; + } + + *string_capacity = string_required_capacity; + *string = new_string; + + return 1; +} + +/******************************************************************************/ +/** ADD WORD ******************************************************************/ +/******************************************************************************/ +static int increment_required_capacity +( + size_t current_capacity [const restrict static 1], + const size_t increase_factor, + const struct ZoO_pipe io [const restrict static 1] +) +{ + if ((ZoO_INDEX_MAX - increase_factor) > *current_capacity) + { + ZoO_S_ERROR + ( + io, + "String capacity increment aborted, as the new capacity would not" + " fit in a ZoO_index variable." + ); + + return -1; + } + + *current_capacity += increase_factor; + + if ((SIZE_MAX / sizeof(ZoO_char)) > *current_capacity) + { + *current_capacity -= increase_factor; + + ZoO_S_ERROR + ( + io, + "String capacity increment aborted, as the new size would not fit" + " in a size_t variable." + ); + + return -2; + } + + return 0; +} + +static int add_word +( + const ZoO_index word_id, + struct ZoO_knowledge k [const restrict static 1], + ZoO_char * destination [const restrict static 1], + size_t destination_capacity [const restrict static 1], + size_t destination_length [const restrict static 1], + const struct ZoO_pipe io [const restrict static 1] +) +{ + const ZoO_char * word; + ZoO_index word_size; + size_t insertion_point; + + (void) ZoO_knowledge_lock_access(k, io); + ZoO_knowledge_get_word(k, word_id, &word, &word_size, io); + (void) ZoO_knowledge_unlock_access(k, io); + + insertion_point = *destination_length; + + /* word_size includes '\n', which will be replaced by a space. */ + /* (word_size == ZoO_INDEX_MAX) ==> could not have learned word. */ + if (increment_required_capacity(destination_length, (word_size + 1), io) < 0) + { + return -1; + } + + if + ( + ensure_string_capacity + ( + destination, + destination_capacity, + *destination_length, + io + ) < 0 + ) + { + return -2; + } + + memcpy + ( + (*destination + insertion_point), + (const void *) word, + word_size + ); + + (*destination)[*destination_length - 1] = ' '; + + return 0; +} + +/******************************************************************************/ +/** EXPORTED ******************************************************************/ +/******************************************************************************/ +int ZoO_sequence_to_undercase_string +( + const ZoO_index sequence [const restrict static 1], + const size_t sequence_length, + ZoO_char * destination [const restrict static 1], + struct ZoO_knowledge k [const restrict static 1], + size_t destination_capacity [const restrict static 1], + size_t destination_length [const restrict static 1], + const struct ZoO_pipe io [const restrict static 1] +) +{ + size_t i; + + for (i = 0; i < sequence_length; ++i) + { + if + ( + add_word + ( + sequence[i], + k, + destination, + destination_capacity, + destination_length, + io + ) < 0 + ) + { + *destination_length = 0; + + return -1; + } + } + + return 0; +} diff --git a/src/sequence/sequence_types.h b/src/sequence/sequence_types.h new file mode 100644 index 0000000..717d418 --- /dev/null +++ b/src/sequence/sequence_types.h @@ -0,0 +1,9 @@ +#ifndef _ZoO_CORE_SEQUENCE_TYPES_H_ +#define _ZoO_CORE_SEQUENCE_TYPES_H_ + +#define ZoO_START_OF_SEQUENCE_ID 0 +#define ZoO_END_OF_SEQUENCE_ID 1 + +#define ZoO_RESERVED_IDS_COUNT 2 + +#endif -- cgit v1.2.3-70-g09d2