| summaryrefslogtreecommitdiff | 
diff options
Diffstat (limited to 'src/sequence')
| -rw-r--r-- | src/sequence/CMakeLists.txt | 11 | ||||
| -rw-r--r-- | src/sequence/sequence.c | 50 | ||||
| -rw-r--r-- | src/sequence/sequence.h | 331 | ||||
| -rw-r--r-- | src/sequence/sequence_append.c | 241 | ||||
| -rw-r--r-- | src/sequence/sequence_creation.c | 605 | ||||
| -rw-r--r-- | src/sequence/sequence_from_string.c | 219 | ||||
| -rw-r--r-- | src/sequence/sequence_to_string.c | 196 | ||||
| -rw-r--r-- | src/sequence/sequence_types.h | 9 | 
8 files changed, 1662 insertions, 0 deletions
| diff --git a/src/sequence/CMakeLists.txt b/src/sequence/CMakeLists.txt
new file mode 100644
index 0000000..1186557
--- /dev/null
+++ b/src/sequence/CMakeLists.txt
@@ -0,0 +1,11 @@
+set(
+   SRC_FILES ${SRC_FILES}
+   ${CMAKE_CURRENT_SOURCE_DIR}/sequence.c
+   ${CMAKE_CURRENT_SOURCE_DIR}/sequence_append.c
+   ${CMAKE_CURRENT_SOURCE_DIR}/sequence_creation.c
+   ${CMAKE_CURRENT_SOURCE_DIR}/sequence_from_string.c
+   ${CMAKE_CURRENT_SOURCE_DIR}/sequence_to_string.c
+)
+
+set(SRC_FILES ${SRC_FILES} PARENT_SCOPE)
+
diff --git a/src/sequence/sequence.c b/src/sequence/sequence.c
new file mode 100644
index 0000000..c9c30f1
--- /dev/null
+++ b/src/sequence/sequence.c
@@ -0,0 +1,50 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "../core/index.h"
+
+#include "sequence.h"
+
+/*
+ * See "sequence.h".
+ * Compares {sequence_a} and {sequence_b} index by index over their first
+ * {length} elements. The first index at which they differ decides the result
+ * (-1 if the element of {sequence_a} is smaller, 1 if it is larger); 0 is
+ * returned if no such index exists (including when {length} is 0).
+ * NOTE(review): no special handling of the start/end of sequence markers is
+ * performed here, unlike what the comment in "sequence.h" describes.
+ */
+/*@
+   requires
+   (
+      \valid_read(sequence_a+ (0 .. (length - 1)))
+      || (length == 0)
+   );
+
+   requires
+   (
+      \valid_read(sequence_b+ (0 .. (length - 1)))
+      || (length == 0)
+   );
+
+   assigns \result;
+@*/
+int JH_sequence_cmp
+(
+   const JH_index sequence_a [const restrict static 1],
+   const JH_index sequence_b [const restrict static 1],
+   const JH_index length
+)
+{
+   JH_index i, a, b;
+
+   for (i = 0; i < length; ++i)
+   {
+      a = sequence_a[i];
+      b = sequence_b[i];
+
+      if (a < b)
+      {
+         return -1;
+      }
+      else if (a > b)
+      {
+         return 1;
+      }
+   }
+
+   return 0;
+}
+
diff --git a/src/sequence/sequence.h b/src/sequence/sequence.h
new file mode 100644
index 0000000..dc67159
--- /dev/null
+++ b/src/sequence/sequence.h
@@ -0,0 +1,331 @@
+#ifndef _JH_CORE_SEQUENCE_H_
+#define _JH_CORE_SEQUENCE_H_
+
+/* Defines SIZE_MAX */
+#include <stdint.h>
+
+#include "../core/char_types.h"
+#include "../core/index_types.h"
+
+#include "../error/error.h"
+
+#include "../knowledge/knowledge_types.h"
+
+#include "sequence_types.h"
+
+/*@
+   requires \valid(sequence);
+   
requires (\block_length(sequence) >= 1); +   requires \valid(sequence_capacity); +   requires (\block_length(sequence) >= 1); +   requires \valid(io); + +   requires (((*sequence_capacity) * sizeof(JH_index)) <= SIZE_MAX); +   requires ((sequence_required_capacity * sizeof(JH_index)) <= SIZE_MAX); + +   requires +      \separated +      ( +         (sequence+ (0 .. \block_length(sequence))), +         ((*sequence)+ (0 .. \block_length(*sequence))), +         (sequence_capacity+ (0 ..\block_length(sequence_capacity))), +         (io+ (0 ..\block_length(io))) +      ); + +   ensures +      \separated +      ( +         (sequence+ (0 .. \block_length(sequence))), +         ((*sequence)+ (0 .. \block_length(*sequence))), +         (sequence_capacity+ (0 ..\block_length(sequence_capacity))), +         (io+ (0 ..\block_length(io))) +      ); + +   ensures (((*sequence_capacity) * sizeof(JH_index)) <= SIZE_MAX); +   ensures ((sequence_required_capacity * sizeof(JH_index)) <= SIZE_MAX); +   ensures \valid(sequence); +   ensures \valid(*sequence); +   ensures \valid(sequence_capacity); +   ensures \valid(io); + +   assigns (*sequence); +   assigns (*sequence_capacity); + +   ensures ((\result == 1) || (\result == 0) || (\result == -1)); + +   ensures +      ( +         (\result == 1) ==> +            ((*sequence_capacity) == sequence_required_capacity) +      ); + +   ensures +      ( +         (\result == 1) ==> +            ((*sequence_capacity) > \old(*sequence_capacity)) +      ); + +   ensures ((\result == -1) ==> ((*sequence) == \old(*sequence))); + +   ensures +      ( +         (\result == -1) ==> +            ((*sequence_capacity) == \old(*sequence_capacity)) +      ); + +   ensures ((\result == 0) ==> ((*sequence) == \old(*sequence))); + +   ensures +      ( +         (\result == 0) ==> +            ((*sequence_capacity) == \old(*sequence_capacity)) +      ); +@*/ +int JH_sequence_ensure_capacity +( +   JH_index * sequence [const restrict static 1], +   size_t 
sequence_capacity [const restrict static 1], +   const size_t sequence_required_capacity, +   FILE io [const restrict static 1] +); + +int JH_sequence_from_undercase_string +( +   const JH_char string [const restrict], +   const size_t string_length, +   struct JH_knowledge k [const restrict static 1], +   JH_index * sequence [const restrict static 1], +   size_t sequence_capacity [const restrict static 1], +   size_t sequence_length [const restrict static 1], +   FILE io [const restrict static 1] +); + +/* + * Creates a sequence containing {initial_word}. The remaining elements of + * sequence are added according to what is known to {k} as being possible. + * The resulting sequence starts by JH_START_OF_SEQUENCE_ID, and ends by + * JH_END_OF_SEQUENCE_ID. The sequence is allocated by the function. If an + * error occur, it is unallocated and set to NULL ({sequence_size} is set + * accordingly). + * Return: + *    0 on success. + *    -1 iff the allocating failed. + *    -2 iff the sequence initialization failed. + *    -3 iff an error occured when trying to add elements to the right of the + *       sequence. + *    -4 iff an error occured when trying to add elements to the left of the + *       sequence. + *    -5 iff the resulting sequence would have been empty. 
+ * Pre: + *    (> {markov_order} 1) + *    (knows {k} {initial_word}) + *    (initialized {k}) + */ +int JH_sequence_create_from +( +   const JH_index initial_word, +   size_t credits [const restrict], +   struct JH_knowledge k [const restrict static 1], +   const JH_index markov_order, +   JH_index * sequence [const restrict static 1], +   size_t sequence_capacity [const restrict static 1], +   size_t sequence_length [const restrict static 1], +   FILE io [const restrict static 1] +); + +/*@ +   requires \valid(sequence); +   requires \valid(*sequence); +   requires \valid(sequence_capacity); +   requires \valid(sequence_length); +   requires \valid(io); +   requires (((*sequence_length) * sizeof(JH_index)) <= SIZE_MAX); +   requires (((*sequence_capacity) * sizeof(JH_index)) <= SIZE_MAX); +   requires +      \separated +      ( +         (sequence+ (0 .. \block_length(sequence))), +         ((*sequence)+ (0 .. \block_length(*sequence))), +         (sequence_capacity+ (0 ..\block_length(sequence_capacity))), +         (sequence_length+ (0 ..\block_length(sequence_length))), +         (io+ (0 ..\block_length(io))) +      ); + +   assigns (*sequence_length); +   assigns (*sequence[0]); +   assigns (*sequence_capacity); + +   ensures \valid(sequence); +   ensures \valid(*sequence); +   ensures \valid(sequence_capacity); +   ensures \valid(sequence_length); +   ensures \valid(io); +   ensures (((*sequence_length) * sizeof(JH_index)) <= SIZE_MAX); +   ensures (((*sequence_capacity) * sizeof(JH_index)) <= SIZE_MAX); +   ensures +      \separated +      ( +         (sequence+ (0 .. \block_length(sequence))), +         ((*sequence)+ (0 .. 
\block_length(*sequence))), +         (sequence_capacity+ (0 ..\block_length(sequence_capacity))), +         (sequence_length+ (0 ..\block_length(sequence_length))), +         (io+ (0 ..\block_length(io))) +      ); + +   ensures ((\result == 0) || (\result == -1)); + +   ensures +   ( +      (\result == 0) ==> +         (*sequence_length == (\old(*sequence_length) + 1)) +   ); + +   ensures +      ( +         (\result == 0) ==> +            ((*sequence_capacity) >= \old(*sequence_capacity)) +      ); + +   ensures ((\result == 0) ==> (*sequence_length > \old(*sequence_length))); + +   ensures ((\result == -1) ==> ((*sequence_length) == \old(*sequence_length))); +   ensures ((\result == -1) ==> (((*sequence)[0]) == \old((*sequence)[0]))); +   ensures +      ( +         (\result == -1) ==> +            ((*sequence_capacity) == \old(*sequence_capacity)) +      ); + +   ensures ((\result == -1) ==> ((*sequence_length) == \old(*sequence_length))); +   ensures +      ( +         (\result == -1) ==> +            ((*sequence_capacity) == \old(*sequence_capacity)) +      ); + +@*/ +int JH_sequence_append_left +( +   const JH_index word_id, +   JH_index * sequence [const restrict static 1], +   size_t sequence_capacity [const restrict static 1], +   size_t sequence_length [const restrict static 1], +   FILE io [const restrict static 1] +); + +/*@ +   requires \valid(sequence); +   requires \valid(*sequence); +   requires \valid(sequence_capacity); +   requires \valid(sequence_length); +   requires \valid(io); +   requires (((*sequence_length) * sizeof(JH_index)) <= SIZE_MAX); +   requires (((*sequence_capacity) * sizeof(JH_index)) <= SIZE_MAX); +   requires +      \separated +      ( +         (sequence+ (0 .. \block_length(sequence))), +         ((*sequence)+ (0 .. 
\block_length(*sequence))), +         (sequence_capacity+ (0 ..\block_length(sequence_capacity))), +         (sequence_length+ (0 ..\block_length(sequence_length))), +         (io+ (0 ..\block_length(io))) +      ); + +   assigns (*sequence_length); +   assigns ((*sequence)[0]); +   assigns (*sequence_capacity); + +   ensures \valid(sequence); +   ensures \valid(*sequence); +   ensures \valid(sequence_capacity); +   ensures \valid(sequence_length); +   ensures \valid(io); +   ensures (((*sequence_length) * sizeof(JH_index)) <= SIZE_MAX); +   ensures (((*sequence_capacity) * sizeof(JH_index)) <= SIZE_MAX); +   ensures +      \separated +      ( +         (sequence+ (0 .. \block_length(sequence))), +         ((*sequence)+ (0 .. \block_length(*sequence))), +         (sequence_capacity+ (0 ..\block_length(sequence_capacity))), +         (sequence_length+ (0 ..\block_length(sequence_length))), +         (io+ (0 ..\block_length(io))) +      ); + +   ensures ((\result == 0) || (\result == -1)); + +   ensures +   ( +      (\result == 0) ==> +         (*sequence_length == (\old(*sequence_length) + 1)) +   ); + +   ensures +      ( +         (\result == 0) ==> +            ((*sequence_capacity) >= \old(*sequence_capacity)) +      ); + +   ensures ((\result == 0) ==> (*sequence_length > \old(*sequence_length))); + +   ensures ((\result == -1) ==> ((*sequence_length) == \old(*sequence_length))); +   ensures ((\result == -1) ==> (((*sequence)[0]) == \old((*sequence)[0]))); +   ensures +      ( +         (\result == -1) ==> +            ((*sequence_capacity) == \old(*sequence_capacity)) +      ); + +   ensures ((\result == -1) ==> ((*sequence_length) == \old(*sequence_length))); +   ensures +      ( +         (\result == -1) ==> +            ((*sequence_capacity) == \old(*sequence_capacity)) +      ); + +@*/ +int JH_sequence_append_right +( +   JH_index * sequence [const restrict static 1], +   const JH_index word_id, +   size_t sequence_capacity [const restrict static 1], +   
size_t sequence_length [const restrict static 1], +   FILE io [const restrict static 1] +); + +/* + * Compares two sequences. + * JH_END_OF_SEQUENCE marks the ending of a sequence, regardless of indicated + * sequence length, meaning that [10][JH_END_OF_SEQUENCE][9] and + * [10][JH_END_OF_SEQUENCE][8] are considered equal. Sequences do not have to + * contain JH_END_OF_SEQUENCE. [10][JH_END_OF_SEQUENCE] and [10] are + * considered different, [10][JH_END_OF_SEQUENCE] + * and [10][JH_END_OF_SEQUENCE][JH_END_OF_SEQUENCE] are considered equal. + * Same logic is applyied for JH_START_OF_SEQUENCE: + * [START_OF_SEQUENCE][10] is not [10], but + * [START_OF_SEQUENCE][START_OF_SEQUENCE][10] and [START_OF_SEQUENCE][10] are + * the same. + * Return: + *    1 iff {sequence_a} should be considered being more than {sequence_b} + *    0 iff {sequence_a} should be considered being equal to {sequence_b} + *    -1 iff {sequence_a} should be considered being less than {sequence_b} + */ +int JH_sequence_cmp +( +   const JH_index sequence_a [const], +   const JH_index sequence_b [const], +   const JH_index length +); + +int JH_sequence_to_undercase_string +( +   const JH_index sequence [const restrict static 1], +   const size_t sequence_length, +   struct JH_knowledge k [const restrict static 1], +   JH_char * destination [const restrict static 1], +   size_t destination_capacity [const restrict static 1], +   size_t destination_length [const restrict static 1], +   FILE io [const restrict static 1] +); + +#endif diff --git a/src/sequence/sequence_append.c b/src/sequence/sequence_append.c new file mode 100644 index 0000000..7206c19 --- /dev/null +++ b/src/sequence/sequence_append.c @@ -0,0 +1,241 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../core/index.h" + +#include "../error/error.h" + +#include "sequence.h" + +/******************************************************************************/ +/** MEMORY 
(RE)ALLOCATION *****************************************************/ +/******************************************************************************/ + +/*@ +   requires \valid(required_capacity); +   requires \valid(io); +   requires +      \separated +      ( +         (required_capacity+ (0 .. \block_length(required_capacity))), +         (io+ (0 .. \block_length(io))) +      ); + +   assigns \result; +   assigns (*required_capacity); + +   ensures ((\result == 0) || (\result == -1)); + +   ensures \valid(required_capacity); +   ensures \valid(io); + +   ensures +      \separated +      ( +         (required_capacity+ (0 .. \block_length(required_capacity))), +         (io+ (0 .. \block_length(io))) +      ); + +   ensures +      ( +         (\result == 0) <==> +               ((*required_capacity) == (\old(*required_capacity) + 1)) +      ); + +   ensures +      ( +         (\result == 0) ==> +            ((*required_capacity) * sizeof(JH_index)) <= SIZE_MAX +      ); + +   ensures +      ( +         (\result == -1) <==> +            ((*required_capacity) == \old(*required_capacity)) +      ); + +@*/ +static int increment_required_capacity +( +   size_t required_capacity [const restrict static 1], +   FILE io [const restrict static 1] +) +{ +   if +   ( +      (*required_capacity == SIZE_MAX) +      || ((SIZE_MAX / sizeof(JH_index)) <= (*required_capacity + 1)) +   ) +   { +      /*@ assert \valid(io); @*/ + +      #ifndef JH_RUNNING_FRAMA_C +      JH_S_ERROR +      ( +         io, +         "Sequence capacity increment aborted, as the new size would not fit " +         "in a size_t variable." 
+      ); +      #endif + +      return -1; +   } + +   /*@ assert ((*required_capacity) < SIZE_MAX); @*/ +   /*@ assert \valid(required_capacity); @*/ +   *required_capacity = (*required_capacity + 1); + +   /* assert (((*required_capacity) * sizeof(JH_index)) <= SIZE_MAX); @*/ + +   return 0; +} + +/******************************************************************************/ +/** EXPORTED ******************************************************************/ +/******************************************************************************/ + +int JH_sequence_ensure_capacity +( +   JH_index * sequence [const restrict static 1], +   size_t sequence_capacity [const restrict static 1], +   const size_t sequence_required_capacity, +   FILE io [const restrict static 1] +) +{ +   JH_index * new_sequence; + +   /*@ assert \valid(sequence_capacity); @*/ +   if (sequence_required_capacity <= *sequence_capacity) +   { +      return 0; +   } + +   /*@ +      assert +      ( +         sequence_required_capacity +         <= (SIZE_MAX / sizeof(JH_index)) +      ); +   @*/ +   /*@ assert \valid(sequence); @*/ +   /*@ assert \valid(*sequence); @*/ +   new_sequence = +      (JH_index *) realloc +      ( +         (void *) *sequence, +         (sequence_required_capacity * sizeof(JH_index)) +      ); + +   if (new_sequence == (JH_index *) NULL) +   { +      /*@ assert \valid(io); @*/ + +      #ifndef JH_RUNNING_FRAMA_C +      JH_S_ERROR +      ( +         io, +         "Unable to reallocate memory to match sequence's required size." 
+      ); +      #endif + +      return -1; +   } + +   *sequence_capacity = sequence_required_capacity; +   *sequence = new_sequence; + +   return 1; +} + +int JH_sequence_append_left +( +   const JH_index word_id, +   JH_index * sequence [const restrict static 1], +   size_t sequence_capacity [const restrict static 1], +   size_t sequence_length [const restrict static 1], +   FILE io [const restrict static 1] +) +{ +   if (increment_required_capacity(sequence_length, io) < 0) +   { +      return -1; +   } + +   /*@ assert (((*sequence_length) * sizeof(JH_index)) <= SIZE_MAX); @*/ +   if +   ( +      JH_sequence_ensure_capacity +      ( +         sequence, +         sequence_capacity, +         *sequence_length, +         io +      ) < 0 +   ) +   { +      *sequence_length -= 1; + +      return -1; +   } + +   /*@ assert (*sequence_length) >= 0; @*/ + +   if ((*sequence_length) > 1) +   { +      /*@ assert(((*sequence_length) * sizeof(JH_index)) <= SIZE_MAX); @*/ + +      #ifndef JH_RUNNING_FRAMA_C +      memmove +      ( +         (void *) ((*sequence) + 1), +         (const void *) (*sequence), +         (((*sequence_length) - 1) * sizeof(JH_index)) +      ); +      #endif +   } + +   (*sequence)[0] = word_id; + +   return 0; +} + +int JH_sequence_append_right +( +   JH_index * sequence [const restrict static 1], +   const JH_index word_id, +   size_t sequence_capacity [const restrict static 1], +   size_t sequence_length [const restrict static 1], +   FILE io [const restrict static 1] +) +{ +   if (increment_required_capacity(sequence_length, io) < 0) +   { +      return -1; +   } + +   /*@ assert (((*sequence_length) * sizeof(JH_index)) <= SIZE_MAX); @*/ + +   if +   ( +      JH_sequence_ensure_capacity +      ( +         sequence, +         sequence_capacity, +         *sequence_length, +         io +      ) < 0 +   ) +   { +      *sequence_length -= 1; + +      return -1; +   } + +   /*@ assert ((*sequence_length) >= 1); @*/ +   
(*sequence)[(*sequence_length) - 1] = word_id; +   /*@ assert ((*sequence_length) >= 1); @*/ + +   return 0; +} diff --git a/src/sequence/sequence_creation.c b/src/sequence/sequence_creation.c new file mode 100644 index 0000000..0b5e393 --- /dev/null +++ b/src/sequence/sequence_creation.c @@ -0,0 +1,605 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../core/index.h" + +#include "../error/error.h" + +#include "../knowledge/knowledge.h" + +#include "sequence.h" + + +/******************************************************************************/ +/** ADDING ELEMENTS TO THE LEFT ***********************************************/ +/******************************************************************************/ + +/* + * Adds an id to the left of the sequence, according to what is known as likely + * to fit there. + * This requires the reallocation of {sequence}. The freeing of the previous + * memory space is handled. If an error happened, {*sequence} remains untouched. + * Semaphore: + *    Takes then releases access for {k}. + * Returns: + *    0 on success. + *    -1 iff nothing fitting was found. + *    -2 iff the addition of that id failed. 
+ * Pre:
+ *    (initialized {sequence})
+ *    (initialized {k})
+ *    (> {markov_order} 1)
+ *    (initialized {*sequence[0..({markov_order} - 1)]})
+ */
+static int extend_left
+(
+   JH_index * sequence [const restrict static 1],
+   size_t sequence_capacity [const restrict static 1],
+   size_t sequence_length [const restrict static 1],
+   const JH_index markov_order,
+   struct JH_knowledge k [const restrict static 1],
+   FILE io [const restrict static 1]
+)
+{
+   JH_index sequence_id, word_id;
+
+   (void) JH_knowledge_lock_access(k, io);
+
+   /* preceding_words_weights_sum > 0 */
+
+   /*
+    * Step 1: ask {k} for the id of the sequence read from (*sequence)[1]
+    * onward. Access to {k} is released on every exit path of this step.
+    */
+   if
+   (
+      JH_knowledge_find_sequence
+      (
+         k,
+         ((*sequence) + 1),
+         markov_order,
+         &sequence_id
+      ) < 0
+   )
+   {
+      (void) JH_knowledge_unlock_access(k, io);
+
+      JH_S_ERROR(io, "Could not find matching TWS sequence.");
+
+      return -1;
+   }
+
+   (void) JH_knowledge_unlock_access(k, io);
+
+   (void) JH_knowledge_lock_access(k, io);
+
+   /*
+    * Step 2: have {k} pick the word to insert, given the current leftmost
+    * word and the sequence id found above.
+    */
+   if
+   (
+      JH_knowledge_random_tws_target
+      (
+         k,
+         &word_id,
+         (*sequence)[0],
+         sequence_id
+      ) < 0
+   )
+   {
+      (void) JH_knowledge_unlock_access(k, io);
+
+      JH_S_ERROR(io, "Could not find matching TWS target.");
+
+      return -1;
+   }
+
+   (void) JH_knowledge_unlock_access(k, io);
+
+   /* Step 3: {k} is no longer needed, insert the chosen word at index 0. */
+   if
+   (
+      JH_sequence_append_left
+      (
+         word_id,
+         sequence,
+         sequence_capacity,
+         sequence_length,
+         io
+      ) < 0
+   )
+   {
+      /* -2, as documented above: "iff the addition of that id failed". */
+      return -2;
+   }
+
+   return 0;
+}
+
+/*
+ * Continuously adds ids to the left of the sequence, according to what is known
+ * as likely to fit there. If {credits} is NULL, it will stop upon reaching
+ * the id indicating the start of a sequence, otherwise it will also limit to
+ * {*credits} words added (including the one indicating the start of a
+ * sequence).
+ * This requires the reallocation of {sequence}. 
The freeing of the previous + * memory space is handled. If an error happened, {sequence} remains unfreed. + * Returns: + *    0 on success. + *    -1 iff we did not manage to have JH_START_OF_SEQUENCE_ID as a starting + *       point. This cannot be caused by lack of {*credits}, but rather by a + *       memory allocation problem or a more important issue in {k}. Indeed, it + *       could mean we do not know any word preceding {*sequence[0]}, not even + *       JH_START_OF_SEQUENCE_ID. + * Pre: + *    (initialized {sequence}) + *    (initialized {sequence_size}) + *    (initialized {k}) + *    (> {markov_order} 1) + *    (initialized {*sequence[0..(MARKOV_ORDER - 1)]}) + */ +static int complete_left_part_of_sequence +( +   JH_index * sequence [restrict static 1], +   size_t sequence_capacity [const restrict static 1], +   size_t sequence_length [const restrict static 1], +   const JH_index markov_order, +   size_t credits [const restrict], +   struct JH_knowledge k [const restrict static 1], +   FILE io [const restrict static 1] +) +{ +   for (;;) +   { +      if ((credits == (size_t *) NULL) || (*credits > 0)) +      { +         if +         ( +            extend_left +            ( +               sequence, +               sequence_capacity, +               sequence_length, +               markov_order, +               k, +               io +            ) < 0 +         ) +         { +            /* We are sure *sequence[0] is defined. */ +            if ((*sequence)[0] == JH_START_OF_SEQUENCE_ID) +            { +               /* +                * We failed to add a word, but it was because none should have +                * been added. +                */ +               return 0; +            } +            else +            { +               return -1; +            } +         } +      } +      else +      { +         /* No more credits available, the sequence will have to start here. 
*/ +         (*sequence)[0] = JH_START_OF_SEQUENCE_ID; + +         return 0; +      } + +      if (credits != (size_t *) NULL) +      { +         *credits -= 1; +      } + +      /* We are sure *sequence[0] is defined. */ +      switch ((*sequence)[0]) +      { +         case JH_END_OF_SEQUENCE_ID: +            JH_S_WARNING +            ( +               io, +               "END OF LINE was added at the left part of an sequence." +            ); + +            (*sequence)[0] = JH_START_OF_SEQUENCE_ID; +            return 0; + +         case JH_START_OF_SEQUENCE_ID: +            return 0; + +         default: +            break; +      } +   } +} + +/******************************************************************************/ +/** ADDING ELEMENTS TO THE RIGHT **********************************************/ +/******************************************************************************/ + + +/* + * Adds an id to the right of the sequence, according to what is known as likely + * to fit there. + * This requires the reallocation of {sequence}. The freeing of the previous + * memory space is handled. If an error happened, {*sequence} remains untouched. + * Semaphore: + *    Takes then releases access for {k}. + * Returns: + *    0 on success. + *    -1 iff nothing fitting was found. + *    -2 iff the addition of that id failed. 
+ * Pre:
+ *    (initialized {sequence})
+ *    (initialized {k})
+ *    (> {markov_order} 1)
+ *    (initialized {*sequence[0..(MARKOV_ORDER - 1)]})
+ */
+static int extend_right
+(
+   JH_index * sequence [const restrict static 1],
+   size_t sequence_capacity [const restrict static 1],
+   size_t sequence_length [const restrict static 1],
+   const JH_index markov_order,
+   struct JH_knowledge k [const restrict static 1],
+   FILE io [const restrict static 1]
+)
+{
+   JH_index sequence_id, word_id;
+
+   (void) JH_knowledge_lock_access(k, io);
+
+   /* preceding_words_weights_sum > 0 */
+
+   /*
+    * Step 1: ask {k} for the id of the sequence made of the last
+    * {markov_order} elements of {*sequence}.
+    */
+   if
+   (
+      JH_knowledge_find_sequence
+      (
+         k,
+         ((*sequence) + (*sequence_length - markov_order)),
+         markov_order,
+         &sequence_id
+      ) < 0
+   )
+   {
+      (void) JH_knowledge_unlock_access(k, io);
+
+      JH_S_PROG_ERROR
+      (
+         io,
+         "Knowledge consistency error: generated markov sequence could not be "
+         "found."
+      );
+
+      return -1;
+   }
+
+   (void) JH_knowledge_unlock_access(k, io);
+
+   (void) JH_knowledge_lock_access(k, io);
+
+   /*
+    * Step 2: have {k} pick the word to append, given the sequence id found
+    * above and the current rightmost word.
+    */
+   if
+   (
+      JH_knowledge_random_swt_target
+      (
+         k,
+         sequence_id,
+         (*sequence)[*sequence_length - 1],
+         &word_id
+      ) < 0
+   )
+   {
+      (void) JH_knowledge_unlock_access(k, io);
+
+      JH_S_PROG_ERROR
+      (
+         io,
+         "Knowledge consistency error: generated markov sequence had no known "
+         "targets."
+      );
+
+      return -1;
+   }
+
+   (void) JH_knowledge_unlock_access(k, io);
+
+
+   /* following_words_weights_sum > 0 */
+
+   /*
+    * Step 3: access to {k} has already been released at this point, so it
+    * must not be released again on either path below (every lock above is
+    * paired with exactly one unlock).
+    */
+   if
+   (
+      JH_sequence_append_right
+      (
+         sequence,
+         word_id,
+         sequence_capacity,
+         sequence_length,
+         io
+      ) < 0
+   )
+   {
+      /* -2, as documented above: "iff the addition of that id failed". */
+      return -2;
+   }
+
+   return 0;
+}
+
+/*
+ * Continuously adds ids to the right of the sequence, according to what is
+ * known as likely to fit there. If {credits} is NULL, it will stop upon
+ * reaching the id indicating the end of a sequence, otherwise it will also
+ * limit to {*credits} words added (including the one indicating the end of a
+ * sequence).
+ * This requires the reallocation of {sequence}. The freeing of the previous
+ * memory space is handled. If an error happened, {sequence} remain untouched.
+ * Returns:
+ *    0 on success.
+ *    -1 iff we did not manage to have JH_END_OF_SEQUENCE_ID as a stopping
+ *       point. This cannot be caused by lack of {*credits}, but rather by a
+ *       memory allocation problem or a more important issue in {k}. Indeed, it
+ *       could mean we do not know any word following {*sequence[0]}, not even
+ *       JH_END_OF_SEQUENCE_ID.
+ * Pre:
+ *    (initialized {sequence})
+ *    (initialized {*sequence_size})
+ *    (initialized {k})
+ *    (> {markov_order} 1)
+ *    (initialized {*sequence[0..(MARKOV_ORDER - 1)]})
+ */
+static int complete_right_part_of_sequence
+(
+   JH_index * sequence [const restrict static 1],
+   size_t sequence_capacity [const restrict static 1],
+   size_t sequence_length [const restrict static 1],
+   const JH_index markov_order,
+   size_t credits [const restrict],
+   struct JH_knowledge k [const restrict static 1],
+   FILE io [const restrict static 1]
+)
+{
+   for (;;)
+   {
+      /* A NULL {credits} means "no limit on the number of added words". */
+      if ((credits == (size_t *) NULL) || (*credits > 0))
+      {
+         if
+         (
+            extend_right
+            (
+               sequence,
+               sequence_capacity,
+               sequence_length,
+               markov_order,
+               k,
+               io
+            ) < 0
+         )
+         {
+            /* Safe: (> sequence_length 1) */
+            if ((*sequence)[(*sequence_length - 1)] == JH_END_OF_SEQUENCE_ID)
+            {
+               /*
+                * We failed to add a word, but it was because none should have
+                * been added.
+                */
+               return 0;
+            }
+            else
+            {
+               return -1;
+            }
+         }
+      }
+      else
+      {
+         /* No more credits available, we end the sequence. */
+         (*sequence)[((*sequence_length) - 1)] = JH_END_OF_SEQUENCE_ID;
+
+         return 0;
+      }
+
+      /* One word was added: account for it when a limit is in effect. */
+      if (credits != (size_t *) NULL)
+      {
+         *credits -= 1;
+      }
+
+      /* Safe: (> sequence_length 1) */
+      switch ((*sequence)[((*sequence_length) - 1)])
+      {
+         case JH_START_OF_SEQUENCE_ID:
+            /*
+             * A start marker has no place on the right side: report it, then
+             * overwrite it with the end marker so the sequence is terminated.
+             */
+            JH_S_WARNING
+            (
+               io,
+               "START OF LINE was added at the right part of an sequence."
+            );
+
+            (*sequence)[((*sequence_length) - 1)] = JH_END_OF_SEQUENCE_ID;
+            return 0;
+
+         case JH_END_OF_SEQUENCE_ID:
+            return 0;
+
+         default:
+            break;
+      }
+   }
+}
+
+/******************************************************************************/
+/** INITIALIZING SEQUENCE *****************************************************/
+/******************************************************************************/
+
+/*
+ * Initializes an pre-allocated sequence by filling it with {initial_word}
+ * followed by a sequence of ({markov_order} - 1) words that is known to have
+ * followed {initial_word} at least once. This sequence is chosen depending on
+ * how often {k} indicates it has followed {initial_word}.
+ * Returns:
+ *    0 on success.
+ *    -1 if no such sequence was found.
+ * Pre:
+ *    (size (= {sequence} {markov_order}))
+ *    (initialized {k})
+ *    (> markov_order 1)
+ * Post:
+ *    (initialized {sequence[0..(markov_order - 1)]})
+ */
+static int initialize_sequence
+(
+   JH_index sequence [const restrict static 1],
+   const JH_index initial_word,
+   const JH_index markov_order,
+   struct JH_knowledge k [const static 1],
+   FILE io [const restrict static 1]
+)
+{
+   sequence[(markov_order - 1)] = initial_word;
+
+   (void) JH_knowledge_lock_access(k, io);
+
+   if
+   (
+      JH_knowledge_copy_random_swt_sequence
+      (
+         k,
+         sequence,
+         initial_word,
+         markov_order,
+         io
+      ) < 0
+   )
+   {
+      (void) JH_knowledge_unlock_access(k, io);
+
+      return -1;
+   }
+
+   (void) JH_knowledge_unlock_access(k, io);
+
+   if (JH_DEBUG_SEQUENCE_CREATION_INIT)
+   {
+      JH_index i;
+
+      for (i = 0; i < markov_order; ++i)
+      {
+         JH_DEBUG
+         (
+            io,
+            JH_DEBUG_SEQUENCE_CREATION_INIT,
+            "sequence[%u]: %u",
+            i,
+            sequence[i]
+         );
+      }
+   }
+
+   return 0; +} + +/******************************************************************************/ +/** EXPORTED ******************************************************************/ +/******************************************************************************/ + +/* See "sequence.h" */ +int JH_sequence_create_from +( +   const JH_index initial_word, +   size_t credits [const restrict], +   struct JH_knowledge k [const restrict static 1], +   const JH_index markov_order, +   JH_index * sequence [const restrict static 1], +   size_t sequence_capacity [const restrict static 1], +   size_t sequence_length [const restrict static 1], +   FILE io [const restrict static 1] +) +{ +   if +   ( +      JH_sequence_ensure_capacity +      ( +         sequence, +         sequence_capacity, +         markov_order, +         io +      ) < 0 +   ) +   { +      *sequence_length = 0; + +      return -1; +   } + +   if +   ( +      initialize_sequence +      ( +         *sequence, +         initial_word, +         markov_order, +         k, +         io +      ) < 0 +   ) +   { +      JH_S_ERROR(io, "Failed to create start of new sequence."); + +      *sequence_length = 0; + +      return -2; +   } + +   *sequence_length = markov_order; + +   if +   ( +      complete_right_part_of_sequence +      ( +         sequence, +         sequence_capacity, +         sequence_length, +         markov_order, +         credits, +         k, +         io +      ) < 0 +   ) +   { +      JH_S_ERROR(io, "Failed to create right part of sequence."); + +      *sequence_length = 0; + +      return -3; +   } + +   if +   ( +      complete_left_part_of_sequence +      ( +         sequence, +         sequence_capacity, +         sequence_length, +         markov_order, +         credits, +         k, +         io +      ) < 0 +   ) +   { +      JH_S_ERROR(io, "Failed to create left part of sequence."); + +      *sequence_length = 0; + +      return -4; +   } + +   if (*sequence_length < 3) +   { +      /* 
2 elements, for start and stop. */ +      JH_S_ERROR(io, "Created sequence was empty."); + +      *sequence_length = 0; + +      return -5; +   } + +   return 0; +} diff --git a/src/sequence/sequence_from_string.c b/src/sequence/sequence_from_string.c new file mode 100644 index 0000000..5b92943 --- /dev/null +++ b/src/sequence/sequence_from_string.c @@ -0,0 +1,219 @@ +#define _POSIX_C_SOURCE 200809L +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../core/char.h" +#include "../core/index.h" + +#include "../error/error.h" + +#include "../knowledge/knowledge.h" + +#include "sequence.h" + +/******************************************************************************/ +/** HANDLING WORDS ************************************************************/ +/******************************************************************************/ + +static int add_word_to_sequence +( +   const JH_char string [const restrict static 1], +   const size_t word_start, +   const size_t word_length, +   JH_index * sequence [const restrict static 1], +   size_t sequence_capacity [const restrict static 1], +   size_t sequence_length [const restrict static 1], +   struct JH_knowledge k [const restrict static 1], +   FILE io [const restrict static 1] +) +{ +   JH_index word_id; + +   (void) JH_knowledge_lock_access(k, io); + +   if +   ( +      JH_knowledge_learn_word +      ( +         k, +         (string + word_start), +         word_length, +         &word_id, +         io +      ) < 0 +   ) +   { +      (void) JH_knowledge_unlock_access(k, io); + +      return -1; +   } + +   (void) JH_knowledge_unlock_access(k, io); + +   if +   ( +      JH_sequence_append_right +      ( +         sequence, +         word_id, +         sequence_capacity, +         sequence_length, +         io +      ) < 0 +   ) +   { +      return -1; +   } + +   return 0; +} + +static int find_word +( +   const JH_char string [const restrict static 1], +   const size_t 
string_length, +   const size_t offset, +   size_t word_start [const restrict static 1], +   size_t word_length [const restrict static 1] +) +{ +   size_t i; + +   i = offset; + +   while ((string[i] == ' ') && (i < string_length)) +   { +      i += 1; +   } + +   if (i >= string_length) +   { +      return -1; +   } + +   *word_start = i; + +   while ((string[i] != ' ') && (i < string_length)) +   { +      i += 1; +   } + +   *word_length = (i - *word_start); + +   return 0; +} + +/******************************************************************************/ +/** EXPORTED ******************************************************************/ +/******************************************************************************/ + +/* See: "sequence.h" */ +int JH_sequence_from_undercase_string +( +   const JH_char string [const restrict], +   const size_t string_length, +   struct JH_knowledge k [const restrict static 1], +   JH_index * sequence [const restrict static 1], +   size_t sequence_capacity [const restrict static 1], +   size_t sequence_length [const restrict static 1], +   FILE io [const restrict static 1] +) +{ +   size_t word_start, word_length; +   size_t i; + +   i = 0; + +   *sequence_length = 0; + +   JH_DEBUG +   ( +      io, +      JH_DEBUG_SEQUENCE_FROM_STRING, +      "Converting string of size %lu to sequence.", +      string_length +   ); + +   if +   ( +      JH_sequence_append_right +      ( +         sequence, +         JH_START_OF_SEQUENCE_ID, +         sequence_capacity, +         sequence_length, +         io +      ) < 0 +   ) +   { +      *sequence_length = 0; + +      return -1; +   } + +   JH_S_DEBUG +   ( +      io, +      JH_DEBUG_SEQUENCE_FROM_STRING, +      "[SOS] added to sequence." 
+   ); + +   while (i < string_length) +   { +      if (find_word(string, string_length, i, &word_start, &word_length) < 0) +      { +         break; +      } + +      JH_DEBUG +      ( +         io, +         JH_DEBUG_SEQUENCE_FROM_STRING, +         "Word of size %lu found in string at index %lu.", +         word_length, +         word_start +      ); + +      if +      ( +         add_word_to_sequence +         ( +            string, +            word_start, +            word_length, +            sequence, +            sequence_capacity, +            sequence_length, +            k, +            io +         ) < 0 +      ) +      { +         *sequence_length = 0; + +         return -1; +      } + +      i = (word_start + word_length); +   } + +   if +   ( +      JH_sequence_append_right +      ( +         sequence, +         JH_END_OF_SEQUENCE_ID, +         sequence_capacity, +         sequence_length, +         io +      ) < 0 +   ) +   { +      *sequence_length = 0; + +      return -1; +   } + +   return 0; +} diff --git a/src/sequence/sequence_to_string.c b/src/sequence/sequence_to_string.c new file mode 100644 index 0000000..6794fcb --- /dev/null +++ b/src/sequence/sequence_to_string.c @@ -0,0 +1,196 @@ +#define _POSIX_C_SOURCE 200809L +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* defines SIZE_MAX */ + +#include "../core/char.h" +#include "../core/index.h" + +#include "../error/error.h" + +#include "../knowledge/knowledge.h" + +#include "sequence.h" + +/******************************************************************************/ +/** MEMORY ALLOCATION *********************************************************/ +/******************************************************************************/ +static int ensure_string_capacity +( +   JH_char * string [const restrict static 1], +   size_t string_capacity [const restrict static 1], +   const size_t string_required_capacity, +   FILE io [const restrict static 1] +) +{ +   JH_char * 
new_string; + +   if (string_required_capacity <= *string_capacity) +   { +      return 0; +   } + +   new_string = +      (JH_char *) realloc +      ( +         (void *) *string, +         ((size_t) string_required_capacity) * sizeof(JH_char) +      ); + +   if (new_string== (JH_char *) NULL) +   { +      JH_S_ERROR +      ( +         io, +         "Unable to reallocate memory to match string's required size." +      ); + +      return -1; +   } + +   *string_capacity = string_required_capacity; +   *string = new_string; + +   return 1; +} + +/******************************************************************************/ +/** ADD WORD ******************************************************************/ +/******************************************************************************/ +static int increment_required_capacity +( +   size_t current_capacity [const restrict static 1], +   const size_t increase_factor, +   FILE io [const restrict static 1] +) +{ +   if ((JH_INDEX_MAX - increase_factor) < *current_capacity) +   { +      JH_S_ERROR +      ( +         io, +         "String capacity increment aborted, as the new capacity would not " +         "fit in a JH_index variable." +      ); + +      return -1; +   } + +   *current_capacity += increase_factor; + +   if ((SIZE_MAX / sizeof(JH_char)) < *current_capacity) +   { +      *current_capacity -= increase_factor; + +      JH_S_ERROR +      ( +         io, +         "String capacity increment aborted, as the new size would not fit " +         "in a size_t variable." 
+      ); + +      return -2; +   } + +   return 0; +} + +static int add_word +( +   const JH_index word_id, +   struct JH_knowledge k [const restrict static 1], +   JH_char * destination [const restrict static 1], +   size_t destination_capacity [const restrict static 1], +   size_t destination_length [const restrict static 1], +   FILE io [const restrict static 1] +) +{ +   const JH_char * word; +   JH_index word_size; +   size_t insertion_point; + +   if (word_id < JH_RESERVED_IDS_COUNT) +   { +      return 0; +   } + +   (void) JH_knowledge_lock_access(k, io); +   JH_knowledge_get_word(k, word_id, &word, &word_size); +   (void) JH_knowledge_unlock_access(k, io); + +   insertion_point = *destination_length; + +   /* word_size includes '\n', which will be replaced by a space. */ +   /* (word_size == JH_INDEX_MAX) ==> could not have learned word. */ +   if (increment_required_capacity(destination_length, (word_size + 1), io) < 0) +   { +      return -1; +   } + +   if +   ( +      ensure_string_capacity +      ( +         destination, +         destination_capacity, +         *destination_length, +         io +      ) < 0 +   ) +   { +      return -2; +   } + +   memcpy +   ( +      (*destination + insertion_point), +      (const void *) word, +      word_size +   ); + +   (*destination)[*destination_length - 1] = ' '; + +   return 0; +} + +/******************************************************************************/ +/** EXPORTED ******************************************************************/ +/******************************************************************************/ +int JH_sequence_to_undercase_string +( +   const JH_index sequence [const restrict static 1], +   const size_t sequence_length, +   struct JH_knowledge k [const restrict static 1], +   JH_char * destination [const restrict static 1], +   size_t destination_capacity [const restrict static 1], +   size_t destination_length [const restrict static 1], +   FILE io [const restrict static 1] 
+) +{ +   size_t i; + +   *destination_length = 0; + +   for (i = 0; i < sequence_length; ++i) +   { +      if +      ( +         add_word +         ( +            sequence[i], +            k, +            destination, +            destination_capacity, +            destination_length, +            io +         ) < 0 +      ) +      { +         *destination_length = 0; + +         return -1; +      } +   } + +   return 0; +} diff --git a/src/sequence/sequence_types.h b/src/sequence/sequence_types.h new file mode 100644 index 0000000..bce5644 --- /dev/null +++ b/src/sequence/sequence_types.h @@ -0,0 +1,9 @@ +#ifndef _JH_CORE_SEQUENCE_TYPES_H_ +#define _JH_CORE_SEQUENCE_TYPES_H_ + +#define JH_START_OF_SEQUENCE_ID 0 +#define JH_END_OF_SEQUENCE_ID   1 + +#define JH_RESERVED_IDS_COUNT   2 + +#endif | 


