/**
 * This program generates a layered MDP as described in
 * "Topological Value Iteration Algorithms (2011), Dai et al., JAIR".
 *
 * The state space is evenly divided into 'n_l' "layers".
 * Every state has 'n_a' applicable actions.
 * Every action has between 1 and 'n_s' possible successor states.
 * Note: if only m < n_s states are eligible as successors of a state, each of
 *       its actions has between 1 and m successors instead.
 *
 * Successors of a state can never be in a lower numbered layer.
 * That is, once a layer is crossed, we can never come back to it.
 */

#include <algorithm>
#include <charconv>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <random>
#include <stdexcept>
#include <unordered_set>
#include <vector>

using namespace std;

// Seed source for the generator below.
std::random_device rd;
// Mersenne Twister engine shared by all random draws in this file; it is
// seeded with a single value obtained from `rd` during static initialization.
std::mt19937 gen{rd()};

/**
 * Parameters of the layered MDP to generate.
 *
 * Plain aggregate; the member order is relied upon by aggregate
 * initialization at the call site.
 */
struct MDPSpecs {
  std::size_t n;    ///< total number of states
  std::size_t n_l;  ///< number of layers the states are divided into
  std::size_t n_a;  ///< number of actions emitted for every non-goal state
  std::size_t n_s;  ///< upper bound on the successors (outcomes) of one action
};

/**
 * Generate k values that sum to 1
 */
vector<float> generate_probabilities(size_t k) {
  vector<float> numbers(k);

  // Generate k random floats
  auto distProb = bind(uniform_real_distribution<float>(0, 1), ref(gen));
  for(size_t i = 0; i < k; ++i)
    numbers[i] = distProb();

  // Normalize so the sum is 1
  const float sum = accumulate(begin(numbers), end(numbers), 0.0f);
  transform(begin(numbers), end(numbers), begin(numbers),
            [sum](auto n) { return n / sum; });

  return numbers;
}

/**
 * Generate a layered MDP and dump a textual representation to stdout
 */
void generate_layered_mdp(const MDPSpecs& specs) {
  const size_t states_per_layer = specs.n / specs.n_l;
  cout << specs.n << '\n';
  cout.precision(3);

  // Generate states
  for(size_t id = 0; id < specs.n - 1; ++id) {
    cout << id << ' ' << specs.n_a << '\n';
    const size_t layer_id = (id * specs.n_l) / specs.n;
    const size_t id_min_neighbor = layer_id * states_per_layer;
    const size_t max_num_successors = specs.n - id_min_neighbor - 1;
    const size_t num_successors = min(specs.n_s, max_num_successors);
    auto distNbSucc =
      bind(uniform_int_distribution<size_t>(1, num_successors), ref(gen));
    auto distSucc =
      bind(uniform_int_distribution<size_t>(id_min_neighbor, specs.n - 1),
           ref(gen));

    // Generate actions of a state
    for(size_t action = 0; action < specs.n_a; ++action) {
      const size_t n_successors = distNbSucc();
      const float cost = 1.0f;
      cout << fixed << cost << ' ' << n_successors;

      // Generate all outcome probabilities of an action
      const vector<float> p = generate_probabilities(n_successors);

      // Generate successors of an action
      unordered_set<size_t> neighbors;
      neighbors.insert(id); // prevents edge-loops
      for(size_t j = 0; j < n_successors; ++j) {
        // prevents two times the same neighbor for an action
        size_t succ = distSucc();
        while(neighbors.count(succ) == 1)
          succ = distSucc();
        neighbors.insert(succ);

        cout << ' ' << succ << ' ' << fixed << p[j];
      }
      cout << '\n';
    }
  }

  // the last state is a goal state (no actions)
  cout << specs.n - 1 << ' ' << 0 << endl;
}

/**
 * Parse an entire C string as a base-10 unsigned integer.
 *
 * Unlike strtoul, this rejects empty input, signs, leading whitespace,
 * trailing characters and out-of-range values instead of silently returning
 * 0 or a wrapped value.
 *
 * @param text  NUL-terminated string to parse
 * @param value receives the parsed number on success
 * @return true if the whole string was a valid number that fits in size_t
 */
static bool parse_size(const char* text, std::size_t& value) {
  const char* const last = text + std::strlen(text);
  const auto [stop, status] = std::from_chars(text, last, value);
  return status == std::errc() && stop == last;
}

/**
 * Entry point: reads the four MDP sizes from the command line and prints the
 * generated MDP to stdout.
 *
 * Expected arguments: |S| n_layers n_actions/states n_max_successors/action.
 * Returns EXIT_FAILURE (after printing a message to stderr) when the argument
 * count is wrong, an argument is not a non-negative integer, or |S|,
 * n_layers or n_max_successors/action is 0.
 */
int main(int argc, char* argv[]) {
  const auto print_usage = [&] {
    std::cerr << "Usage: "
              << argv[0]
              << " |S| n_layers n_actions/states n_max_successors/action"
              << std::endl;
  };

  if(argc != 5) {
    print_usage();
    return EXIT_FAILURE;
  }

  std::size_t parsed[4] = {0, 0, 0, 0};
  for(int i = 0; i < 4; ++i) {
    if(!parse_size(argv[i + 1], parsed[i])) {
      std::cerr << "Invalid argument '" << argv[i + 1]
                << "': expected a non-negative integer\n";
      print_usage();
      return EXIT_FAILURE;
    }
  }

  const MDPSpecs specs = {
    .n   = parsed[0],
    .n_l = parsed[1],
    .n_a = parsed[2],
    .n_s = parsed[3]
  };

  // Zero here would lead to a division by zero, an underflow of |S| - 1, or
  // an empty range for the number of successors inside the generator.
  if(specs.n == 0 || specs.n_l == 0 || specs.n_s == 0) {
    std::cerr << "|S|, n_layers and n_max_successors/action must be at least 1\n";
    print_usage();
    return EXIT_FAILURE;
  }

  generate_layered_mdp(specs);
  return EXIT_SUCCESS;
}
