/**
 * This program generates WetFloor MDP as described in
 * "Learning depth-first search (2006), Bonet and Geffner, ICAPS"
 *
 * The state space is a square navigation grid where some cells are wet and thus slippery.
 * Every cell has a level of wetness: Dry, SlightlyWet or HeavilyWet.
 * Cells are independently chosen as wet with probability p.
 * Wet cells are SlightlyWet with probability q, and HeavilyWet with probability 1-q.
 *
 * Every state has at most 4 applicable actions: Up, Down, Left, Right
 * If a cell is dry, actions are deterministic.
 * If a cell is wet, actions' outcome depends on a parameter r:
 *   If a cell is slightly wet, r = 0.200
 *   If a cell is heavily wet, r = 0.333
 *   r is the upper bound of the probability of each diverging outcome; the
 *   actual probability of each one is drawn uniformly between 0 and r.
 *   ex.: on a heavily wet cell, action UP has:
 *     probability in [0, 0.333] of going diagonal UP-LEFT
 *     probability in [0, 0.333] of going diagonal UP-RIGHT
 *     probability in [0, 0.333] of staying in current position
 *     remaining probability of going UP
 *
 * It is also possible to generate a layered-wetfloor domain where many (as
 * previously described) wetfloor domains are "connected" to each other (like
 * multiple rooms connected together in a house). The "rooms" are chained such
 * that there is never a cycle between two rooms (paths connecting different
 * rooms are unidirectional).
 */
#include <cstdlib>
#include <functional>
#include <iostream>
#include <limits>
#include <map>
#include <random>
#include <utility>

using namespace std;

// Generation parameters of the wet-floor grid.
constexpr float p = 0.4f; // probability of being wet (slightly or heavily)
constexpr float q = 0.5f; // probability of being slightly wet when wet
// Upper bounds of the uniform range from which the probability of each
// diverging outcome (staying in place or slipping to a diagonal) is drawn.
constexpr float r_slightly = 0.200f; // outcome divergence in slightly wet cells
constexpr float r_heavily  = 0.333f; // outcome divergence in heavily wet cells
constexpr float cost = 1.0f; // cost of every action

// Shared random source: a Mersenne Twister engine seeded once from
// random_device; every random draw in this file goes through `gen`.
random_device rd;
mt19937 gen(rd());
// Callable that draws a float uniformly from [0, 1) using `gen`.
auto distProb = bind(uniform_real_distribution<float>(0.0f, 1.0f), ref(gen));

/**
 * One rectangular "room" of the wet-floor grid.
 *
 * Cells are numbered row by row inside the room; `offset` is added to every
 * printed state id so that several rooms can share a single id space. One
 * randomly chosen cell (`out_id`) additionally offers the End action.
 */
class WetRoom {
 public:
  /// Stores the room dimensions and id offset, then picks the exit cell at random.
  WetRoom(size_t numRows, size_t numCols, size_t roomOffset)
    : nRows(numRows),
      nCols(numCols),
      offset(roomOffset),
      nStates(numRows * numCols),
      out_id(getRandomPosition()) {}

  /// Writes every state of the room, with its actions and outcomes, to stdout.
  void generateAndPrint() const;

 private:
  /// How slippery a cell is.
  enum WetLevel {
    Dry,
    SlightlyWet,
    HeavilyWet
  };

  /// Moves available to the agent.
  enum Action {
    Up,
    Down,
    Left,
    Right,
    End // ends a trial when at goal, or moves on to the neighboring room
  };

  /// Row-major id of a cell, or numeric_limits<size_t>::max() when outside the room.
  size_t state_id(size_t row, size_t col) const;

  /// Uniformly random state id of this room.
  size_t getRandomPosition() const;

  /// Randomly draws the wetness of one cell.
  WetLevel generateWetLevel() const;

  /// Movement actions that stay inside the room, mapped to their target (row, col).
  map<Action, pair<size_t, size_t>>
  getApplicableActions(size_t row, size_t col) const;

  /// Outcome distribution (state id -> probability) of an action taken on a wet cell.
  map<size_t, float>
  getSlipperyNeighbors(size_t row, size_t col, Action a, WetLevel w) const;

  // Declaration order matters: members are initialized in this order and
  // out_id is computed from nStates.
  size_t nRows;
  size_t nCols;
  size_t offset;  // added to every state id when printing
  size_t nStates; // number of cells in the room
  size_t out_id;  // state id going to goal or to neighboring room
};

void WetRoom::generateAndPrint() const {
  cout.precision(3);

  // generate states
  for(size_t i = 0; i < nRows; ++i) {
    for(size_t j = 0; j < nCols; ++j) {
      const WetLevel s_level = generateWetLevel();
      const size_t s_id = state_id(i, j);
      auto actions = getApplicableActions(i, j);
      if(s_id == out_id)
        actions[End];

      cout << s_id + offset << ' ' << size(actions) << '\n';

      // process actions of current state
      for(const auto& [action, new_pos] : actions) {
        if(action == End) {
          cout << fixed << cost << ' ' << 1 << ' '
               << nStates + offset << ' ' << fixed << 1.0f << '\n';
          continue;
        }

        const auto [newI, newJ] = new_pos;
        const size_t new_id = state_id(newI, newJ);
        if(s_level == Dry) {
          cout << fixed << cost << ' ' << 1 << ' '
               << new_id + offset << ' ' << fixed << 1.0f << '\n';
        } else { // the cell is wet
          const auto outcomes = getSlipperyNeighbors(i, j, action, s_level);
          cout << fixed << cost << ' ' << size(outcomes);
          for(const auto& [neighbor, proba] : outcomes)
            cout << ' ' << neighbor + offset << ' ' << fixed << proba;
          cout << '\n';
        }
      }
    }
  }
}

/**
 * Maps a (row, col) position to its row-major state id inside the room.
 *
 * Returns numeric_limits<size_t>::max() for positions outside the grid. This
 * includes coordinates that wrapped around after subtracting 1 from 0, since
 * such values are never smaller than the room dimensions.
 */
size_t WetRoom::state_id(size_t row, size_t col) const {
  const bool insideRoom = row < nRows && col < nCols;
  return insideRoom ? (row * nCols) + col : numeric_limits<size_t>::max();
}

map<WetRoom::Action, pair<size_t, size_t>>
WetRoom::getApplicableActions(size_t row, size_t col) const {
  map<WetRoom::Action, pair<size_t, size_t>> actions;

  if(state_id(row - 1, col) != numeric_limits<size_t>::max())
    actions[Up] = {row - 1, col};
  if(state_id(row + 1, col) != numeric_limits<size_t>::max())
    actions[Down] = {row + 1, col};
  if(state_id(row, col - 1) != numeric_limits<size_t>::max())
    actions[Left] = {row, col - 1};
  if(state_id(row, col + 1) != numeric_limits<size_t>::max())
    actions[Right] = {row, col + 1};

  return actions;
}

/**
 * Builds the outcome distribution of a movement action taken on a wet cell.
 *
 * The diverging outcomes are staying in place and slipping onto either
 * diagonal adjacent to the intended direction (only diagonals inside the room
 * count). Each one receives a probability drawn uniformly between 0 and
 * r_slightly (for SlightlyWet) or r_heavily (for any other level). The
 * intended target receives the remaining probability mass.
 *
 * Returns a map from state id (without the room offset) to probability.
 * Prints "Invalid action" and terminates the program when `a` is not one of
 * Up, Down, Left or Right.
 * NOTE(review): assumes `a` is applicable at (row, col); otherwise the
 * remaining mass is stored under the "outside" sentinel id — confirm callers.
 */
map<size_t, float>
WetRoom::getSlipperyNeighbors(size_t row, size_t col, Action a, WetLevel w) const {
  constexpr size_t outside = numeric_limits<size_t>::max();

  const float maxSlip = (w == SlightlyWet) ? r_slightly : r_heavily;
  uniform_real_distribution<float> slipDist(0.0f, maxSlip);

  map<size_t, float> outcomes;
  float slipMass = 0.0f;
  const auto addSlip = [&](size_t successor) {
    const float share = slipDist(gen);
    outcomes[successor] = share;
    slipMass += share;
  };

  // Staying in place is always a possible diverging outcome.
  addSlip(state_id(row, col));

  // Pick the intended target and the two diagonals flanking it.
  size_t intended = outside;
  size_t firstDiagonal = outside;
  size_t secondDiagonal = outside;
  switch(a) {
    case Up:
      intended       = state_id(row - 1, col);
      firstDiagonal  = state_id(row - 1, col - 1);
      secondDiagonal = state_id(row - 1, col + 1);
      break;
    case Down:
      intended       = state_id(row + 1, col);
      firstDiagonal  = state_id(row + 1, col - 1);
      secondDiagonal = state_id(row + 1, col + 1);
      break;
    case Left:
      intended       = state_id(row, col - 1);
      firstDiagonal  = state_id(row - 1, col - 1);
      secondDiagonal = state_id(row + 1, col - 1);
      break;
    case Right:
      intended       = state_id(row, col + 1);
      firstDiagonal  = state_id(row - 1, col + 1);
      secondDiagonal = state_id(row + 1, col + 1);
      break;
    default:
      cerr << "Invalid action" << endl;
      exit(EXIT_FAILURE);
  }

  // Diagonals outside the room get no probability (and consume no draw).
  if(firstDiagonal != outside)
    addSlip(firstDiagonal);
  if(secondDiagonal != outside)
    addSlip(secondDiagonal);

  // Whatever mass is left goes to the intended move.
  outcomes[intended] = 1.f - slipMass;

  return outcomes;
}

/**
 * Draws a state id uniformly from the closed range [0, nStates - 1] using the
 * shared generator.
 */
size_t WetRoom::getRandomPosition() const {
  uniform_int_distribution<size_t> cellPicker(0, nStates - 1);
  return cellPicker(gen);
}

/**
 * Randomly draws the wetness of a cell.
 *
 * A first draw decides whether the cell is wet (probability p). Only for wet
 * cells, a second draw decides between SlightlyWet (probability q) and
 * HeavilyWet.
 */
WetRoom::WetLevel WetRoom::generateWetLevel() const {
  if(distProb() < p)
    return distProb() < q ? SlightlyWet : HeavilyWet;

  return Dry;
}

/**
 * Parses a strictly positive decimal integer from a command-line argument.
 *
 * @param text  NUL-terminated argument text.
 * @param value receives the parsed number on success; untouched on failure.
 * @return true when `text` consists only of decimal digits, is non-zero and
 *         does not overflow; false otherwise.
 */
static bool parsePositive(const char* text, size_t& value) {
  // strtoul accepts leading whitespace and a sign; insist on a leading digit.
  if(text == nullptr || text[0] < '0' || text[0] > '9')
    return false;

  char* end = nullptr;
  const unsigned long parsed = strtoul(text, &end, 10);
  // strtoul reports overflow by returning the largest unsigned long.
  const bool overflowed = parsed == numeric_limits<unsigned long>::max();
  if(*end != '\0' || parsed == 0 || overflowed)
    return false;

  value = static_cast<size_t>(parsed);
  return true;
}

/**
 * Entry point: prints a (layered) wet-floor MDP to stdout.
 *
 * Usage: <program> side_length [num_rooms=1]
 *
 * The output starts with the total number of states (all room cells plus one
 * final end state), followed by every room in turn and finally the end state,
 * which has no actions. Returns EXIT_FAILURE on a wrong argument count, on
 * arguments that are not positive integers, or when the requested number of
 * states does not fit in size_t.
 */
int main(int argc, char* argv[]) {
  if(argc < 2 || argc > 3) {
    cerr << "Usage: " << (argc > 0 ? argv[0] : "wetfloor")
         << " side_length [num_rooms=1]" << endl;
    return EXIT_FAILURE;
  }

  size_t sideLength = 0;
  size_t nRooms = 1;
  if(!parsePositive(argv[1], sideLength)
     || (argc == 3 && !parsePositive(argv[2], nRooms))) {
    cerr << "side_length and num_rooms must be positive integers" << endl;
    return EXIT_FAILURE;
  }

  // Reject sizes for which sideLength^2 * nRooms + 1 would wrap around.
  constexpr size_t maxValue = numeric_limits<size_t>::max();
  if(sideLength > maxValue / sideLength
     || sideLength * sideLength > (maxValue - 1) / nRooms) {
    cerr << "Requested grid is too large" << endl;
    return EXIT_FAILURE;
  }

  const size_t roomSize = sideLength * sideLength;
  const size_t nStates = roomSize * nRooms;

  cout << nStates + 1 << '\n';
  for(size_t i = 0; i < nRooms; ++i) {
    // Each room occupies a contiguous block of ids starting at its offset.
    const size_t offset = i * roomSize;
    const WetRoom wetRoom(sideLength, sideLength, offset);
    wetRoom.generateAndPrint();
  }

  // end state has no actions
  cout << nStates << ' ' << 0 << endl;

  return EXIT_SUCCESS;
}
