import pandas as pd
import json
import os
import argparse
import numpy as np


def main(opts):
    """Flatten a JSON file of per-example scores into a submission CSV.

    The JSON file ``opts.pred_file`` (inside ``opts.input_folder``) must map
    annotation ids of the form ``<prefix>-<number>`` to 20 scores each, in
    any nesting that flattens to 20 values (e.g. 5 groups x 4 choices).
    The 5 groups are, in order::

        [answer, rationale_conditioned_on_a0, rationale_conditioned_on_a1,
                 rationale_conditioned_on_a2, rationale_conditioned_on_a3]

    One CSV row is written per annotation id, ordered by the numeric part
    of the id, with columns ``<group>_<choice>`` and ``annot_id`` as the
    index column. The CSV is written to ``opts.output_file`` inside
    ``opts.input_folder``. An empty JSON object yields a header-only CSV.

    Args:
        opts: Object with string attributes ``input_folder``, ``pred_file``
            and ``output_file`` (e.g. an ``argparse.Namespace``).

    Raises:
        ValueError: If an entry does not hold exactly 20 values, or the
            part of an id after the first "-" is not an integer.
        IndexError: If an annotation id contains no "-".
    """
    n_choices = 4
    group_names = ['answer'] + [f'rationale_conditioned_on_a{i}'
                                for i in range(n_choices)]
    columns = [f'{group_name}_{i}'
               for group_name in group_names for i in range(n_choices)]
    n_cols = len(columns)  # 5 groups * 4 choices = 20

    # Only the parsing needs the file handle; release it before processing.
    pred_path = os.path.join(opts.input_folder, opts.pred_file)
    with open(pred_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Order rows by the integer that follows the first "-" in each id, so
    # that e.g. "x-2" sorts before "x-10".
    ordered_data = sorted(data.items(),
                          key=lambda item: int(item[0].split("-")[1]))

    ids = [annot_id for annot_id, _ in ordered_data]
    # Reshaping per entry validates that every example has exactly 20 values.
    rows = [np.array(scores).reshape(n_cols) for _, scores in ordered_data]
    # np.stack rejects an empty list, so build the (0, 20) array explicitly.
    probs = np.stack(rows, 0) if rows else np.empty((0, n_cols))

    # probs is a [num_ex, 20] array; flatten it to a CSV so it's easy to
    # submit.
    probs_df = pd.DataFrame(data=probs, columns=columns,
                            index=pd.Index(ids, name='annot_id'))
    probs_df.to_csv(os.path.join(opts.input_folder, opts.output_file))

if __name__ == "__main__":
    # Command-line entry point: parse the three path arguments and run the
    # conversion.
    parser = argparse.ArgumentParser()

    # Required parameters. Marking them required makes argparse print a
    # usage error when one is missing, instead of a None value reaching
    # os.path.join inside main() and failing with a TypeError.
    parser.add_argument("--pred_file",
                        required=True, type=str,
                        help="The input JSON file.")
    parser.add_argument("--output_file",
                        required=True, type=str,
                        help="The output CSV file.")
    parser.add_argument(
        "--input_folder", required=True, type=str,
        help="The directory where the predicted JSON files are in")

    args = parser.parse_args()

    main(args)