lightningdot/uniter_model/format_vcr_predictions.py


								import pandas as pd

								import json

								import os

								import argparse

								import numpy as np


								def _predictions_to_frame(data):
								    """Flatten an ``{annot_id: scores}`` mapping into a 20-column DataFrame.

								    Each ``scores`` entry must hold exactly 20 values (either flat or
								    nested as 5 groups of 4). The 5 groups are
								    [answer, rationale_conditioned_on_a0, rationale_conditioned_on_a1,
								             rationale_conditioned_on_a2, rationale_conditioned_on_a3],
								    each with one value per choice.

								    Args:
								        data: dict mapping annotation ids to their scores. Ids are
								            ordered by the integer found in their second
								            dash-separated field.

								    Returns:
								        A DataFrame indexed by ``annot_id`` with one row per annotation
								        and columns named ``<group>_<choice>``.

								    Raises:
								        ValueError: if an entry does not contain exactly 20 values.
								    """
								    num_choices = 4
								    group_names = ['answer'] + [f'rationale_conditioned_on_a{i}'
								                                for i in range(num_choices)]
								    columns = [f'{group_name}_{i}'
								               for group_name in group_names
								               for i in range(num_choices)]

								    ordered_data = sorted(data.items(),
								                          key=lambda item: int(item[0].split("-")[1]))
								    annot_ids = [annot_id for annot_id, _ in ordered_data]
								    # reshape() doubles as a size check: anything but 20 values raises.
								    rows = [np.array(scores).reshape(len(columns))
								            for _, scores in ordered_data]

								    # np.stack refuses an empty list, so an empty prediction file gets
								    # an explicit (0, 20) array and still produces a header-only CSV.
								    probs = np.stack(rows) if rows else np.empty((0, len(columns)))

								    return pd.DataFrame(data=probs,
								                        index=pd.Index(annot_ids, name='annot_id'),
								                        columns=columns)


								def main(opts):
								    """Convert a JSON prediction file into a flat CSV that is easy to submit.

								    Reads ``opts.pred_file`` from ``opts.input_folder`` and writes
								    ``opts.output_file`` into that same folder.

								    Args:
								        opts: object exposing the string attributes ``input_folder``,
								            ``pred_file`` and ``output_file``.
								    """
								    pred_path = os.path.join(opts.input_folder, opts.pred_file)
								    # Keep the file open only for the parse itself.
								    with open(pred_path, "r", encoding="utf-8") as f:
								        data = json.load(f)

								    probs_df = _predictions_to_frame(data)
								    probs_df.to_csv(os.path.join(opts.input_folder, opts.output_file))


								if __name__ == "__main__":
								    # Command-line entry point: parse the three path options and run main().
								    parser = argparse.ArgumentParser()

								    # Required parameters. main() passes all three to os.path.join(),
								    # which rejects None, so they are marked required to fail early
								    # with a usage message instead of a TypeError.
								    parser.add_argument("--pred_file",
								                        required=True, type=str,
								                        help="The input JSON file.")
								    parser.add_argument("--output_file",
								                        required=True, type=str,
								                        help="The output CSV file.")
								    parser.add_argument(
								        "--input_folder", required=True, type=str,
								        help="The directory where the predicted JSON files are in")

								    args = parser.parse_args()

								    main(args)