[그 외] KorQuAD JSON 파일을 pandas DataFrame으로 바꾸는 코드

2023. 6. 18. 14:11 · 그 외

# Flatten a KorQuAD v1.0 JSON file into a pandas DataFrame with one row per
# question.
import json

import pandas as pd

# The encoding is explicit because the file contains Korean text and open()
# otherwise uses the platform's locale encoding, which is not UTF-8 everywhere
# (JSON itself is UTF-8 by specification).
with open("../data/KorQuAD_v1.0_dev.json", 'r', encoding="utf-8") as f:
    jf = json.load(f)

# One accumulator list per output column; all of them grow in lockstep.
titles = []
contexts = []
questions = []
ids = []
answers = []
document_ids = []
index_level_0s = []

# Number of top-level entries (each carries a 'title' and its 'paragraphs').
print(len(jf['data']))

# Every row receives 0 for both of these columns.
# NOTE(review): these look like placeholders kept only so the column layout
# matches some other dataset -- confirm whether real values are needed.
document_id = 0
index_level_0 = 0

for data in jf['data']:
    title = data['title']

    for ele in data['paragraphs']:
        context = ele['context']
        for subele in ele['qas']:
            # Only the first listed answer is kept; its fields are wrapped in
            # single-element lists.
            first_answer = subele['answers'][0]
            answer = {
                'answer_start': [first_answer['answer_start']],
                'text': [first_answer['text']],
            }
            # Named qa_id rather than id so the builtin id() is not shadowed.
            qa_id = subele['id']
            question = subele['question']

            titles.append(title)
            contexts.append(context)
            questions.append(question)
            ids.append(qa_id)
            answers.append(answer)
            document_ids.append(document_id)
            index_level_0s.append(index_level_0)


df = pd.DataFrame({
    'title': titles,
    'context': contexts,
    'question': questions,
    'id': ids,
    'answers': answers,
    'document_id': document_ids,
    '__index_level_0__': index_level_0s,
})

# Bare expression: shows the frame when run as the last statement of a
# notebook cell; it has no effect when run as a plain script.
df