그 외
[그 외] ETRI MRC JSON 파일을 pandas DataFrame(df)으로 바꾸는 코드
Dong's Universe
2023. 6. 18. 14:17
import json

import pandas as pd

# Flatten the ETRI MRC JSON file into a DataFrame with one row per question.
# The key accesses below assume the nesting
#   data -> paragraphs -> qas -> answers.

# Decode explicitly as UTF-8 (the encoding JSON interchange uses) instead of
# relying on the platform's locale default, which breaks on Korean text on
# systems whose default is e.g. cp949.
with open("../data/20181101_ETRI_MRC_v1.json", 'r', encoding="utf-8") as f:
    jf = json.load(f)

# Column accumulators; each list gains exactly one entry per question.
titles = []
contexts = []
questions = []
ids = []
answers = []
document_ids = []
index_level_0s = []

# Number of top-level documents in the file.
print(len(jf['data']))

for data in jf['data']:
    title = data['title']
    # Walk every paragraph, not just the first one, so that documents with
    # several paragraphs do not silently lose their later questions.
    for paragraph in data['paragraphs']:
        context = paragraph['context']
        for qa in paragraph['qas']:
            # Keep only the first answer, wrapped in single-element lists.
            # Slicing (rather than indexing [0]) yields empty lists instead of
            # an IndexError when a question carries no answers.
            first_answer = qa['answers'][:1]
            answer = {
                'answer_start': [a['answer_start'] for a in first_answer],
                'text': [a['text'] for a in first_answer],
            }

            titles.append(title)
            contexts.append(context)
            questions.append(qa['question'])
            ids.append(qa['id'])
            answers.append(answer)
            # Constant placeholders: both columns are always 0.
            # NOTE(review): presumably present only to mirror the column
            # layout of another dataset -- confirm with the consumer.
            document_ids.append(0)
            index_level_0s.append(0)

df = pd.DataFrame({
    'title': titles,
    'context': contexts,
    'question': questions,
    'id': ids,
    'answers': answers,
    'document_id': document_ids,
    '__index_level_0__': index_level_0s,
})

# Bare expression so a notebook cell displays the resulting frame.
df