6

[Space-S x KaKR] 그래프 러닝 및 해커톤

SPACE-S

학습데이터 테스트데이터 만들기

paul77ms
2022.10.06 16:10
219

학습데이터와 테스트데이터를 불러오고

데이터 구조를 파악해보고

마지막으로 SMILES 데이터와 병합하여

최종적으로 학습에 사용할 학습데이터와 테스트데이터를 만들었습니다.

 

import numpy as np
import pandas as pd
import torch

# Load the competition inputs. The two .pt files are read with torch.load and
# are used further down as mappings (``.keys()`` / ``[key]``) whose values
# expose ``.ndim``.
# NOTE(review): torch.load unpickles the file contents; only load files from a
# trusted source.
test = torch.load('../input/kagglekorea/qm9_test_data.pt')
train = torch.load('../input/kagglekorea/qm9_train_data.pt')
submission = pd.read_csv('../input/kagglekorea/sample_submission.csv')
mol = pd.read_csv('../input/kagglekorea/mol_id_to_smiles.csv')
# `smiles` holds the same table as `mol`. Take an independent copy instead of
# parsing the CSV a second time, so in-place edits to `smiles` later in the
# script leave `mol` untouched.
smiles = mol.copy()

# Notebook-style inspection of the loaded objects (bare expressions, no
# assignments).
(submission.shape, mol.shape)

submission

# Transposed frame built from the (key, value) pairs of `test`.
pd.DataFrame(zip(test.keys(), test.values())).T

(test.keys(), train.keys())

train['mu']

# Split the entries of `train` by dimensionality: 1-D entries become the
# columns of one table, 3-D entries are collected unchanged, and entries of
# any other rank are skipped.
train_1d_keys = [key for key in train.keys() if train[key].ndim == 1]
empty_train_3d_set = [train[key] for key in train.keys() if train[key].ndim == 3]

# Concatenate once rather than growing the frame inside a loop.
if train_1d_keys:
    empty_train_set = pd.concat(
        [pd.DataFrame(train[key]) for key in train_1d_keys], axis=1
    )
else:
    empty_train_set = pd.DataFrame()

# Label every column with the key it was built from. Slicing the key list with
# `[:-4]` only produced the right labels when exactly the last four keys were
# the non-1-D ones.
empty_train_set.columns = train_1d_keys

# Split `test` by dimensionality: 1-D entries are joined side by side into one
# frame, 3-D entries are gathered in a list, other ranks are ignored.
test_1d_frames = [
    pd.DataFrame(test[name]) for name in test.keys() if test[name].ndim == 1
]
empty_test_3d_set = [test[name] for name in test.keys() if test[name].ndim == 3]
# Start from an empty frame so the result is a DataFrame even when no entry
# is 1-D.
empty_test_set = pd.concat([pd.DataFrame(), *test_1d_frames], axis=1)
        
# Shapes of the first 3-D entry on each side.
empty_train_3d_set[0].shape

empty_test_3d_set[0].shape

# Flatten the first 3-D train entry from (m, n, r) to m*n rows and prepend a
# column holding the index along the first axis (each index repeated n times).
train_cube = empty_train_3d_set[0]
m, n, r = train_cube.shape
train_block_index = np.repeat(np.arange(m), n)
train_rows = train_cube.reshape(m * n, -1)
train_stack = np.column_stack((train_block_index, train_rows))
train_3d = pd.DataFrame(train_stack)


# Same flattening for the first 3-D test entry.
test_cube = empty_test_3d_set[0]
m, n, r = test_cube.shape
test_block_index = np.repeat(np.arange(m), n)
test_rows = test_cube.reshape(m * n, -1)
test_stack = np.column_stack((test_block_index, test_rows))
test_3d = pd.DataFrame(test_stack)

train_3d

test_3d

# Single-column ('rows') frames built from iterating the first 3-D entry on
# each side along its first axis.
pd.DataFrame({'rows': list(empty_train_3d_set[0])})

pd.DataFrame({'rows': list(empty_test_3d_set[0])})

# Name each column of the 1-D test table after the key it was built from.
# Selecting keys by `ndim == 1` mirrors how the table was assembled, instead
# of assuming that the last four keys are the non-1-D ones (`[:-4]`).
empty_test_set.columns = [key for key in test.keys() if test[key].ndim == 1]

empty_test_set

# Target is the 'mu' column; features are every other column of the 1-D
# train table. The test table is used as-is (same object, not a copy).
y_train = empty_train_set['mu']
X_train = empty_train_set.drop(columns=['mu'])
X_test = empty_test_set

# Rename the first column of each feature table to 'id'. The rename is done
# in place, so for the test side it also changes `empty_test_set`.
train_first_col = X_train.columns[0]
X_train.rename(columns={train_first_col: 'id'}, inplace=True)
test_first_col = X_test.columns[0]
X_test.rename(columns={test_first_col: 'id'}, inplace=True)

# Rename the first column of `smiles` to 'id' so it shares a key name with
# X_train / X_test. This statement used to appear twice in a row; the second
# run renamed 'id' to 'id' and had no effect, so one call is enough.
smiles.rename(columns={smiles.columns[0]: 'id'}, inplace=True)

X_train.columns, smiles.columns

# Attach the columns of `smiles` to the training features by matching on 'id'.
# NOTE(review): an inner join drops rows whose 'id' has no match in `smiles`;
# `y_train` was split off before this merge and is not filtered here, so
# confirm that the row counts still agree.
X_train = X_train.merge(smiles, how='inner', on='id')

X_train

# Same join for the test features.
X_test = X_test.merge(smiles, how='inner', on='id')

X_test

import torch
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
from keras.layers import Dense, Activation, Dropout, Flatten
6
2개의 댓글
로그인 후 이용해주세요!