학습 데이터와 테스트 데이터를 불러오고
데이터 구조를 파악한 뒤
마지막으로 SMILES 데이터와 병합하여
최종적으로 학습에 사용할 학습 데이터와 테스트 데이터를 만들었습니다.
import pandas as pd
import numpy as np
# torch is used right here, but the file only imports it much further down;
# importing it at the point of use keeps a top-to-bottom run from failing
# with NameError.
import torch

# NOTE: torch.load unpickles the file contents, so only load files that come
# from a trusted source.
test = torch.load('../input/kagglekorea/qm9_test_data.pt')
train = torch.load('../input/kagglekorea/qm9_train_data.pt')
submission = pd.read_csv('../input/kagglekorea/sample_submission.csv')

# `mol` and `smiles` both hold the contents of mol_id_to_smiles.csv.  Read the
# file once and give `smiles` its own copy, so that renaming its columns
# later leaves `mol` untouched.
mol = pd.read_csv('../input/kagglekorea/mol_id_to_smiles.csv')
smiles = mol.copy()
# Quick look at what was loaded.  These are bare expressions: they only
# render output in an interactive session and have no other effect.
(submission.shape, mol.shape)
submission
# Pair every key of `test` with its value and transpose, so that each entry
# of `test` ends up as one column.
pd.DataFrame(zip(test.keys(), test.values())).T
(test.keys(), train.keys())
train['mu']
# Split the entries of `train` by rank: 1-D entries become the columns of a
# flat table, 3-D entries are collected unchanged in a list.  Entries of any
# other rank are skipped.
train_1d_keys = []
train_1d_frames = []
empty_train_3d_set = []
for key, value in train.items():
    rank = value.ndim
    if rank == 1:
        train_1d_keys.append(key)
        train_1d_frames.append(pd.DataFrame(value))
    elif rank == 3:
        empty_train_3d_set.append(value)

# Concatenate once rather than growing the frame inside the loop (every
# pd.concat call copies everything accumulated so far).  Fall back to an
# empty frame when there is nothing to concatenate, since pd.concat rejects
# an empty list.
empty_train_set = (
    pd.concat(train_1d_frames, axis=1) if train_1d_frames else pd.DataFrame()
)

# Label each column with the key it was built from.  Slicing a fixed number
# of trailing keys off `train.keys()` gives the same labels only while
# exactly the last four entries are the non-1-D ones; otherwise it mislabels
# the columns or fails on a length mismatch.
empty_train_set.columns = train_1d_keys
# Split the entries of `test` by rank: 1-D entries become the columns of a
# flat table, 3-D entries are collected unchanged in a list.  Entries of any
# other rank are skipped.
test_1d_frames = []
empty_test_3d_set = []
for value in test.values():
    rank = value.ndim
    if rank == 1:
        test_1d_frames.append(pd.DataFrame(value))
    elif rank == 3:
        empty_test_3d_set.append(value)

# Concatenate once rather than growing the frame inside the loop (every
# pd.concat call copies everything accumulated so far).  Fall back to an
# empty frame when there is nothing to concatenate, since pd.concat rejects
# an empty list.  Column labels are assigned further down in the file.
empty_test_set = (
    pd.concat(test_1d_frames, axis=1) if test_1d_frames else pd.DataFrame()
)
# Shapes of the first 3-D entry on each side (displayed in an interactive
# session only).
empty_train_3d_set[0].shape
empty_test_3d_set[0].shape

# Flatten the first 3-D train entry to 2-D: the first two axes are merged
# into rows, and column 0 holds the first-axis index each row came from.
train_cube = empty_train_3d_set[0]
m, n, r = train_cube.shape
train_row_ids = np.arange(m).repeat(n)
train_flat = train_cube.reshape(m * n, -1)
train_stack = np.column_stack((train_row_ids, train_flat))
train_3d = pd.DataFrame(train_stack)

# Same flattening for the first 3-D test entry.  Note that m, n and r are
# overwritten with the test shape here.
test_cube = empty_test_3d_set[0]
m, n, r = test_cube.shape
test_row_ids = np.arange(m).repeat(n)
test_flat = test_cube.reshape(m * n, -1)
test_stack = np.column_stack((test_row_ids, test_flat))
test_3d = pd.DataFrame(test_stack)
# Display the flattened tables (interactive session only).
train_3d
test_3d
# Alternative view: a single 'rows' column whose cells are the slices
# obtained by iterating over the first axis of the 3-D entry.
pd.DataFrame({'rows': list(empty_train_3d_set[0])})
pd.DataFrame({'rows': list(empty_test_3d_set[0])})
# Label the columns with the keys of the 1-D entries of `test`, in key order;
# those are the entries the flat table was built from.  Slicing a fixed
# number of trailing keys off `test.keys()` gives the same labels only while
# exactly the last four entries are the non-1-D ones.
empty_test_set.columns = [key for key, value in test.items() if value.ndim == 1]
empty_test_set
# The 'mu' column is split off as y_train; every other column of the flat
# train table goes into X_train.
y_train = empty_train_set['mu']
X_train = empty_train_set.drop(columns=['mu'])
# X_test is another name for the same object as empty_test_set (no copy).
X_test = empty_test_set
# Call the first column of each table 'id' so that all three share one key
# name.  The rename is done in place, so any other name bound to the same
# frame object sees the new column name as well.  (The `smiles` rename used
# to be written twice; the second call was a no-op and has been dropped.)
for frame in (X_train, X_test, smiles):
    frame.rename(columns={frame.columns[0]: 'id'}, inplace=True)
X_train.columns, smiles.columns
# Attach the SMILES columns to both feature tables, matching rows on 'id'.
# NOTE(review): merge defaults to an inner join, so rows whose 'id' has no
# match in `smiles` are dropped.  y_train was taken before this merge, so
# confirm that X_train and y_train still line up row for row afterwards.
X_train = X_train.merge(smiles, on='id')
X_train
X_test = X_test.merge(smiles, on='id')
X_test
import torch
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
from keras.layers import Dense, Activation, Dropout, Flatten
