import numpy as np
import pandas as pd
# Demo: replace the missing values of a single DataFrame column with a default.
print('pandas version: ' + pd.__version__)
print()

print('Source dataframe:')
# None in the 'Salary' column marks a missing value; the source printout
# shows these entries as NaN.
X = pd.DataFrame(
    [
        ['John', 10],
        ['Mike', None],
        ['Alice', 20],
        ['Eve', None],
    ],
    columns=['Name', 'Salary'],
)
print(X)

# Assign the filled column back instead of calling
# X['Salary'].fillna(0.0, inplace=True). That chained form mutates the
# object returned by X['Salary'], and under pandas Copy-on-Write the change
# is not written through to X, so the NaNs would silently remain.
X['Salary'] = X['Salary'].fillna(0.0)

print()
print('Target dataframe:')
print(X)
pandas version: 0.18.1
Source dataframe:
Name Salary
0 John 10.0
1 Mike NaN
2 Alice 20.0
3 Eve NaN
Target dataframe:
Name Salary
0 John 10.0
1 Mike 0.0
2 Alice 20.0
3 Eve 0.0
import numpy as np
import pandas as pd
# Demo: split a feature table into "train" and "test" parts depending on
# whether each row's customer_id also appears in a second (labelled) table.
print('pandas version: {}'.format(pd.__version__))

# Customers with a known gender label.
labelled_rows = [
    [1, 'M'],
    [2, 'M'],
    [3, 'F'],
    [4, 'F'],
]
df1 = pd.DataFrame(labelled_rows, columns=['customer_id', 'gender'])
print('df1:')
print(df1)

# Feature rows for all customers, labelled or not.
feature_rows = [
    [1, 100, 'yellow'],
    [2, 150, 'black'],
    [3, 10, 'black'],
    [4, 700, 'red'],
    [5, 200, 'green'],
    [6, 170, 'white'],
]
df2 = pd.DataFrame(
    feature_rows,
    columns=['customer_id', 'feature1', 'feature2'],
)
print('df2:')
print(df2)

# Boolean mask over df2's rows: True where the customer_id occurs in df1.
has_label = df2['customer_id'].isin(df1['customer_id'])

# Rows whose customer is present in df1.
train_df = df2[has_label]
print('train_df:')
print(train_df)

# Remaining rows: customers that df1 does not contain.
test_df = df2[~has_label]
print('test_df:')
print(test_df)
pandas version: 0.18.1
df1:
customer_id gender
0 1 M
1 2 M
2 3 F
3 4 F
df2:
customer_id feature1 feature2
0 1 100 yellow
1 2 150 black
2 3 10 black
3 4 700 red
4 5 200 green
5 6 170 white
train_df:
customer_id feature1 feature2
0 1 100 yellow
1 2 150 black
2 3 10 black
3 4 700 red
test_df:
customer_id feature1 feature2
4 5 200 green
5 6 170 white