Например, вот так:
import numpy as np
import pandas as pd
# Show which pandas release produced the output below.
print('pandas version: ' + pd.__version__)

# df1: one row per customer, with that customer's gender.
df1 = pd.DataFrame(
    [(1, 'M'), (2, 'M'), (3, 'F'), (4, 'F')],
    columns=['customer_id', 'gender'],
)
print('df1:')
print(df1)

# df2: feature rows for a larger set of customers (ids 1..6).
df2 = pd.DataFrame(
    [
        (1, 100, 'yellow'),
        (2, 150, 'black'),
        (3, 10, 'black'),
        (4, 700, 'red'),
        (5, 200, 'green'),
        (6, 170, 'white'),
    ],
    columns=['customer_id', 'feature1', 'feature2'],
)
print('df2:')
print(df2)

# Boolean mask over df2's rows: True where the customer_id also occurs in df1.
# Computed once and reused (negated) for the complementary selection.
id_in_df1 = df2['customer_id'].isin(df1['customer_id'])

# Rows of df2 whose customer_id is present in df1.
train_df = df2[id_in_df1]
print('train_df:')
print(train_df)

# Remaining rows of df2: customer_id not present in df1.
test_df = df2[~id_in_df1]
print('test_df:')
print(test_df)
pandas version: 0.18.1
df1:
customer_id gender
0 1 M
1 2 M
2 3 F
3 4 F
df2:
customer_id feature1 feature2
0 1 100 yellow
1 2 150 black
2 3 10 black
3 4 700 red
4 5 200 green
5 6 170 white
train_df:
customer_id feature1 feature2
0 1 100 yellow
1 2 150 black
2 3 10 black
3 4 700 red
test_df:
customer_id feature1 feature2
4 5 200 green
5 6 170 white