Python Programs to join, merge and concatenate Dataframes – 2022

Machine Learning Projects

Hey guys, in this blog we will see Python Programs to join, merge, and concatenate Dataframes.

Joining Dataframes

Joining on Index

#Joining
# importing pandas module
import pandas as pd 
  
# Define a dictionary containing employee data 
data1 = {'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'], 
        'Age':[27, 24, 22, 32]} 
    
# Define a dictionary containing employee data 
data2 = {'Address':['Allahabad', 'Kannuaj', 'Allahabad', 'Kannuaj'], 
        'Qualification':['MCA', 'Phd', 'Bcom', 'B.hons']} 
  
# Convert the dictionary into DataFrame  
df1 = pd.DataFrame(data1,index=['K0', 'K1', 'K2', 'K3'])
  
# Convert the dictionary into DataFrame  
df2 = pd.DataFrame(data2, index=['K0', 'K2', 'K3', 'K4'])
 
 
print(df1, "\n\n", df2)  

# joining dataframe
df = df1.join(df2)
df
join

Here we have simply used df1.join(df2) which says that join df2 with df1. Keep all the data of df1 and only matching indices from df2.

Using the “how” parameter

We will use how = ‘outer’ in order to get a union of the two dataframes.

# joining dataframe
df = df1.join(df2,how='outer')
join

We will use how = ‘inner’ in order to get an intersection of the two dataframes.

# joining dataframe
df = df1.join(df2,how='inner')
join

Using the “on” parameter

Joining dataframe using on in an argument.

# importing pandas module
import pandas as pd 
  
# Define a dictionary containing employee data 
data1 = {'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'], 
        'Age':[27, 24, 22, 32],
        'Key':['K0', 'K1', 'K2', 'K3']} 
    
# Define a dictionary containing employee data 
data2 = {'Address':['Allahabad', 'Kannuaj', 'Allahabad', 'Kannuaj'], 
        'Qualification':['MCA', 'Phd', 'Bcom', 'B.hons']} 
  
# Convert the dictionary into DataFrame  
df1 = pd.DataFrame(data1)
  
# Convert the dictionary into DataFrame  
df2 = pd.DataFrame(data2, index=['K0', 'K2', 'K3', 'K4'])
 
 
print(df1, "\n\n", df2) 

# using on argument in join
df = df1.join(df2, on='Key')
 
df
Joining dataframe using on in an argument.

Merging Dataframes

Example 1: Merging dataframe using one join key

# Merging
# importing pandas module
import pandas as pd 
 
    
# Define a dictionary containing employee data 
data1 = {'key': ['K0', 'K1', 'K2', 'K3'],
         'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'], 
        'Age':[27, 24, 22, 32],} 
   
    
# Convert the dictionary into DataFrame  
df1 = pd.DataFrame(data1)
print(df1)
print('\n\n')


# Define a dictionary containing employee data 
data2 = {'key': ['K0', 'K1', 'K2', 'K3'],
         'Address':['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'], 
        'Qualification':['Btech', 'B.A', 'Bcom', 'B.hons']} 


# Convert the dictionary into DataFrame  
df2 = pd.DataFrame(data2) 
print(df2)
print('\n\n')


df = pd.merge(df1,df2,on='key')
df
Merging dataframe using one join key

Here we are using pandas.merge(df1,df2,on=’key’) which says that joining both the dataframes df1 and df2 on the key column.

Example 2: Merging dataframe using multiple join keys

# Merging on multiple keys
# importing pandas module
import pandas as pd 
 
# Define a dictionary containing employee data 
data1 = {'key': ['K0', 'K1', 'K2', 'K3'],
         'key1': ['K0', 'K1', 'K0', 'K1'],
         'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'], 
        'Age':[27, 24, 22, 32],} 
   
# Define a dictionary containing employee data 
data2 = {'key': ['K0', 'K1', 'K2', 'K3'],
         'key1': ['K0', 'K0', 'K0', 'K0'],
         'Address':['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'], 
        'Qualification':['Btech', 'B.A', 'Bcom', 'B.hons']} 
 
# Convert the dictionary into DataFrame  
df1 = pd.DataFrame(data1)
 
# Convert the dictionary into DataFrame  
df2 = pd.DataFrame(data2) 
  
 
print(df1, "\n\n", df2) 

# merging dataframe using multiple keys
df = pd.merge(df1, df2, on=['key', 'key1'])
 
df
Merging dataframe using multiple join keys

Types of Merges

MERGE METHODJOIN NAMEDESCRIPTION
leftLEFT OUTER JOINUse keys from the left frame only
rightRIGHT OUTER JOINUse keys from the right frame only
outerFULL OUTER JOINUse union of keys from both frames
innerINNER JOINUse intersection of keys from both frames
# importing pandas module
import pandas as pd 
 
# Define a dictionary containing employee data 
data1 = {'key': ['K0', 'K1', 'K2', 'K3'],
         'key1': ['K0', 'K1', 'K0', 'K1'],
         'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'], 
        'Age':[27, 24, 22, 32],} 
   
# Define a dictionary containing employee data 
data2 = {'key': ['K0', 'K1', 'K2', 'K3'],
         'key1': ['K0', 'K0', 'K0', 'K0'],
         'Address':['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'], 
        'Qualification':['Btech', 'B.A', 'Bcom', 'B.hons']} 
 
# Convert the dictionary into DataFrame  
df1 = pd.DataFrame(data1)
 
# Convert the dictionary into DataFrame  
df2 = pd.DataFrame(data2) 
  
merge

Left Merge

 # using keys from left frame
df = pd.merge(df1, df2, how='left', on=['key', 'key1'])
 
left merge

Right Merge

 # using keys from right frame
df = pd.merge(df1, df2, how='right', on=['key', 'key1'])
 
right merge

Outer Merge

# getting union  of keys
df = pd.merge(df1, df2, how='outer', on=['key', 'key1'])
 
outer merge

Inner Merge

# getting intersection of keys
df = pd.merge(df1, df2, how='inner', on=['key', 'key1'])
inner merge

Concatenating Dataframes

# Concatenating
# importing pandas module
import pandas as pd 
 
# Define a dictionary 1 containing employee data 
data1 = {'Name':['Abhishek', 'Shubham', 'Aman', 'Vivek'], 
        'Age':[22, 24, 26, 28], 
        'Address':['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'], 
        'Qualification':['Btech', 'B.A', 'Bcom', 'B.hons']} 

# Convert the dictionary 1 into DataFrame  
df1 = pd.DataFrame(data1,index=[0, 1, 2, 3])
print(df1)
print('\n\n')

# Define a dictionary 2 containing employee data 
data2 = {'Name':['Nishant', 'Sanchit', 'Aniket', 'Amit'], 
        'Age':[19, 15, 15, 32], 
        'Address':['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'], 
        'Qualification':['Msc', 'MA', 'MCA', 'Phd']} 
 
# Convert the dictionary 2 into DataFrame  
df2 = pd.DataFrame(data2, index=[4, 5, 6, 7])
print(df2)
print('\n\n')


df = pd.concat([df1,df2])
df
concat

Here we are using pd.concat([df1,df2]) which says that concatenate both the dataframes row wise (default axis=0).

Concatenating DataFrame by setting logic on axes

# importing pandas module
import pandas as pd 
 
# Define a dictionary containing employee data 
data1 = {'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'], 
        'Age':[27, 24, 22, 32], 
        'Address':['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'], 
        'Qualification':['Msc', 'MA', 'MCA', 'Phd'],
        'Mobile No': [97, 91, 58, 76]} 
   
# Define a dictionary containing employee data 
data2 = {'Name':['Gaurav', 'Anuj', 'Dhiraj', 'Hitesh'], 
        'Age':[22, 32, 12, 52], 
        'Address':['Allahabad', 'Kannuaj', 'Allahabad', 'Kannuaj'], 
        'Qualification':['MCA', 'Phd', 'Bcom', 'B.hons'],
        'Salary':[1000, 2000, 3000, 4000]} 
 
# Convert the dictionary into DataFrame  
df1 = pd.DataFrame(data1,index=[0, 1, 2, 3])
 
# Convert the dictionary into DataFrame  
df2 = pd.DataFrame(data2, index=[2, 3, 6, 7]) 
 
print(df1, "\n\n", df2) 

# applying concat with axes
# join = 'inner'
df = pd.concat([df1, df2], axis=1, join='inner')
 
df
concat
# using a .concat for
# union of dataframe
res2 = pd.concat([df, df1], axis=1, sort=False)
 
res2
concat

Concatenating DataFrame by ignoring indexes

# importing pandas module
import pandas as pd 
  
# Define a dictionary containing employee data 
data1 = {'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'], 
        'Age':[27, 24, 22, 32], 
        'Address':['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'], 
        'Qualification':['Msc', 'MA', 'MCA', 'Phd'],
        'Mobile No': [97, 91, 58, 76]} 
    
# Define a dictionary containing employee data 
data2 = {'Name':['Gaurav', 'Anuj', 'Dhiraj', 'Hitesh'], 
        'Age':[22, 32, 12, 52], 
        'Address':['Allahabad', 'Kannuaj', 'Allahabad', 'Kannuaj'], 
        'Qualification':['MCA', 'Phd', 'Bcom', 'B.hons'],
        'Salary':[1000, 2000, 3000, 4000]} 
  
# Convert the dictionary into DataFrame  
df = pd.DataFrame(data1,index=[0, 1, 2, 3])
  
# Convert the dictionary into DataFrame  
df1 = pd.DataFrame(data2, index=[2, 3, 6, 7]) 
  
  
print(df, "\n\n", df1) 

# using ignore_index
res = pd.concat([df, df1], ignore_index=True)
 
res
concat

Concatenating DataFrame with group keys

# importing pandas module
import pandas as pd 
 
# Define a dictionary containing employee data 
data1 = {'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'], 
        'Age':[27, 24, 22, 32], 
        'Address':['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'], 
        'Qualification':['Msc', 'MA', 'MCA', 'Phd']} 
   
# Define a dictionary containing employee data 
data2 = {'Name':['Abhi', 'Ayushi', 'Dhiraj', 'Hitesh'], 
        'Age':[17, 14, 12, 52], 
        'Address':['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'], 
        'Qualification':['Btech', 'B.A', 'Bcom', 'B.hons']} 
 
# Convert the dictionary into DataFrame  
df = pd.DataFrame(data1,index=[0, 1, 2, 3])
 
# Convert the dictionary into DataFrame  
df1 = pd.DataFrame(data2, index=[4, 5, 6, 7])
 
print(df, "\n\n", df1) 

# using keys 
frames = [df, df1 ]
 
res = pd.concat(frames, keys=['x', 'y'])
res
concat

Check out our other python programming examples

Leave a Comment

Your email address will not be published. Required fields are marked *