0
votes
This article is a work in progress. We are a team of two and have only just started exploring the dataset.
The code is available in our GitHub repository:
https://github.com/shamafarabi/Datathon
In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
# Load the two input tables from the working directory:
#   r          - contents of sales.csv
#   masterdata - contents of item_lookup.csv
r = pd.read_csv('sales.csv')
masterdata = pd.read_csv('item_lookup.csv')
In [3]:
# Preview the first rows of the frame loaded from sales.csv (head() defaults to 5 rows).
r.head()
Out[3]:
In [4]:
# Print a summary of the sales frame: column names, dtypes and non-null counts.
r.info()
In [13]:
# Show the first 50 rows of the frame loaded from item_lookup.csv.
masterdata.head(50)
Out[13]:
In [6]:
# List every row of the sales frame that is an exact repeat of an earlier row.
r.loc[r.duplicated()]
Out[6]:
In [7]:
# List every row of the item lookup table that is an exact repeat of an earlier row.
masterdata.loc[masterdata.duplicated()]
Out[7]:
In [8]:
# Overwrite the column labels with space-free names (the original note says
# the purpose is to remove space characters from the headers).
# The assignment is positional: the i-th name below replaces the i-th column,
# so the list must match the column order of item_lookup.csv.
masterdata.columns = [
    'item_id',
    'item_name',
    'unit',
    'order_qty',
    'transport_qty',
    'min_stock',
    'max_stock',
    'item_prio',
    'storage_cost',
    'mhd',
]
print(masterdata.columns)
In [14]:
# Cast the integer-typed columns to float so that every plotted column has
# the same numeric dtype in the plotting cells that follow.
# NOTE(review): item_prio and storage_cost are not cast here - presumably
# they are already float; confirm against the info() output below.
for col in ('item_id', 'order_qty', 'transport_qty',
            'min_stock', 'max_stock', 'mhd'):
    masterdata[col] = masterdata[col].astype(float)

# Print the resulting dtypes to confirm the conversion.
masterdata.info()
In [15]:
# Deliberately overlay all seven item attributes against item_id on a single
# axes for a first rough look. This is not very informative on its own;
# separate per-attribute plots are the intended follow-up.
#
# Each series gets its own legend label so the overlaid point clouds can be
# told apart, and the figure is drawn on an explicit Axes rather than on
# implicit pyplot state.
fig, ax = plt.subplots()
for column in ('order_qty', 'transport_qty', 'min_stock', 'max_stock',
               'item_prio', 'storage_cost', 'mhd'):
    sns.scatterplot(x='item_id', y=column, data=masterdata,
                    label=column, ax=ax)

# The y-axis carries seven different quantities, so give it a neutral label
# instead of the name of a single column.
ax.set(xlabel='item_id', ylabel='value',
       title='Item attributes by item_id (overlay)')
ax.legend(title='attribute')
plt.show()
In [ ]: