import pandas as pd
import numpy as np
df = pd.read_csv('C:/Users/kuldip_s/Documents/Python
Scripts/drinks.csv')
df
Out[13]:
country |
beer_servings |
spirit_servings |
wine_servings |
total_litres_of_pure_alcohol |
continent |
|
0 |
Afghanistan |
0 |
0 |
0 |
0.0 |
AS |
1 |
Albania |
89 |
132 |
54 |
4.9 |
EU |
2 |
Algeria |
25 |
0 |
14 |
0.7 |
AF |
3 |
Andorra |
245 |
138 |
312 |
12.4 |
EU |
4 |
Angola |
217 |
57 |
45 |
5.9 |
AF |
... |
... |
... |
... |
... |
... |
... |
188 |
Venezuela |
333 |
100 |
3 |
7.7 |
SA |
189 |
Vietnam |
111 |
2 |
1 |
2.0 |
AS |
190 |
Yemen |
6 |
0 |
0 |
0.1 |
AS |
191 |
Zambia |
32 |
19 |
4 |
2.5 |
AF |
192 |
Zimbabwe |
64 |
18 |
4 |
4.7 |
AF |
193 rows × 6 columns
In [5]:
pd.options.display.max_rows
Out[5]:
60
In [6]:
pd.options.display.max_columns
Out[6]:
20
In [7]:
pd.get_option('max_rows')
Out[7]:
60
In [8]:
pd.get_option('max_columns')
Out[8]:
20
In [10]:
pd.reset_option('max_rows')
df
Out[10]:
country |
beer_servings |
spirit_servings |
wine_servings |
total_litres_of_pure_alcohol |
continent |
|
0 |
Afghanistan |
0 |
0 |
0 |
0.0 |
AS |
1 |
Albania |
89 |
132 |
54 |
4.9 |
EU |
2 |
Algeria |
25 |
0 |
14 |
0.7 |
AF |
3 |
Andorra |
245 |
138 |
312 |
12.4 |
EU |
4 |
Angola |
217 |
57 |
45 |
5.9 |
AF |
... |
... |
... |
... |
... |
... |
... |
192 |
Zimbabwe |
64 |
18 |
4 |
4.7 |
AF |
193 rows × 6 columns
In [11]:
pd.options.display.precision
Out[11]:
6
In [14]:
pdf = pd.DataFrame(np.random.randn(5,5))
pdf
Out[14]:
0 |
1 |
2 |
3 |
4 |
|
0 |
-1.309994 |
-0.321530 |
0.570290 |
1.319875 |
1.119663 |
1 |
0.542382 |
1.481424 |
0.740494 |
2.020025 |
0.461774 |
2 |
-0.803047 |
0.648151 |
-0.527519 |
0.032987 |
-0.177919 |
3 |
-0.693263 |
0.252870 |
-0.072195 |
-0.719892 |
0.354932 |
4 |
-0.282920 |
0.180100 |
0.994994 |
0.139111 |
-1.761986 |
In [15]:
type(pdf)
Out[15]:
pandas.core.frame.DataFrame
In [16]:
np.random.randn(5,5)
Out[16]:
array([[-0.51844628,
-0.72715875, -0.89111341, 0.02909364, -1.73662819],
[ 0.39410077, -0.25912634, 0.04872252,
0.29090885, 0.70473386],
[-0.67857312, -1.25726342, 0.17568482,
0.95954918, 0.13878635],
[-0.98655845, 1.28251441, -0.32269209, 1.75882543,
1.18835483],
[ 0.26609078, 1.55897722, -1.60704113, 0.76393953,
0.8338056 ]])
In [17]:
type(np.random.randn(5,5))
Out[17]:
numpy.ndarray
In [18]:
import matplotlib.pyplot as plt
%matplotlib inline
In [19]:
sdf = pd.read_csv('C:/Users/kuldip_s/Documents/Python
Scripts/Stock.csv')
sdf
Out[19]:
Day |
Open |
High |
Low |
Close |
Adj Close |
Volume |
|
0 |
1 |
16.770000 |
17.559999 |
16.730000 |
17.010000 |
16.934999 |
271700 |
1 |
2 |
17.100000 |
17.469999 |
16.950001 |
17.200001 |
17.200001 |
229800 |
2 |
3 |
17.059999 |
17.230000 |
16.870001 |
17.049999 |
17.049999 |
146300 |
17 |
18 |
17.299999 |
17.389999 |
16.450001 |
16.510000 |
16.510000 |
300000 |
18 |
19 |
16.299999 |
16.629999 |
16.000000 |
16.459999 |
16.459999 |
324900 |
19 |
20 |
16.459999 |
16.580000 |
15.980000 |
16.040001 |
16.040001 |
141600 |
In [21]:
sdf.plot()
Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x1819d141ac8>
In [22]:
sdf['Open'].plot()
Out[22]:
<matplotlib.axes._subplots.AxesSubplot
at 0x1819d4a8708>
In [24]:
plt.style.available
Out[24]:
['bmh',
'classic',
'dark_background',
'fast',
'fivethirtyeight',
'ggplot',
'grayscale',
'seaborn-bright',
'seaborn-colorblind',
'seaborn-dark-palette',
'seaborn-dark',
'seaborn-darkgrid',
'seaborn-deep',
'seaborn-muted',
'seaborn-notebook',
'seaborn-paper',
'seaborn-pastel',
'seaborn-poster',
'seaborn-talk',
'seaborn-ticks',
'seaborn-white',
'seaborn-whitegrid',
'seaborn',
'Solarize_Light2',
'tableau-colorblind10',
'_classic_test']
In [25]:
plt.style.use('seaborn')
sdf['Open'].plot()
Out[25]:
<matplotlib.axes._subplots.AxesSubplot
at 0x1819d66adc8>
In [29]:
plt.style.use('dark_background')
sdf[['Open','Close']].plot() # Multiple columns by putting those in a List
Out[29]:
<matplotlib.axes._subplots.AxesSubplot
at 0x1819e82b9c8>
In [30]:
plt.style.use('ggplot')
sdf['Open'].plot.line()
Out[30]:
<matplotlib.axes._subplots.AxesSubplot
at 0x1819e8b0c08>
In [34]:
plt.style.use('seaborn-notebook')
sdf[['Open','Close']].plot.bar()
Out[34]:
<matplotlib.axes._subplots.AxesSubplot
at 0x1819eb6de88>
In [35]:
df.to_dict()
. . .
In [36]:
# Import csv data to
Pandas
df = pd.read_csv('C:/Users/kuldip_s/Documents/Python
Scripts/drinks.csv')
# Export xls data from
Pandas
df.to_excel('C:/Users/kuldip_s/Documents/Python Scripts/drinks_xls.xlsx')
In [37]:
df.columns
Out[37]:
Index(['country',
'beer_servings', 'spirit_servings', 'wine_servings',
'total_litres_of_pure_alcohol',
'continent'],
dtype='object')
In [39]:
# Export selected columns
xls data from Pandas
df[['country','total_litres_of_pure_alcohol','continent']].to_excel('C:/Users/kuldip_s/Documents/Python
Scripts/selected_columns.xlsx')
In [42]:
df.to_json('C:/Users/kuldip_s/Documents/Python
Scripts/selected_columns.json')
In [43]:
df.to_html('C:/Users/kuldip_s/Documents/Python
Scripts/selected_columns.html')
Working with Text data
In [53]:
import pandas as pd
data = pd.read_csv('C:/Users/kuldip_s/Documents/Python
Scripts/SMSSpamCollection.txt',sep='\t',
header = None, names=['Status','Message'])
type(data)
Out[53]:
pandas.core.frame.DataFrame
In [70]:
data.head(10)
Out[70]:
Status |
Message |
|
0 |
ham |
Go until jurong point,
crazy.. Available only ... |
1 |
ham |
Ok lar... Joking wif u
oni... |
2 |
spam |
Free entry in 2 a wkly
comp to win FA Cup fina... |
3 |
ham |
U dun say so early
hor... U c already then say... |
4 |
ham |
Nah I don't think he
goes to usf, he lives aro... |
5 |
spam |
FreeMsg Hey there
darling it's been 3 week's n... |
6 |
ham |
Even my brother is not
like to speak with me. ... |
7 |
ham |
As per your request
'Melle Melle (Oru Minnamin... |
8 |
spam |
WINNER!! As a valued
network customer you have... |
9 |
spam |
Had your mobile 11 months
or more? U R entitle... |
In [56]:
data['Status'].upper()
---------------------------------------------------------------------------
AttributeError
Traceback (most recent call last)
<ipython-input-56-677f1276ac31> in <module>
---->
1 data['Status'].upper()
~\Downloads\Anaconda\lib\site-packages\pandas\core\generic.py in __getattr__(self, name)
5177 if self._info_axis._can_hold_identifiers_and_holds_name(name):
5178 return self[name]
->
5179 return object.__getattribute__(self, name)
5180
5181 def __setattr__(self, name, value):
AttributeError: 'Series' object has no attribute 'upper'
In [58]:
data['Status'].str.upper().head() # Upper case
Out[58]:
0 HAM
1 HAM
2 SPAM
3 HAM
4 HAM
Name:
Status, dtype: object
In [59]:
data['Status'].str.lower().head() # Lower case
Out[59]:
0 ham
1 ham
2 spam
3 ham
4 ham
Name:
Status, dtype: object
In [60]:
data['Status'].str.title().head() # Camel case
Out[60]:
0 Ham
1 Ham
2 Spam
3 Ham
4 Ham
Name:
Status, dtype: object
In [61]:
data['Status'].str.len().head() # find the length of the text column
Out[61]:
0 3
1 3
2 4
3 3
4 3
Name:
Status, dtype: int64
In [62]:
data['Message'].str.len().head()
Out[62]:
0 111
1 29
2 155
3 49
4 61
Name:
Message, dtype: int64
In [65]:
data['Message'][0] # find the value of the 1st row for Message column
Out[65]:
'Go
until jurong point, crazy.. Available only in bugis n great world la e
buffet... Cine there got amore wat...'
In [66]:
data['Message'][1] # find the value of the 2nd row for Message column
Out[66]:
'Ok
lar... Joking wif u oni...'
In [77]:
spam_msg=data[data['Status'] == 'spam'] # Find all the rows for Status = 'spam'
spam_msg.head()
Out[77]:
Status |
Message |
|
2 |
spam |
Free entry in 2 a wkly
comp to win FA Cup fina... |
5 |
spam |
FreeMsg Hey there
darling it's been 3 week's n... |
8 |
spam |
WINNER!! As a valued
network customer you have... |
9 |
spam |
Had your mobile 11
months or more? U R entitle... |
11 |
spam |
SIX chances to win
CASH! From 100 to 20,000 po... |
In [76]:
spam_msg['Message'].head() # Find all the Message column values only for
Status = spam
Out[76]:
2 Free entry in 2 a wkly comp to win FA Cup
fina...
5 FreeMsg Hey there darling it's been 3
week's n...
8 WINNER!! As a valued network customer you
have...
9 Had your mobile 11 months or more? U R
entitle...
11 SIX
chances to win CASH! From 100 to 20,000 po...
Name:
Message, dtype: object
In [79]:
'I am learning Pandas'
Out[79]:
'I
am learning Pandas'
In [80]:
'I am learning Pandas'.replace('I am','You are') # Replace a string
Out[80]:
'You
are learning Pandas'
In [83]:
type(spam_msg['Message'])
Out[83]:
pandas.core.series.Series
In [84]:
type(data)
Out[84]:
pandas.core.frame.DataFrame
In [85]:
data['Message'][3]
Out[85]:
'U
dun say so early hor... U c already then say...'
In [86]:
data['Message'][3].replace('...','.')
Out[86]:
'U
dun say so early hor. U c already then say.'
In [94]:
data['Message'][6].replace('.',' . ') # Adding white spaces
Out[94]:
'Even
my brother is not like to speak with me
. They treat me like aids
patent .
'
In [95]:
data['Message'][6].strip() # removes the white spaces
Out[95]:
'Even
my brother is not like to speak with me. They treat me like aids patent.'
In [97]:
data['Message']
Out[97]:
0 Go until jurong point, crazy.. Available
only ...
1 Ok lar... Joking wif
u oni...
2 Free entry in 2 a wkly comp to win FA
Cup fina...
3 U dun say so early hor... U c already
then say...
4 Nah I don't think he goes to usf, he
lives aro...
...
5567 This is the 2nd time we have tried 2
contact u...
5568 Will ü b going to esplanade fr
home?
5569 Pity, * was in mood for that. So...any
other s...
5570 The guy did some bitching but I acted like
i'd...
5571 Rofl. Its true to
its name
Name:
Message, Length: 5572, dtype: object
In [105]:
data[data['Message'].str.startswith('This')].head() # records where Message starts with word 'This'
Out[105]:
Status |
Message |
|
545 |
ham |
This girl does not
stay in bed. This girl does... |
668 |
ham |
This pay is
<DECIMAL> lakhs:) |
791 |
ham |
This is hoping you
enjoyed your game yesterday... |
1853 |
spam |
This is the 2nd time
we have tried 2 contact u... |
1878 |
ham |
This pen thing is
beyond a joke. Wont a Biro d... |
In [106]:
data[data['Message'].str.endswith('...')].head() # records where Message starts with word '...'
Out[106]:
Status |
Message |
|
0 |
ham |
Go until jurong point,
crazy.. Available only ... |
1 |
ham |
Ok lar... Joking wif u
oni... |
3 |
ham |
U dun say so early
hor... U c already then say... |
22 |
ham |
So ü pay first lar...
Then when is da stock co... |
38 |
ham |
Anything lor... U
decide... |
In [111]:
Sorry_word = data['Message'].str.contains('sorry') # Message column conatains 'sorry' word
data[Sorry_word].head()
Out[111]:
Status |
Message |
|
53 |
ham |
Wow. I never realized
that you were so embaras... |
192 |
ham |
I'm sorry. I've joined
the league of people th... |
624 |
ham |
sorry, no, have got
few things to do. may be i... |
706 |
ham |
I don't think I can
get away for a trek that l... |
755 |
ham |
Realy sorry-i don't
recognise this number and ... |
In [112]:
len(Sorry_word) # How many records are there where 'sorry' word is
available in 'message' column?
Out[112]:
5572
In [121]:
sorry_msg=data[Sorry_word]
sorry_msg.head()
Out[121]:
Status |
Message |
|
53 |
ham |
Wow. I never realized
that you were so embaras... |
192 |
ham |
I'm sorry. I've joined
the league of people th... |
624 |
ham |
sorry, no, have got
few things to do. may be i... |
706 |
ham |
I don't think I can
get away for a trek that l... |
755 |
ham |
Realy sorry-i don't
recognise this number and ... |
In [126]:
sorry_msg[sorry_msg['Status']=='ham'].head() # Out of above result
Out[126]:
Status |
Message |
|
53 |
ham |
Wow. I never realized
that you were so embaras... |
192 |
ham |
I'm sorry. I've joined
the league of people th... |
624 |
ham |
sorry, no, have got
few things to do. may be i... |
706 |
ham |
I don't think I can
get away for a trek that l... |
755 |
ham |
Realy sorry-i don't
recognise this number and ... |
In [128]:
mystr = 'Hi, Hello, Nmskar : How are you all ?, All Good ? : Be strong,
Be positive'
mystr
Out[128]:
'Hi,
Hello, Nmskar : How are you all ?, All Good ? : Be strong, Be positive'
In [143]:
mystr.split() # split the string
Out[143]:
['Hi,',
'Hello,',
'Nmskar',
':',
'How',
'are',
'you',
'all',
'?,',
'All',
'Good',
'?',
':',
'Be',
'strong,',
'Be',
'positive']
In [129]:
mystr.split(':') # split the string on the basis of a character
Out[129]:
['Hi,
Hello, Nmskar ',
' How are you all ?, All Good ? ',
' Be strong, Be positive']
In [135]:
mystr.split(maxsplit = 5) # split the string into number of strings
Out[135]:
['Hi,',
'Hello,',
'Nmskar',
':',
'How',
'are you all ?, All Good ? : Be strong, Be positive']
In [140]:
data['Message'][1]
Out[140]:
'Ok
lar... Joking wif u oni...'
In [141]:
data['Message'][1].split()
Out[141]:
['Ok',
'lar...', 'Joking', 'wif', 'u', 'oni...']
In [142]:
data['Message'][1].split()[0] # Get the 1st token of splitted string
Out[142]:
'Ok'
In [145]:
data['Message'].str.split().head() # Split all the rows in Message column
Out[145]:
0 [Go, until, jurong, point,, crazy..,
Available...
1 [Ok, lar..., Joking, wif, u,
oni...]
2 [Free, entry, in, 2, a, wkly, comp, to,
win, F...
3 [U, dun, say, so, early, hor..., U, c,
already...
4 [Nah, I, don't, think, he, goes, to, usf,,
he,...
Name:
Message, dtype: object
In [146]:
data['Message'].str.split().str.get(0).head() # Get the 1st token of splitted string for all rows
Out[146]:
0 Go
1 Ok
2 Free
3 U
4 Nah
Name:
Message, dtype: object
In [147]:
data.columns # To know the column names in a Dataset
Out[147]:
Index(['Status',
'Message'], dtype='object')
In [149]:
data.columns.str.len() # To know the length of each column names in a
Dataset
Out[149]:
Int64Index([6,
7], dtype='int64')
In [154]:
data.columns.str.upper() # Make the column names into uppercase
Out[154]:
Index(['STATUS',
'MESSAGE'], dtype='object')
No comments:
Post a Comment