Pandas Series
import pandas as pd
import numpy as np
lst = [10,20,30,40,50]
pd.Series(lst)
Out[3]:
0 10
1 20
2 30
3 40
4 50
dtype:
int64
pd.Series([1,3,5])
Out[37]:
0 1
1 3
2 5
dtype:
int64
pd.Series([[1,3,5]])
Out[38]:
0 [1, 3, 5]
dtype:
object
pd.Series([1,3,5],[3,5]) # Raises ValueError: length of passed values is 3, index implies 2
---------------------------------------------------------------------------
ValueError
Traceback (most recent call last)
<ipython-input-46-a2b240657952> in <module>
---->
1 pd.Series([1,3,5],[3,5])
~\Downloads\Anaconda\lib\site-packages\pandas\core\series.py in __init__(self, data, index, dtype, name, copy, fastpath)
297 raise ValueError(
298 "Length of passed values is {val}, "
-->
299 "index implies {ind}".format(val=len(data), ind=len(index))
300 )
301 except TypeError:
ValueError: Length of passed values is 3, index implies 2
pd.Series([[1,3,5],3,5])
Out[41]:
0 [1, 3, 5]
1 3
2 5
dtype:
object
lst_bool = [True, False, True, False, False]
pd.Series(lst_bool)
Out[5]:
0 True
1 False
2 True
3 False
4 False
dtype:
bool
lst_mix = ['Pandas','Python',45,True,20.5]
pd.Series(lst_mix)
Out[6]:
0 Pandas
1 Python
2 45
3 True
4 20.5
dtype:
object
pd.Series(5,index=[0]) # values,index
Out[11]:
0 5
dtype:
int64
pd.Series(55,index=[0,1,2,3])
Out[13]:
0 55
1 55
2 55
3 55
dtype:
int64
pd.Series([range(10)])
Out[17]:
0 (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
dtype:
object
pd.Series([num*10 for num in range(10)])
Out[18]:
0 0
1 10
2 20
3 30
4 40
5 50
6 60
7 70
8 80
9 90
dtype:
int64
my_phonebook = {"Kuldip" : 9901501123,
"Amit" : 9862728,
"Chandan" : 900339}
type(my_phonebook)
Out[20]:
dict
In [21]: pd.Series(my_phonebook)
Out[21]:
Kuldip 9901501123
Amit 9862728
Chandan 900339
dtype:
int64
In [24]:
pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/Stock.csv')
Out[24]:
|
Day |
Open |
High |
Low |
Close |
Adj Close |
Volume |
|
|
0 |
1 |
16.770000 |
17.559999 |
16.730000 |
17.010000 |
16.934999 |
271700 |
|
1 |
2 |
17.100000 |
17.469999 |
16.950001 |
17.200001 |
17.200001 |
229800 |
|
2 |
3 |
17.059999 |
17.230000 |
16.870001 |
17.049999 |
17.049999 |
146300 |
|
3 |
4 |
16.900000 |
17.330000 |
16.900000 |
16.920000 |
16.920000 |
139700 |
type(pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/Stock.csv'))
Out[26]:
pandas.core.frame.DataFrame
pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/random1.csv')
Out[25]:
|
67.72003252 |
|
|
0 |
6.217307 |
|
1 |
37.743532 |
|
2 |
37.556650 |
|
3 |
32.792233 |
|
4 |
51.034497 |
|
5 |
57.462068 |
|
6 |
9.021376 |
|
7 |
93.856161 |
pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/random1.csv', header=None , squeeze = True)
Out[28]:
0 67.720033
1 6.217307
2 37.743532
3 37.556650
4 32.792233
5 51.034497
6 57.462068
7 9.021376
8 93.856161
9 95.177522
Name:
0, dtype: float64
s = pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/random1.csv', header=None , squeeze = True)
s
Out[29]:
0 67.720033
1 6.217307
2 37.743532
3 37.556650
4 32.792233
5 51.034497
6 57.462068
7 9.021376
8 93.856161
9 95.177522
Name:
0, dtype: float64
type(s)
Out[30]:
pandas.core.series.Series
pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/Stock.csv')
Out[31]:
|
Day |
Open |
High |
Low |
Close |
Adj Close |
Volume |
|
|
0 |
1 |
16.770000 |
17.559999 |
16.730000 |
17.010000 |
16.934999 |
271700 |
|
1 |
2 |
17.100000 |
17.469999 |
16.950001 |
17.200001 |
17.200001 |
229800 |
|
2 |
3 |
17.059999 |
17.230000 |
16.870001 |
17.049999 |
17.049999 |
146300 |
pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/Stock.csv', usecols=['Open'])
Out[32]:
|
Open |
|
|
0 |
16.770000 |
|
1 |
17.100000 |
|
2 |
17.059999 |
In [33]:
pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/Stock.csv', usecols=['Day','Open'])
Out[33]:
|
Day |
Open |
|
|
0 |
1 |
16.770000 |
|
1 |
2 |
17.100000 |
|
2 |
3 |
17.059999 |
pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/Stock.csv', squeeze=True, usecols=['Day','Open'])
Out[36]:
|
Day |
Open |
|
|
0 |
1 |
16.770000 |
|
1 |
2 |
17.100000 |
|
2 |
3 |
17.059999 |
|
3 |
4 |
16.900000 |
In [47]:
days = [31,28,31,30,31,30,31,31,30,31,30,31]
months=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
pd.Series(data=days, index=months) # 1st parameter is for the data and 2nd parameter is for the index
Out[47]:
Jan 31
Feb 28
Mar 31
Apr 30
May 31
Jun 30
Jul 31
Aug 31
Sep 30
Oct 31
Nov 30
Dec 31
dtype:
int64
pd.Series(index=months, data=days )
Out[48]:
Jan 31
Feb 28
Mar 31
Apr 30
May 31
Jun 30
Jul 31
Aug 31
Sep 30
Oct 31
Nov 30
Dec 31
dtype:
int64
pd.Series(months, days )
Out[49]:
31 Jan
28 Feb
31 Mar
30 Apr
31 May
30 Jun
31 Jul
31 Aug
30 Sep
31 Oct
30 Nov
31 Dec
dtype:
object
In [50]:
s=pd.Series(days,months)
s
Out[50]:
Jan 31
Feb 28
Mar 31
Apr 30
May 31
Jun 30
Jul 31
Aug 31
Sep 30
Oct 31
Nov 30
Dec 31
dtype:
int64
type(s)
Out[52]:
pandas.core.series.Series
In [54]:
s.index
Out[54]:
RangeIndex(start=0,
stop=10, step=1)
In [55]:
pd.Series(data=[1,2])
Out[55]:
0 1
1 2
dtype:
int64
s=pd.Series(days,months)
s.sort_values() # Sorts in ascending order by default
Out[70]:
Feb 28
Apr 30
Jun 30
Sep 30
Nov 30
Jan 31
Mar 31
May 31
Jul 31
Aug 31
Oct 31
Dec 31
dtype:
int64
In [60]:
s.sort_values(ascending=True)
Out[60]:
Feb 28
Apr 30
Jun 30
Sep 30
Nov 30
Jan 31
Mar 31
May 31
Jul 31
Aug 31
Oct 31
Dec 31
dtype:
int64
In [61]:
s.sort_values(ascending=False)
Out[61]:
Dec 31
Oct 31
Aug 31
Jul 31
May 31
Mar 31
Jan 31
Nov 30
Sep 30
Jun 30
Apr 30
Feb 28
dtype:
int64
In [71]:
s
Out[71]:
Jan 31
Feb 28
Mar 31
Apr 30
May 31
Jun 30
Jul 31
Aug 31
Sep 30
Oct 31
Nov 30
Dec 31
dtype:
int64
s.sort_index()
Out[63]:
Apr 30
Aug 31
Dec 31
Feb 28
Jan 31
Jul 31
Jun 30
Mar 31
May 31
Nov 30
Oct 31
Sep 30
dtype:
int64
In [72]:
s # The original series was not changed by sort_index()
Out[72]:
Jan 31
Feb 28
Mar 31
Apr 30
May 31
Jun 30
Jul 31
Aug 31
Sep 30
Oct 31
Nov 30
Dec 31
dtype:
int64
In [73]:
s.sort_index( inplace=True) # inplace=True modifies the series itself instead of returning a sorted copy
In [74]:
s
Out[74]:
Apr 30
Aug 31
Dec 31
Feb 28
Jan 31
Jul 31
Jun 30
Mar 31
May 31
Nov 30
Oct 31
Sep 30
dtype:
int64
In [77]:
days = [31,28,31,30,31,30,31,31,30,31,30,31]
months=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
s2=pd.Series(data=days, index=months) # 1st parameter is for the data and 2nd parameter is for the index
s2
Out[77]:
Jan 31
Feb 28
Mar 31
Apr 30
May 31
Jun 30
Jul 31
Aug 31
Sep 30
Oct 31
Nov 30
Dec 31
dtype:
int64
In [78]:
s2[3] # Apr month data
Out[78]:
30
In [86]:
s2['Apr']
Out[86]:
30
In [79]:
s2[2:5] # March (position 2) to May (position 4); the upper bound (position 5) is excluded
Out[79]:
Mar 31
Apr 30
May 31
dtype:
int64
In [82]:
s2[[2,9,6]]
Out[82]:
Mar 31
Oct 31
Jul 31
dtype:
int64
In [87]:
s2[['Mar','Oct','Jul']]
Out[87]:
Mar 31
Oct 31
Jul 31
dtype:
int64
In [88]:
s2 [-1] # Dec
Out[88]:
31
In [84]:
s2[-2:-5]
Out[84]:
Series([],
dtype: int64)
In [85]:
s2[-5:-2]
Out[85]:
Aug 31
Sep 30
Oct 31
dtype:
int64
In [90]:
s3 = pd.Series([1,2,3,4,5])
s3
Out[90]:
0 1
1 2
2 3
3 4
4 5
dtype:
int64
In [91]:
s3[1]
Out[91]:
2
In [93]:
s3[4]
Out[93]:
5
In [94]:
s3[:-1]
Out[94]:
0 1
1 2
2 3
3 4
dtype:
int64
In [95]:
s3[-1]
---------------------------------------------------------------------------
KeyError
Traceback (most recent call last)
<ipython-input-95-33762cb9f569> in <module>
---->
1 s3[-1]
~\Downloads\Anaconda\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
1066 key = com.apply_if_callable(key, self)
1067 try:
->
1068 result = self.index.get_value(self, key)
1069
1070 if not is_scalar(result):
~\Downloads\Anaconda\lib\site-packages\pandas\core\indexes\base.py in get_value(self, series, key)
4728 k = self._convert_scalar_indexer(k, kind="getitem")
4729 try:
->
4730 return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
4731 except KeyError as e1:
4732 if len(self) > 0 and (self.holds_integer() or self.is_boolean()):
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: -1
In [98]:
def adding100(num):
    """Return ``num`` increased by 100.

    Written to be handed to ``Series.apply``, which calls it once for
    each value in the series.
    """
    return num + 100
In [99]:
s3.apply(adding100)
# The 'apply' method calls the function passed to it (here adding100) on each value in the series
Out[99]:
0 101
1 102
2 103
3 104
4 105
dtype:
int64
In [100]:
mapobj = {1:10, 2:20, 3 : 30, 4:40, 5 : 50}
s3.map(mapobj) # map replaces each value with the dictionary entry stored under that value as key
Out[100]:
0 10
1 20
2 30
3 40
4 50
dtype:
int64
No comments:
Post a Comment