Data and Database: Python : Pandas Series

Pandas Series

import pandas as pd

import numpy as np

lst = [10,20,30,40,50]

pd.Series(lst)

Out[3]:

0 10

1 20

2 30

3 40

4 50

dtype: int64

pd.Series([1,3,5])

Out[37]:

0 1

1 3

2 5

dtype: int64

pd.Series([[1,3,5]])

Out[38]:

0 [1, 3, 5]

dtype: object

pd.Series([1,3,5],[3,5]) # Length of passed values is 3, index implies 2

---------------------------------------------------------------------------

ValueError Traceback (most recent call last)

<ipython-input-46-a2b240657952> in <module>

----> 1 pd.Series([1,3,5],[3,5])

~\Downloads\Anaconda\lib\site-packages\pandas\core\series.py in __init__(self, data, index, dtype, name, copy, fastpath)

297 raise ValueError(

298 "Length of passed values is {val}, "

--> 299 "index implies {ind}".format(val=len(data), ind=len(index))

300 )

301 except TypeError:

ValueError: Length of passed values is 3, index implies 2

pd.Series([[1,3,5],3,5])

Out[41]:

0 [1, 3, 5]

1 3

2 5

dtype: object

lst_bool = [True, False, True, False, False]

pd.Series(lst_bool)

Out[5]:

0 True

1 False

2 True

3 False

4 False

dtype: bool

lst_mix = ['Pandas','Python',45,True,20.5]

pd.Series(lst_mix)

Out[6]:

0 Pandas

1 Python

2 45

3 True

4 20.5

dtype: object

pd.Series(5,index=[0]) # values,index

Out[11]:

0 5

dtype: int64

pd.Series(55,index=[0,1,2,3])

Out[13]:

0 55

1 55

2 55

3 55

dtype: int64

pd.Series([range(10)])

Out[17]:

0 (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)

dtype: object

pd.Series([num*10 for num in range(10)])

Out[18]:

0 0

1 10

2 20

3 30

4 40

5 50

6 60

7 70

8 80

9 90

dtype: int64

my_phonebook = {"Kuldip" : 9901501123,

"Amit" : 9862728,

"Chandan" : 900339}

type(my_phonebook)

Out[20]:

dict

In [21]: pd.Series(my_phonebook)

Out[21]:

Kuldip 9901501123

Amit 9862728

Chandan 900339

dtype: int64

In [24]:

pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/Stock.csv')

Out[24]:

	Day	Open	High	Low	Close	Adj Close	Volume
0	1	16.770000	17.559999	16.730000	17.010000	16.934999	271700
1	2	17.100000	17.469999	16.950001	17.200001	17.200001	229800
2	3	17.059999	17.230000	16.870001	17.049999	17.049999	146300
3	4	16.900000	17.330000	16.900000	16.920000	16.920000	139700

type(pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/Stock.csv'))

Out[26]:

pandas.core.frame.DataFrame

pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/random1.csv')

Out[25]:

	67.72003252
0	6.217307
1	37.743532
2	37.556650
3	32.792233
4	51.034497
5	57.462068
6	9.021376
7	93.856161

# header=None keeps the first row as data instead of using it as the column name.
# .squeeze("columns") turns the resulting one-column DataFrame into a Series
# (the read_csv `squeeze` keyword was deprecated in pandas 1.4 and removed in 2.0).
pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/random1.csv', header=None).squeeze("columns")

Out[28]:

0 67.720033

1 6.217307

2 37.743532

3 37.556650

4 32.792233

5 51.034497

6 57.462068

7 9.021376

8 93.856161

9 95.177522

Name: 0, dtype: float64

# Read the single-column CSV (no header row) and collapse it to a Series.
# .squeeze("columns") replaces the read_csv `squeeze` keyword, which was
# deprecated in pandas 1.4 and removed in 2.0.
s = pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/random1.csv', header=None).squeeze("columns")

# An assignment displays nothing; echo the Series so the cell shows it.
s

Out[29]:

0 67.720033

1 6.217307

2 37.743532

3 37.556650

4 32.792233

5 51.034497

6 57.462068

7 9.021376

8 93.856161

9 95.177522

Name: 0, dtype: float64

type(s)

Out[30]:

pandas.core.series.Series

pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/Stock.csv')

Out[31]:

	Day	Open	High	Low	Close	Adj Close	Volume
0	1	16.770000	17.559999	16.730000	17.010000	16.934999	271700
1	2	17.100000	17.469999	16.950001	17.200001	17.200001	229800
2	3	17.059999	17.230000	16.870001	17.049999	17.049999	146300

pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/Stock.csv', usecols=['Open'])

Out[32]:

	Open
0	16.770000
1	17.100000
2	17.059999

In [33]:

pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/Stock.csv', usecols=['Day','Open'])

Out[33]:

	Day	Open
0	1	16.770000
1	2	17.100000
2	3	17.059999

# Squeezing only collapses a single-column frame: with two columns selected
# the result is still a DataFrame, as the output below shows.
# .squeeze("columns") replaces the read_csv `squeeze` keyword, which was
# deprecated in pandas 1.4 and removed in 2.0.
pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/Stock.csv', usecols=['Day','Open']).squeeze("columns")

Out[36]:

	Day	Open
0	1	16.770000
1	2	17.100000
2	3	17.059999
3	4	16.900000

In [47]:

days = [31,28,31,30,31,30,31,31,30,31,30,31]

months=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

pd.Series(data=days, index=months) # 1st parameter is for Data and 2nd parameter is for Index

Out[47]:

Jan 31

Feb 28

Mar 31

Apr 30

May 31

Jun 30

Jul 31

Aug 31

Sep 30

Oct 31

Nov 30

Dec 31

dtype: int64

pd.Series(index=months, data=days )

Out[48]:

Jan 31

Feb 28

Mar 31

Apr 30

May 31

Jun 30

Jul 31

Aug 31

Sep 30

Oct 31

Nov 30

Dec 31

dtype: int64

pd.Series(months, days )

Out[49]:

31 Jan

28 Feb

31 Mar

30 Apr

31 May

30 Jun

31 Jul

31 Aug

30 Sep

31 Oct

30 Nov

31 Dec

dtype: object

In [50]:

# Positional arguments: data first (days), index second (months).
s = pd.Series(days, months)

# An assignment displays nothing; echo the Series so the cell shows it.
s

Out[50]:

Jan 31

Feb 28

Mar 31

Apr 30

May 31

Jun 30

Jul 31

Aug 31

Sep 30

Oct 31

Nov 30

Dec 31

dtype: int64

type(s)

Out[52]:

pandas.core.series.Series

In [54]:

s.index

Out[54]:

Index(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], dtype='object')

In [55]:

pd.Series(data=[1,2])

Out[55]:

0 1

1 2

dtype: int64

s=pd.Series(days,months)

s.sort_values() # By default Ascending

Out[70]:

Feb 28

Apr 30

Jun 30

Sep 30

Nov 30

Jan 31

Mar 31

May 31

Jul 31

Aug 31

Oct 31

Dec 31

dtype: int64

In [60]:

s.sort_values(ascending=True)

Out[60]:

Feb 28

Apr 30

Jun 30

Sep 30

Nov 30

Jan 31

Mar 31

May 31

Jul 31

Aug 31

Oct 31

Dec 31

dtype: int64

In [61]:

s.sort_values(ascending=False)

Out[61]:

Dec 31

Oct 31

Aug 31

Jul 31

May 31

Mar 31

Jan 31

Nov 30

Sep 30

Jun 30

Apr 30

Feb 28

dtype: int64

In [71]:

s # sort_values() returned a new Series; the original order is unchanged

Out[71]:

Jan 31

Feb 28

Mar 31

Apr 30

May 31

Jun 30

Jul 31

Aug 31

Sep 30

Oct 31

Nov 30

Dec 31

dtype: int64

s.sort_index()

Out[63]:

Apr 30

Aug 31

Dec 31

Feb 28

Jan 31

Jul 31

Jun 30

Mar 31

May 31

Nov 30

Oct 31

Sep 30

dtype: int64

In [72]:

s # Original series was not changed

Out[72]:

Jan 31

Feb 28

Mar 31

Apr 30

May 31

Jun 30

Jul 31

Aug 31

Sep 30

Oct 31

Nov 30

Dec 31

dtype: int64

In [73]:

s.sort_index( inplace=True) # inplace=True parameter is used to make the result permanent

In [74]:

s # now sorted by index, because the previous cell used inplace=True

Out[74]:

Apr 30

Aug 31

Dec 31

Feb 28

Jan 31

Jul 31

Jun 30

Mar 31

May 31

Nov 30

Oct 31

Sep 30

dtype: int64

In [77]:

# Number of days in each month (non-leap year), in calendar order.
days = [31,28,31,30,31,30,31,31,30,31,30,31]

# Month labels, used as the Series index.
months=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

s2=pd.Series(data=days, index=months) # 1st parameter is for Data and 2nd parameter is for Index

# An assignment displays nothing; echo the Series so the cell shows it.
s2

Out[77]:

Jan 31

Feb 28

Mar 31

Apr 30

May 31

Jun 30

Jul 31

Aug 31

Sep 30

Oct 31

Nov 30

Dec 31

dtype: int64

In [78]:

s2[3] # Apr month data

Out[78]:

30

In [86]:

s2['Apr']

Out[86]:

30

In [79]:

s2[2:5] # March (position 2) through May (position 4); the upper bound (5) is omitted

Out[79]:

Mar 31

Apr 30

May 31

dtype: int64

In [82]:

s2[[2,9,6]]

Out[82]:

Mar 31

Oct 31

Jul 31

dtype: int64

In [87]:

s2[['Mar','Oct','Jul']]

Out[87]:

Mar 31

Oct 31

Jul 31

dtype: int64

In [88]:

s2 [-1] # Dec

Out[88]:

31

In [84]:

s2[-2:-5]

Out[84]:

Series([], dtype: int64)

In [85]:

s2[-5:-2]

Out[85]:

Aug 31

Sep 30

Oct 31

dtype: int64

In [90]:

# No index is given, so pandas assigns the default integer index 0..4.
s3 = pd.Series([1,2,3,4,5])

# An assignment displays nothing; echo the Series so the cell shows it.
s3

Out[90]:

0 1

1 2

2 3

3 4

4 5

dtype: int64

In [91]:

s3[1]

Out[91]:

2

In [93]:

s3[4]

Out[93]:

5

In [94]:

s3[:-1]

Out[94]:

0 1

1 2

2 3

3 4

dtype: int64

In [95]:

s3[-1]

---------------------------------------------------------------------------

KeyError Traceback (most recent call last)

<ipython-input-95-33762cb9f569> in <module>

----> 1 s3[-1]

~\Downloads\Anaconda\lib\site-packages\pandas\core\series.py in __getitem__(self, key)

1066 key = com.apply_if_callable(key, self)

1067 try:

-> 1068 result = self.index.get_value(self, key)

1069

1070 if not is_scalar(result):

~\Downloads\Anaconda\lib\site-packages\pandas\core\indexes\base.py in get_value(self, series, key)

4728 k = self._convert_scalar_indexer(k, kind="getitem")

4729 try:

-> 4730 return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))

4731 except KeyError as e1:

4732 if len(self) > 0 and (self.holds_integer() or self.is_boolean()):

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_value()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()

KeyError: -1

In [98]:

def adding100(num):
    """Return ``num`` increased by 100.

    Parameters
    ----------
    num : number
        The value to which 100 is added.

    Returns
    -------
    number
        ``num + 100``.
    """
    return num + 100

In [99]:

s3.apply(adding100)

# 'apply' calls the function passed as its argument (here adding100) on every value in the Series

Out[99]:

0 101

1 102

2 103

3 104

4 105

dtype: int64

In [100]:

mapobj = {1:10, 2:20, 3 : 30, 4:40, 5 : 50}

s3.map(mapobj) # map method uses Dictionary

Out[100]:

0 10

1 20

2 30

3 40

4 50

dtype: int64

Data and Database

Monday, 28 September 2020

Python : Pandas Series

No comments:

Total Pageviews

Labels