Monday, 28 September 2020

Python : Pandas Series

 Pandas Series

import pandas as pd

import numpy as np

 

lst = [10,20,30,40,50]

pd.Series(lst)

 

Out[3]:

0    10

1    20

2    30

3    40

4    50

dtype: int64

 

pd.Series([1,3,5])

Out[37]:

0    1

1    3

2    5

dtype: int64

 

pd.Series([[1,3,5]])

Out[38]:

0    [1, 3, 5]

dtype: object

 

pd.Series([1,3,5],[3,5])  # Length of passed values is 3, index implies 2

---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-46-a2b240657952> in <module>

----> 1 pd.Series([1,3,5],[3,5])

 

~\Downloads\Anaconda\lib\site-packages\pandas\core\series.py in __init__(self, data, index, dtype, name, copy, fastpath)

    297                         raise ValueError(

    298                             "Length of passed values is {val}, "

--> 299                             "index implies {ind}".format(val=len(data), ind=len(index))

    300                         )

    301                 except TypeError:

 

ValueError: Length of passed values is 3, index implies 2

 

pd.Series([[1,3,5],3,5])

Out[41]:

0    [1, 3, 5]

1            3

2            5

dtype: object

 

lst_bool = [True, False, True, False, False]

pd.Series(lst_bool)

Out[5]:

0     True

1    False

2     True

3    False

4    False

dtype: bool

 

lst_mix = ['Pandas','Python',45,True,20.5]

pd.Series(lst_mix)

Out[6]:

0    Pandas

1    Python

2        45

3      True

4      20.5

dtype: object

 

pd.Series(5,index=[0]) # values,index

Out[11]:

0    5

dtype: int64

 

pd.Series(55,index=[0,1,2,3])

Out[13]:

0    55

1    55

2    55

3    55

dtype: int64

pd.Series([range(10)])

Out[17]:

0    (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)

dtype: object

pd.Series([num*10 for num in range(10)])

Out[18]:

0     0

1    10

2    20

3    30

4    40

5    50

6    60

7    70

8    80

9    90

dtype: int64

 

my_phonebook = {"Kuldip" : 9901501123,

               "Amit" : 9862728,

               "Chandan" : 900339}

type(my_phonebook)

Out[20]:

dict

 

In [21]: pd.Series(my_phonebook)

Out[21]:

Kuldip     9901501123

Amit          9862728

Chandan        900339

dtype: int64

 

In [24]:

pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/Stock.csv')

Out[24]:

Day

Open

High

Low

Close

Adj Close

Volume

0

1

16.770000

17.559999

16.730000

17.010000

16.934999

271700

1

2

17.100000

17.469999

16.950001

17.200001

17.200001

229800

2

3

17.059999

17.230000

16.870001

17.049999

17.049999

146300

3

4

16.900000

17.330000

16.900000

16.920000

16.920000

139700


# Check what read_csv returns by default: the call must sit on one line,
# otherwise `type` and the parenthesised read_csv are two separate expressions.
type(pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/Stock.csv'))

Out[26]:

pandas.core.frame.DataFrame

 

pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/random1.csv')

Out[25]:

67.72003252

0

6.217307

1

37.743532

2

37.556650

3

32.792233

4

51.034497

5

57.462068

6

9.021376

7

93.856161

# The `squeeze` keyword of read_csv was deprecated in pandas 1.4 and removed in 2.0.
# Squeezing the one-column DataFrame afterwards yields the same Series on old and new versions.
pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/random1.csv', header=None).squeeze("columns")

Out[28]:

0    67.720033

1     6.217307

2    37.743532

3    37.556650

4    32.792233

5    51.034497

6    57.462068

7     9.021376

8    93.856161

9    95.177522

Name: 0, dtype: float64

 

# Read the single-column file without a header row and collapse it to a Series.
# `.squeeze("columns")` replaces the `squeeze=True` keyword that pandas 2.0 removed from read_csv.
s = pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/random1.csv', header=None).squeeze("columns")

s

Out[29]:

0    67.720033

1     6.217307

2    37.743532

3    37.556650

4    32.792233

5    51.034497

6    57.462068

7     9.021376

8    93.856161

9    95.177522

Name: 0, dtype: float64

type(s)

Out[30]:

pandas.core.series.Series

 

pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/Stock.csv')

Out[31]:

Day

Open

High

Low

Close

Adj Close

Volume

0

1

16.770000

17.559999

16.730000

17.010000

16.934999

271700

1

2

17.100000

17.469999

16.950001

17.200001

17.200001

229800

2

3

17.059999

17.230000

16.870001

17.049999

17.049999

146300

 

pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/Stock.csv', usecols=['Open'])

Out[32]:

Open

0

16.770000

1

17.100000

2

17.059999

 

 

 

In [33]:

pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/Stock.csv', usecols=['Day','Open'])

Out[33]:

Day

Open

0

1

16.770000

1

2

17.100000

2

3

17.059999

 

# Squeezing only collapses a one-column result; with two columns selected the
# result stays a DataFrame. `.squeeze("columns")` is used because the
# `squeeze=True` keyword was removed from read_csv in pandas 2.0.
pd.read_csv('C:/Users/kuldip_s/Documents/Python Scripts/Stock.csv', usecols=['Day','Open']).squeeze("columns")

Out[36]:

Day

Open

0

1

16.770000

1

2

17.100000

2

3

17.059999

3

4

16.900000

 

In [47]:

days = [31,28,31,30,31,30,31,31,30,31,30,31]

months=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

pd.Series(data=days, index=months) # 1st parameter is for Data and 2nd parameter is for Index

Out[47]:

Jan    31

Feb    28

Mar    31

Apr    30

May    31

Jun    30

Jul    31

Aug    31

Sep    30

Oct    31

Nov    30

Dec    31

dtype: int64

 

pd.Series(index=months, data=days ) 

Out[48]:

Jan    31

Feb    28

Mar    31

Apr    30

May    31

Jun    30

Jul    31

Aug    31

Sep    30

Oct    31

Nov    30

Dec    31

dtype: int64

 

pd.Series(months, days )

Out[49]:

31    Jan

28    Feb

31    Mar

30    Apr

31    May

30    Jun

31    Jul

31    Aug

30    Sep

31    Oct

30    Nov

31    Dec

dtype: object

 

In [50]:

# Build the Series with positional arguments (data first, index second).
s=pd.Series(days,months)

# An assignment alone displays nothing; echo `s` so the cell shows the Series.
s

Out[50]:

Jan    31

Feb    28

Mar    31

Apr    30

May    31

Jun    30

Jul    31

Aug    31

Sep    30

Oct    31

Nov    30

Dec    31

dtype: int64

 

type(s)

Out[52]:

pandas.core.series.Series

 

In [54]:

s.index

Out[54]:

Index(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
       'Nov', 'Dec'],
      dtype='object')

 

In [55]:

pd.Series(data=[1,2])

Out[55]:

0    1

1    2

dtype: int64

 

s=pd.Series(days,months)

s.sort_values()    # By default Ascending

 

Out[70]:

Feb    28

Apr    30

Jun    30

Sep    30

Nov    30

Jan    31

Mar    31

May    31

Jul    31

Aug    31

Oct    31

Dec    31

dtype: int64

 

In [60]:

s.sort_values(ascending=True)

Out[60]:

Feb    28

Apr    30

Jun    30

Sep    30

Nov    30

Jan    31

Mar    31

May    31

Jul    31

Aug    31

Oct    31

Dec    31

dtype: int64

 

In [61]:

s.sort_values(ascending=False)

Out[61]:

Dec    31

Oct    31

Aug    31

Jul    31

May    31

Mar    31

Jan    31

Nov    30

Sep    30

Jun    30

Apr    30

Feb    28

dtype: int64

 

In [71]:

s

Out[71]:

Jan    31

Feb    28

Mar    31

Apr    30

May    31

Jun    30

Jul    31

Aug    31

Sep    30

Oct    31

Nov    30

Dec    31

dtype: int64

 

s.sort_index()

Out[63]:

Apr    30

Aug    31

Dec    31

Feb    28

Jan    31

Jul    31

Jun    30

Mar    31

May    31

Nov    30

Oct    31

Sep    30

dtype: int64

 

In [72]:

s                   # Original series was not changed

Out[72]:

Jan    31

Feb    28

Mar    31

Apr    30

May    31

Jun    30

Jul    31

Aug    31

Sep    30

Oct    31

Nov    30

Dec    31

dtype: int64

 

In [73]:

s.sort_index( inplace=True)  # inplace=True sorts s itself instead of returning a new, sorted Series

In [74]:

s

Out[74]:

Apr    30

Aug    31

Dec    31

Feb    28

Jan    31

Jul    31

Jun    30

Mar    31

May    31

Nov    30

Oct    31

Sep    30

dtype: int64

 

In [77]:

days = [31,28,31,30,31,30,31,31,30,31,30,31]

months=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

s2=pd.Series(data=days, index=months) # 1st parameter is for Data and 2nd parameter is for Index

s2

Out[77]:

Jan    31

Feb    28

Mar    31

Apr    30

May    31

Jun    30

Jul    31

Aug    31

Sep    30

Oct    31

Nov    30

Dec    31

dtype: int64

 

In [78]:

# .iloc makes the positional lookup explicit; plain s2[3] on a string-labelled
# Series relies on an integer fallback that newer pandas versions deprecate.
s2.iloc[3] # Apr month data (position 3)

Out[78]:

30

In [86]:

s2['Apr']

Out[86]:

30

In [79]:

s2[2:5]   # March (position 2) to May (position 4); the upper bound 5 is excluded

Out[79]:

Mar    31

Apr    30

May    31

dtype: int64

 

In [82]:

# Select several rows by position, in the order given; .iloc avoids the
# deprecated integer fallback of s2[[...]] on a string-labelled Series.
s2.iloc[[2,9,6]]

Out[82]:

Mar    31

Oct    31

Jul    31

dtype: int64

 

In [87]:

s2[['Mar','Oct','Jul']]

Out[87]:

Mar    31

Oct    31

Jul    31

dtype: int64

 

In [88]:

# Negative positions count from the end; .iloc keeps this explicitly positional
# instead of depending on the deprecated integer fallback of s2[-1].
s2.iloc[-1] # Dec

Out[88]:

31

 

In [84]:

s2[-2:-5]

Out[84]:

Series([], dtype: int64)

 

In [85]:

s2[-5:-2]

Out[85]:

Aug    31

Sep    30

Oct    31

dtype: int64

 

In [90]:

s3 = pd.Series([1,2,3,4,5])

s3

Out[90]:

0    1

1    2

2    3

3    4

4    5

dtype: int64

 

In [91]:

 s3[1]

Out[91]:

2

 

In [93]:

s3[4]

Out[93]:

5

 

In [94]:

s3[:-1]

Out[94]:

0    1

1    2

2    3

3    4

dtype: int64

 

In [95]:

s3[-1]

---------------------------------------------------------------------------

KeyError                                  Traceback (most recent call last)

<ipython-input-95-33762cb9f569> in <module>

----> 1 s3[-1]

 

~\Downloads\Anaconda\lib\site-packages\pandas\core\series.py in __getitem__(self, key)

   1066         key = com.apply_if_callable(key, self)

   1067         try:

-> 1068             result = self.index.get_value(self, key)

   1069

   1070             if not is_scalar(result):

 

~\Downloads\Anaconda\lib\site-packages\pandas\core\indexes\base.py in get_value(self, series, key)

   4728         k = self._convert_scalar_indexer(k, kind="getitem")

   4729         try:

-> 4730             return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))

   4731         except KeyError as e1:

   4732             if len(self) > 0 and (self.holds_integer() or self.is_boolean()):

 

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_value()

 

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_value()

 

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

 

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()

 

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()

 

KeyError: -1

 

In [98]:

 

def adding100(num):
    """Return ``num`` increased by 100.

    Intended to be handed to ``Series.apply`` so that it is called once per
    value of the Series.
    """
    offset = 100
    shifted = num + offset
    return shifted

 

In [99]:

s3.apply(adding100)

# apply calls the function passed to it (here adding100) on every value of the Series

Out[99]:

0    101

1    102

2    103

3    104

4    105

dtype: int64

 

In [100]:

mapobj = {1:10, 2:20, 3 : 30, 4:40, 5 : 50}

s3.map(mapobj)  # map replaces each value with the dict entry stored under that value

Out[100]:

0    10

1    20

2    30

3    40

4    50

dtype: int64

 

No comments: