GPM data analysis
Contents
GPM data analysis#
Packages Required#
Numpy, Xarray, DrPy, Matplotlib, Cartopy and so on… by running the command in your terminal/command prompt as given below
conda install -c conda-forge cartopy xarray git
Install DrPy
pip install git+https://github.com/dopplerchase/DRpy.git
Import these installed packages#
import numpy as np
import h5py
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
# import netCDF4 as nc
import xarray as xr
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import cartopy.crs as ccrs
import cartopy.feature as feat
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
def gline(ax):
gl = ax.gridlines(crs=ccrs.PlateCarree(), linewidth=1, color='black', alpha=0.5,
linestyle='--', draw_labels=True)
gl.xlabels_top = False
gl.ylabels_left = True
gl.ylabels_right=False
gl.xlines = False
gl.ylines = False
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER
#install drpy
!pip install git+https://github.com/dopplerchase/DRpy.git
Collecting git+https://github.com/dopplerchase/DRpy.git
Cloning https://github.com/dopplerchase/DRpy.git to /private/var/folders/vr/4xwzfzr92_v2npmgfxdnq9vh0000gn/T/pip-req-build-12tweu8t
Running command git clone --filter=blob:none --quiet https://github.com/dopplerchase/DRpy.git /private/var/folders/vr/4xwzfzr92_v2npmgfxdnq9vh0000gn/T/pip-req-build-12tweu8t
Resolved https://github.com/dopplerchase/DRpy.git to commit 7d4246d977e926d02b19059de0a0c3793711e2f1
Preparing metadata (setup.py) ... ?25l-
\
|
/
done
?25h
import drpy
import datetime as dt
dt.datetime(2021,5,15,20,43)
datetime.datetime(2021, 5, 15, 20, 43)
Download Level 2 GPM data#
create your PPS account here :
io = drpy.io.netrunner(servername='Research',username='hamidsyed37@gmail.com',
start_time=dt.datetime(2021,5,15,20,43),
end_time=dt.datetime(2021,5,15,22,17))
Downloading: https://arthurhouhttps.pps.eosdis.nasa.gov/text/gpmdata/2021/05/15/radar/2A.GPM.DPR.V9-20211125.20210515-S204332-E221604.040979.V07A.HDF5
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
Input In [7], in <cell line: 1>()
----> 1 io = drpy.io.netrunner(servername='Research',username='hamidsyed37@gmail.com',
2 start_time=dt.datetime(2021,5,15,20,43),
3 end_time=dt.datetime(2021,5,15,22,17))
File ~/miniconda3/envs/syed/lib/python3.10/site-packages/drpy/io/io.py:48, in netrunner.__init__(self, servername, username, start_time, end_time, autorun, savedir)
46 self.locate_file()
47 #this will download it locally
---> 48 self.download(savedir=savedir)
File ~/miniconda3/envs/syed/lib/python3.10/site-packages/drpy/io/io.py:200, in netrunner.download(self, savedir)
196 args = cmd.split()
197 process = subprocess.Popen(args,
198 stdout=subprocess.PIPE,
199 stderr=subprocess.PIPE)
--> 200 process.wait()
201 print('Done')
File ~/miniconda3/envs/syed/lib/python3.10/subprocess.py:1207, in Popen.wait(self, timeout)
1205 endtime = _time() + timeout
1206 try:
-> 1207 return self._wait(timeout=timeout)
1208 except KeyboardInterrupt:
1209 # https://bugs.python.org/issue25942
1210 # The first keyboard interrupt waits briefly for the child to
1211 # exit under the common assumption that it also received the ^C
1212 # generated SIGINT and will exit rapidly.
1213 if timeout is not None:
File ~/miniconda3/envs/syed/lib/python3.10/subprocess.py:1941, in Popen._wait(self, timeout)
1939 if self.returncode is not None:
1940 break # Another thread waited.
-> 1941 (pid, sts) = self._try_wait(0)
1942 # Check the pid and loop as waitpid has been known to
1943 # return 0 even without WNOHANG in odd situations.
1944 # http://bugs.python.org/issue14396.
1945 if pid == self.pid:
File ~/miniconda3/envs/syed/lib/python3.10/subprocess.py:1899, in Popen._try_wait(self, wait_flags)
1897 """All callers to this function MUST hold self._waitpid_lock."""
1898 try:
-> 1899 (pid, sts) = os.waitpid(self.pid, wait_flags)
1900 except ChildProcessError:
1901 # This happens if SIGCLD is set to be ignored or waiting
1902 # for child processes has otherwise been disabled for our
1903 # process. This child is dead, we can't get the status.
1904 pid = self.pid
KeyboardInterrupt:
file = '2A.GPM.DPR.V9-20211125.20210515-S204332-E221604.040979.V07A.HDF5'
Read the data#
Reading data by using DrPy library
dpr = drpy.core.GPMDPR(filename=file)
Data analysis#
Let’s have a look at the data
dpr.ds
<xarray.Dataset> Dimensions: (nscan: 7932, nrayNS: 49, nfreq: 2, nbin: 176, nNUBF: 3, nNP: 4, method: 2, foreBack: 6, nearFar: 3, nsdew: 2, nfreqHI: 3, nbinSZP: 7) Coordinates: Latitude (nscan, nrayNS) float32 ... Longitude (nscan, nrayNS) float32 ... height (nscan, nrayNS, nbin) float32 ... time (nscan, nrayNS) datetime64[ns] 2021-05-15T20:4... Dimensions without coordinates: nscan, nrayNS, nfreq, nbin, nNUBF, nNP, method, foreBack, nearFar, nsdew, nfreqHI, nbinSZP Data variables: (12/96) sunLocalTime (nscan, nrayNS) float32 ... elevation (nscan, nrayNS) float32 ... landSurfaceType (nscan, nrayNS) int32 ... localZenithAngle (nscan, nrayNS, nfreq) float32 ... flagPrecip (nscan, nrayNS) int32 ... flagSigmaZeroSaturation (nscan, nrayNS, nfreq) uint8 ... ... ... surfaceSnowfallIndex (nscan, nrayNS) float32 ... flagEcho (nscan, nrayNS, nbin) int8 ... qualityData (nscan, nrayNS) int32 ... qualityFlag (nscan, nrayNS, nfreq) int8 ... flagSensor (nscan, nfreq) int8 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 flagScanPattern (nscan, nfreq) int16 0 1 0 1 0 1 ... 0 1 0 1 0 1 Attributes: FS_SwathHeader: NumberScansInSet=1;\nMaximumNumberScansTotal=10000;\nNum...
dpr.ds['zFactorFinal']
<xarray.DataArray 'zFactorFinal' (nscan: 7932, nrayNS: 49, nbin: 176, nfreq: 2)> [136811136 values with dtype=float32] Coordinates: Latitude (nscan, nrayNS) float32 ... Longitude (nscan, nrayNS) float32 ... height (nscan, nrayNS, nbin) float32 ... time (nscan, nrayNS) datetime64[ns] 2021-05-15T20:43:33.633000 ... ... Dimensions without coordinates: nscan, nrayNS, nbin, nfreq Attributes: DimensionNames: nscan,nray,nbin,nfreq Units: dBZ units: dBZ _FillValue: -9999.9 CodeMissingValue: -9999.9
fig = plt.figure(figsize = [10,6.5])
ax = plt.axes(projection = ccrs.PlateCarree())
ax.coastlines()
dpr.ds['zFactorFinal'].isel(nfreq=0).max(axis=2).plot(x = "Longitude",
y = "Latitude",
vmin = 0,
vmax = 60,
cbar_kwargs = {"pad":0.02,
"shrink":0.5
}
)
gline(ax)
Slicing the data#
Now if you see there are four dimensions in this dataset viz., when you type dpr.ds[‘zFactorFinal’]
(nscan: 7932, nrayNS: 49, nbin: 176, nfreq: 2)
The above figure shows that the GPM satellite travels in between latitudes ranging from 60ºS to 60ºN by making a sinwave like pattern around the globe.
This sinwave like pattern is called as the swath of the satellite (you can call it a path as well)
This swath is basically the data of reflectivity (dBZ)
If we plot this without this map or PlateCarre projection the data will look like as plotted below
It shows us that the “nscan” dimension is x-axis and “nrayNS” is y-axis and similarly “nbin” is the height or z-axis
We can also note that there is much data only as few lines, so let’s see what it is, so if we slice “nscan” somewhere between 5000 to 6000, we can see the Cyclone Tauktae over Indian ocean, because the GPM has passed over the Indian Ocean and we can clearly see that there are some data patches present over the region.
fig = plt.figure(figsize = [10,6.5])
ax = plt.axes()
dpr.ds['zFactorFinal'].isel(nfreq=0).max(axis=2).plot(x = "nscan",
vmin = 0,
vmax = 60,
cbar_kwargs = {"pad":0.02,
"shrink":0.5
}
)
<matplotlib.collections.QuadMesh at 0x133ab69b0>
start = 5550
end = 5630
dpr.ds['precipRateNearSurface'][start:end,:].plot.contourf(x='nscan',levels=range(0,200,2),cmap='turbo')
plt.gca().invert_yaxis()
dpr.ds['zFactorFinal'].isel(nscan=slice(start,end),nfreq=1).max(axis=2).plot.contourf(x='nscan',
levels=range(0,60,2),
cmap='turbo')
plt.title(pd.to_datetime(dpr.ds['zFactorFinal'].isel(nscan=slice(start,end),nfreq=1).time[0,0].values).strftime(
'%Y%m%d %H:%M:%S'))
plt.gca().invert_yaxis()
dpr.ds['flagHeavyIcePrecip'][start:end].plot(x = "nscan")
plt.gca().invert_yaxis()
plt.figure(figsize=[8,8])
ax = plt.axes(projection=ccrs.PlateCarree())
dpr.ds['zFactorFinal'][start:end,:,:,0].max(axis=2).plot.contourf(x='Longitude',y='Latitude',levels=range(0,60,9),
cmap='turbo')
ax.set_title(pd.to_datetime(dpr.ds['zFactorFinal'][start:end,:,160,0].time[0,0].values).strftime('%Y%m%d %H:%M:%S'))
ax.coastlines()
gline(ax)