Fields not found when using pyhdf

2k views Asked by At

I am currently working with HDF files (version 4), and I use the pyhdf module (http://hdfeos.org/software/pyhdf.php).

When I open one of my HDF files in MATLAB using the nctoolbox, I get the following variables:

>> a = ncgeodataset('2011365222309_30199_CS_2B-CLDCLASS_GRANULE_P_R04_E05.hdf')

a = 

  ncgeodataset with properties:

     location: '2011365222309_30199_CS_2B-CLDCLASS_GRANULE_P_R04_E05.hdf'
       netcdf: [1x1 ucar.nc2.dataset.NetcdfDataset]
    variables: {16x1 cell}

>> a.variables

ans = 

    'StructMetadata.0'
    '2B-CLDCLASS/Geolocation Fields/Profile_time'
    '2B-CLDCLASS/Geolocation Fields/UTC_start'
    '2B-CLDCLASS/Geolocation Fields/TAI_start'
    '2B-CLDCLASS/Geolocation Fields/Height'
    '2B-CLDCLASS/Geolocation Fields/Range_to_intercept'
    '2B-CLDCLASS/Geolocation Fields/DEM_elevation'
    '2B-CLDCLASS/Geolocation Fields/Vertical_binsize'
    '2B-CLDCLASS/Geolocation Fields/Pitch_offset'
    '2B-CLDCLASS/Geolocation Fields/Roll_offset'
    '2B-CLDCLASS/Geolocation Fields/Latitude'
    '2B-CLDCLASS/Geolocation Fields/Longitude'
    '2B-CLDCLASS/Data Fields/Data_quality'
    '2B-CLDCLASS/Data Fields/Data_status'
    '2B-CLDCLASS/Data Fields/Data_targetID'
    '2B-CLDCLASS/Data Fields/cloud_scenario'

Using python and pyhdf I only see 2 variables:

>>> d = SD('2011365222309_30199_CS_2B-CLDCLASS_GRANULE_P_R04_E05.hdf')
>>> d.datasets()
{
  'cloud_scenario': (('nray:2B-CLDCLASS', 'nbin:2B-CLDCLASS'), (20434, 125), 22, 1), 
          'Height': (('nray:2B-CLDCLASS', 'nbin:2B-CLDCLASS'), (20434, 125), 22, 0)
}

If someone could help me figure out what is going on here.

3

There are 3 answers

0
Heather QC On BEST ANSWER

You are opening the hdf file using pyhdf.SD, which only allows you to see scientific datasets (SDS). The fields that appear to be missing are Vdata fields, not SDS, so you must access them separately with pyhdf.HDF and pyhdf.VS.

Something like:

from pyhdf.HDF import *
from pyhdf.VS import *

open_file_for_reading_vdata = HDF("your_input_file.hdf", HC.READ).vstart()
vdata = open_file_for_reading_vdata.vdatainfo()
print vdata

For more detailed information, try this link: http://pysclint.sourceforge.net/pyhdf/documentation.html

6
TheBlackCat On

Without having access to the data, as best as I can tell it looks like this file contains netcdf data that the pyhdf module cannot read. The python equivalent to nctoolbox appears to be netCDF4. However, you are better off using the higher-level tool xray, which provides much more convenient data structures for working with such files.

0
cmcuervol On

To read all data in HDF file on python the description of pyhdf.V contains the following program shows the contents of the vgroups contained inside any HDF file

from pyhdf.HDF import *
from pyhdf.V   import *
from pyhdf.VS  import *
from pyhdf.SD  import *

import sys

def describevg(refnum):
    # Describe the vgroup with the given refnum.
    # Open vgroup in read mode.
    vg = v.attach(refnum)
    print "----------------"
    print "name:", vg._name, "class:",vg._class, "tag,ref:",
    print vg._tag, vg._refnum

    # Show the number of members of each main object type.
    print "members: ", vg._nmembers,
    print "datasets:", vg.nrefs(HC.DFTAG_NDG),
    print "vdatas:  ", vg.nrefs(HC.DFTAG_VH),
    print "vgroups: ", vg.nrefs(HC.DFTAG_VG)

    # Read the contents of the vgroup.
    members = vg.tagrefs()

    # Display info about each member.
    index = -1
    for tag, ref in members:
        index += 1
        print "member index", index
        # Vdata tag
        if tag == HC.DFTAG_VH:
            vd = vs.attach(ref)
            nrecs, intmode, fields, size, name = vd.inquire()
            print "  vdata:",name, "tag,ref:",tag, ref
            print "    fields:",fields
            print "    nrecs:",nrecs
            vd.detach()

        # SDS tag
        elif tag == HC.DFTAG_NDG:
            sds = sd.select(sd.reftoindex(ref))
            name, rank, dims, type, nattrs = sds.info()
            print "  dataset:",name, "tag,ref:", tag, ref
            print "    dims:",dims
            print "    type:",type
            sds.endaccess()

        # VS tag
        elif tag == HC.DFTAG_VG:
            vg0 = v.attach(ref)
            print "  vgroup:", vg0._name, "tag,ref:", tag, ref
            vg0.detach()

        # Unhandled tag
        else:
            print "unhandled tag,ref",tag,ref

    # Close vgroup
    vg.detach()
#
# Open HDF file in readonly mode.
# filename = sys.argv[1]
filename = path_FRLK+NameHDF_FRLK
hdf = HDF(filename)

# Initialize the SD, V and VS interfaces on the file.
sd = SD(filename)
vs = hdf.vstart()
v  = hdf.vgstart()

# Scan all vgroups in the file.
ref = -1
while 1:
    try:
        ref = v.getid(ref)
        print ref
    except HDF4Error,msg:    # no more vgroup
        break
    describevg(ref)

The following function extracts the data of HDF file in V mode

def HDFread(filename, variable, Class=None):
    """
    Extract the data for non-scientific data in V mode of hdf file
    """
    hdf = HDF(filename, HC.READ)

    # Initialize the SD, V and VS interfaces on the file.
    sd = SD(filename)
    vs = hdf.vstart()
    v  = hdf.vgstart()

    # Found the class id
    if Class == None:
        ref = v.findclass('SWATH Vgroup') # The default value for Geolocation fields
    else:
        ref = v.findclass(Class)

    # Open all data of the class
    vg = v.attach(ref)
    # All fields in the class
    members = vg.tagrefs()

    nrecs = []
    names = []
    for tag, ref in members:
        # Vdata tag
        vd = vs.attach(ref)
        # nrecs, intmode, fields, size, name = vd.inquire()
        nrecs.append(vd.inquire()[0])  # number of records of the Vdata
        names.append(vd.inquire()[-1]) # name of the Vdata
        vd.detach()

    idx = names.index(variable)
    var = vs.attach(members[idx][1])
    V   = var.read(nrecs[idx])
    var.detach()
    # Terminate V, VS and SD interfaces.
    v.end()
    vs.end()
    sd.end()
    # Close HDF file.
    hdf.close()

    return array(V)

This programs works with the HDF files of CloudSat.