# Source code for pyspeckit.spectrum.readers.read_class

"""
------------------------
GILDAS CLASS file reader
------------------------

Read a CLASS file into an :class:`pyspeckit.spectrum.ObsBlock`
"""
from __future__ import print_function
from six.moves import xrange
from six import iteritems
import six
import astropy.io.fits as pyfits
import numpy
import numpy as np
from numpy import pi
from astropy import log
# from astropy.time import Time
from astropy import units as u
import pyspeckit
import sys
import re
try:
    from astropy.utils.console import ProgressBar
except ImportError:
    ProgressBar = lambda x: None
    ProgressBar.update = lambda x: None
import struct

import time

# Module-level alias for the builtin ``range``.
# Original note: 'range' is needed as a keyword.
# NOTE(review): presumably a function elsewhere in this file takes an argument
# named ``range`` that shadows the builtin -- confirm against the callers.
irange = range


# [docs]  (documentation-link residue from the HTML rendering of this module)
def print_timing(func):
    """
    Decorator that logs the wall-clock execution time of ``func``.

    Included here because CLASS files can take a little while to read;
    this should probably be replaced with a progressbar.

    Parameters
    ----------
    func : callable
        The function to be timed.

    Returns
    -------
    wrapper : callable
        A function that forwards all positional and keyword arguments to
        ``func``, reports the elapsed time through ``log.info`` and returns
        whatever ``func`` returned.  The wrapper carries over ``func``'s
        ``__name__``, ``__doc__`` and the rest of its metadata.
    """
    # Imported locally so the decorator does not depend on a module-level
    # ``functools`` import being present.
    import functools

    @functools.wraps(func)
    def wrapper(*arg, **kwargs):
        t1 = time.time()
        res = func(*arg, **kwargs)
        t2 = time.time()
        log.info('%s took %0.5g s' % (func.__name__, (t2-t1)))
        return res
    return wrapper



# [docs]  (documentation-link residue from the HTML rendering of this module)
def ensure_bytes(string):
    """
    Ensure a given string is in byte form

    Parameters
    ----------
    string : str or bytes
        The text to convert.  A value that is already ``bytes`` is returned
        unchanged.

    Returns
    -------
    bytes
        On Python 3 the UTF-8 encoding of ``string``; on Python 2 the result
        of ``str(string)``.
    """
    # bytes(b'...', 'utf-8') raises TypeError on Python 3, so pass byte
    # strings straight through (on Python 2 ``bytes`` is ``str``).
    if isinstance(string, bytes):
        return string
    if six.PY3:
        return bytes(string, 'utf-8')
    else:
        return str(string)


""" Specification: http://iram.fr/IRAMFR/GILDAS/doc/html/class-html/node58.html """
# Four-character file code (as found at the very start of a CLASS file)
# mapped to a descriptive label.
filetype_dict = {'1A  ':'Multiple_IEEE',
                 '1   ':'Multiple_Vax',
                 '1B  ':'Multiple_EEEI',
                 '2A  ':'v2',
                 '2   ':'v2',
                 '2B  ':'v2',
                 '9A  ':'Single_IEEE',
                 '9   ':'Single_Vax',
                 '9B  ':'Single_EEEI'}
# Register every code a second time in byte form so that both text and byte
# keys resolve.  The key list is copied first because the dict is modified
# inside the loop.
for key in list(filetype_dict.keys()):
    filetype_dict[ensure_bytes(key)] = filetype_dict[key]

# File code -> file-format version ('v1' or 'v2').  ``_read_first_record``
# looks up the raw result of ``f.read(4)`` in this table to choose a parser.
# Only a subset of the codes in ``filetype_dict`` is listed here.
fileversion_dict = {'1A  ':'v1',
                    '2A  ':'v2',
                    '9A  ':'v1', # untested
                   }
# As above: also register the byte form of each key.
for key in list(fileversion_dict.keys()):
    fileversion_dict[ensure_bytes(key)] = fileversion_dict[key]

# Record length per (unpadded) file code.
# NOTE(review): units are presumably bytes (512 B = 128 words) -- confirm;
# this table is not referenced in the visible part of the file.
record_lengths = {'1A': 512,
                  '2A': 1024*4}

# Numeric section identifier -> section name.  The names match the keys of
# ``keys_lengths`` where a field layout is defined for the section.
header_id_numbers = {0: 'USER CODE',
                     -1: 'COMMENT',
                     -2: 'GENERAL',
                     -3: 'POSITION',
                     -4: 'SPECTRO',
                     -5: 'BASELINE',
                     -6: 'HISTORY',
                     -7: 'UNKNOWN-APEX',
                    # -8: 'SWITCH',
                     -9: 'GAUSSFIT', # "private"; see class-interfaces-private.f90
                     -10: 'DRIFT',
                     -11: 'BEAMSWITCH', # "private"; see class-interfaces-private.f90
                     -12: 'SHELLFIT', # "private"; see class-interfaces-private.f90
                     -13: 'NH3FIT', # "private"; see class-interfaces-private.f90
                     -14: 'CALIBRATION',
                     -18: 'ABSFIT', # "private"; see class-interfaces-private.f90
                    }

# Numeric section identifier -> section length; ``None`` marks a section
# whose length is not fixed.
# NOTE(review): lengths are presumably counted in 4-byte words -- confirm.
header_id_lengths = {-2: 9, # may really be 10?
                     -3: 17,
                     -4: 17,
                     -5: None, # variable length
                     -6: 3, # variable length
                     -14: 25,
                    }

# from packages/classic/lib/classic_mod.f90
# NOTE(review): not referenced in the visible part of the file; meaning to be
# confirmed against classic_mod.f90.
filedescv2_nw1=14


"""
GENERAL
 integer(kind=obsnum_length) :: num      ! [         ] Observation number
 integer(kind=4)             :: ver      ! [         ] Version number
 integer(kind=4)             :: teles(3) ! [         ] Telescope name
 integer(kind=4)             :: dobs     ! [MJD-60549] Date of observation
 integer(kind=4)             :: dred     ! [MJD-60549] Date of reduction
 integer(kind=4)             :: typec    ! [     code] Type of coordinates
 integer(kind=4)             :: kind     ! [     code] Type of data
 integer(kind=4)             :: qual     ! [     code] Quality of data
 integer(kind=4)             :: subscan  ! [         ] Subscan number
 integer(kind=obsnum_length) :: scan     ! [         ] Scan number
 ! Written in the entry
 real(kind=8)                :: ut       ! 1-2 [  rad] UT of observation
 real(kind=8)                :: st       ! 3-4 [  rad] LST of observation
 real(kind=4)                :: az       ! 5   [  rad] Azimuth
 real(kind=4)                :: el       ! 6   [  rad] Elevation
 real(kind=4)                :: tau      ! 7   [neper] Opacity
 real(kind=4)                :: tsys     ! 8   [    K] System temperature
 real(kind=4)                :: time     ! 9   [    s] Integration time
 ! Not in this section in file
 integer(kind=4)             :: xunit    ! [ code] X unit (if X coordinates section is present)
 ! NOT in data ---
 character(len=12)           :: cdobs    ! [string] Duplicate of dobs
 character(len=12)           :: cdred    ! [string] Duplicate of dred

"""

# Field layout of each header section, keyed by section name.
# Every entry is a tuple ``(name, n, dtype)``.  ``_read_header`` reads exactly
# one item of ``dtype`` per tuple (in list order) and stores it under ``name``;
# it does not use ``n``.
# NOTE(review): ``n`` looks like the field size in 4-byte words (e.g. 3 for a
# 12-character string, 2 for a float64), but several entries do not follow
# that pattern -- confirm before relying on it.
keys_lengths = {
        'unknown': [
     ('NUM'     ,1,'int32'), # Observation number
     ('VER'     ,1,'int32'), # Version number
     ('TELES'   ,3,'|S12') , # Telescope name
     ('DOBS'    ,1,'int32'), # Date of observation
     ('DRED'    ,1,'int32'), # Date of reduction
     ('TYPEC'   ,1,'int32'), # Type of coordinates
     ('KIND'    ,1,'int32'), # Type of data
     ('QUAL'    ,1,'int32'), # Quality of data
     ('SCAN'    ,1,'int32'), # Scan number
     ('SUBSCAN' ,1,'int32'), # Subscan number
     ],

       'COMMENT': [ # -1
                    ('LTEXT',1,'int32'), # integer(kind=4) :: ltext   ! Length of comment
                    ('CTEXT',1024//4,'|S1024'), # character ctext*1024       ! Comment string
                   ],

       'GENERAL': [ # -2
     ('UT'      ,2,'float64'), #  rad UT of observation
     ('ST'      ,2,'float64'), #  rad LST of observation
     ('AZ'      ,1,'float32'), #  rad Azimuth
     ('EL'      ,1,'float32'), #  rad Elevation
     ('TAU'     ,1,'float32'), # neper Opacity
     ('TSYS'    ,1,'float32'), #    K System temperature
     ('TIME'    ,1,'float32'), #    s Integration time
                    # XUNIT should not be there?
     #( 'XUNIT'   ,1,'int32'),   # code X unit (if xcoord_sec is present)
     ] ,
     'POSITION': [ # -3
    ('SOURC',3,'|S12')  , #  [ ] Source name
    ('EPOCH',1,'float32'), #  [ ] Epoch of coordinates
    ('LAM'  ,2,'float64'), #[rad] Lambda
    ('BET'  ,2,'float64'), #[rad] Beta
    ('LAMOF',1,'float32'), #  [rad] Offset in Lambda
    ('BETOF',1,'float32'), #  [rad] Offset in Beta
    ('PROJ' ,1,'int32')  , # [rad] Projection system
    ('SL0P' ,1,'float64'), # lambda of descriptive system # MAY NOT EXIST IN OLD CLASS
    ('SB0P' ,1,'float64'), # beta of descriptive system   # MAY NOT EXIST IN OLD CLASS
    ('SK0P' ,1,'float64'), # angle of descriptive system  # MAY NOT EXIST IN OLD CLASS
    ],
     'SPECTRO': [ # -4
     #('align'  ,1,'int32'),   #  [    ] Alignment padding
     ('LINE'   ,3,'|S12'),    #  [    ] Line name
     ('RESTF'  ,2,'float64'), #  [ MHz] Rest frequency
     ('NCHAN'  ,1,'int32'),   #  [    ] Number of channels
     ('RCHAN'  ,1,'float32'), #  [    ] Reference channels
     ('FRES'   ,1,'float32'), #  [ MHz] Frequency resolution
     ('FOFF'   ,1,'float32'), #  [ MHz] Frequency offset
     ('VRES'   ,1,'float32'), #  [km/s] Velocity resolution
     ('VOFF'   ,1,'float32'), #  [km/s] Velocity at reference channel
     ('BAD'    ,1,'float32'), #  [    ] Blanking value
     #('ALIGN_1',1,'int32'),   #  [    ] Alignment padding
     ('IMAGE'  ,2,'float64'), #  [ MHz] Image frequency
     #('ALIGN_2',1,'int32'),   #  [    ] Alignment padding
     ('VTYPE'  ,1,'int32'),   #  [code] Type of velocity
     ('DOPPLER',2,'float64'), #  [    ] Doppler factor = -V/c (CLASS convention)
     ],
     'CALIBRATION': [ # -14
     ('ALIGN',1,'int32'),    # BUFFER (it's a zero - it is not declared in the docs!!!!)
     ('BEEFF',1,'float32'),   # [ ] Beam efficiency
     ('FOEFF',1,'float32'),   # [ ] Forward efficiency
     ('GAINI',1,'float32'),   # [ ] Image/Signal gain ratio
     ('H2OMM',1,'float32'),   # [ mm] Water vapor content
     ('PAMB',1,'float32'),   # [ hPa] Ambient pressure
     ('TAMB',1,'float32'),   # [ K] Ambient temperature
     ('TATMS',1,'float32'),   # [ K] Atmosphere temp. in signal band
     ('TCHOP',1,'float32'),   # [ K] Chopper temperature
     ('TCOLD',1,'float32'),   # [ K] Cold load temperature
     ('TAUS',1,'float32'),   # [neper] Opacity in signal band
     ('TAUI',1,'float32'),   # [neper] Opacity in image band
     ('TATMI',1,'float32'),   # [ K] Atmosphere temp. in image band
     ('TREC',1,'float32'),   # [ K] Receiver temperature
     ('CMODE',1,'int32'),   # [ code] Calibration mode
     ('ATFAC',1,'float32'),   # [ ] Applied calibration factor
     ('ALTI',1,'float32'),   # [ m] Site elevation
     ('COUNT',3,'3float32'),   # [count] Power of Atm., Chopp., Cold
     ('LCALOF',1,'float32'),   # [ rad] Longitude offset for sky measurement
     ('BCALOF',1,'float32'),   # [ rad] Latitude offset for sky measurement
     ('GEOLONG',1,'float64'),   # [ rad] Geographic longitude of observatory # MAY NOT EXIST IN OLD CLASS
     ('GEOLAT',1,'float64'),   # [ rad] Geographic latitude of observatory   # MAY NOT EXIST IN OLD CLASS
         ],
    'BASELINE':[
        ('DEG',1,'int32'),       #! [         ] Degree of last baseline
        ('SIGFI',1,'float32'),   #! [Int. unit] Sigma
        ('AIRE',1,'float32'),    #! [Int. unit] Area under windows
        ('NWIND',1,'int32'),     #! [ ] Number of line windows
        # WARNING: These should probably have 'n', the second digit, = NWIND
        # The docs are really unclear about this, they say "W1(MWIND)"
        ('W1MWIND',1,'float32'), #! [km/s] Lower limits of windows
        ('W2MWIND',1,'float32'), #! [km/s] Upper limits of windows
        ('SINUS',3,'float32'),   #![]  Sinus baseline results
    ],

    'DRIFT':[ # 16?
        ('FREQ',1,'float64') ,  #! [ MHz] Rest frequency                   real(kind=8)    ::
        ('WIDTH',1,'float32'),  #! [ MHz] Bandwidth                        real(kind=4)    ::
        ('NPOIN',1,'int32')  ,  #! [    ] Number of data points              integer(kind=4) ::
        ('RPOIN',1,'float32'),  #! [    ] Reference point                  real(kind=4)    ::
        ('TREF',1,'float32') ,  #! [   ?] Time at reference                real(kind=4)    ::
        ('AREF',1,'float32') ,  #! [ rad] Angular offset at ref.           real(kind=4)    ::
        ('APOS',1,'float32') ,  #! [ rad] Position angle of drift          real(kind=4)    ::
        ('TRES',1,'float32') ,  #! [   ?] Time resolution                  real(kind=4)    ::
        ('ARES',1,'float32') ,  #! [ rad] Angular resolution               real(kind=4)    ::
        ('BAD',1,'float32')  ,  #! [    ] Blanking value                   real(kind=4)    ::
        ('CTYPE',1,'int32')  ,  #! [code] Type of offsets                    integer(kind=4) ::
        ('CIMAG',1,'float64'),  #! [ MHz] Image frequency                  real(kind=8)    ::
        ('COLLA',1,'float32'),  #! [   ?] Collimation error Az             real(kind=4)    ::
        ('COLLE',1,'float32'),  #! [   ?] Collimation error El             real(kind=4)    ::
    ],

     }

def _read_bytes(f, n):
    '''Read the next `n` bytes (from idlsave)'''
    return f.read(n)

"""
Warning: UNCLEAR what endianness should be!
Numpy seemed to get it right, and I think numpy assumes NATIVE endianness
"""

def _read_byte(f):
    '''Read a single byte (from idlsave)'''
    return numpy.uint8(struct.unpack('=B', f.read(4)[:1])[0])

def _read_int16(f):
    '''Read a signed 16-bit integer (from idlsave)'''
    return numpy.int16(struct.unpack('=h', f.read(4)[2:4])[0])

def _read_int32(f):
    '''Read a signed 32-bit integer (from idlsave)'''
    return numpy.int32(struct.unpack('=i', f.read(4))[0])

def _read_int64(f):
    '''Read a signed 64-bit integer '''
    return numpy.int64(struct.unpack('=q', f.read(8))[0])

def _read_float32(f):
    '''Read a 32-bit float (from idlsave)'''
    return numpy.float32(struct.unpack('=f', f.read(4))[0])

def _align_32(f):
    '''Align to the next 32-bit position in a file (from idlsave)'''

    pos = f.tell()
    if pos % 4 != 0:
        f.seek(pos + 4 - pos % 4)
    return

def _read_word(f,length):
    if length > 0:
        chars = _read_bytes(f, length)
        _align_32(f)
    else:
        chars = None
    return chars

def _read_int(f):
    return struct.unpack('i',f.read(4))


# [docs]  (documentation-link residue from the HTML rendering of this module)
def is_ascii(s):
    """Check whether a byte or text string contains only ASCII characters

    Parameters
    ----------
    s : bytes or str
        The string to be checked

    Returns
    -------
    is_ascii : bool
        Returns True if all characters in the string are ascii. False
        otherwise.
    """
    try:
        if isinstance(s, bytes):
            # byte strings (and Python 2 ``str``) must decode cleanly
            s.decode('ascii')
        else:
            # text strings must encode cleanly
            s.encode('ascii')
    except (UnicodeDecodeError, UnicodeEncodeError):
        # Non-ASCII content is the "False" answer, not an error.
        return False
    return True


def is_all_null(s):
    """Check whether a string consists solely of NUL characters

    Parameters
    ----------
    s : bytes, bytearray, str or iterable
        The string to be checked.

    Returns
    -------
    bool
        True if every element of ``s`` is a NUL (``\\x00``), which includes
        the empty string; False otherwise.
    """
    if isinstance(s, (bytes, bytearray)):
        # Iterating over bytes yields ints on Python 3, so comparing each
        # element with '\x00' / b'\x00' can never match there; count the NUL
        # bytes instead.
        return s.count(b'\x00') == len(s)
    return all(x=='\x00' for x in s) or all(x==b'\x00' for x in s)


"""
from clic_file.f90: v1, v2
    integer(kind=4)  :: bloc       !  1   : observation address [records]       integer(kind=8)  :: bloc       !  1- 2: observation address [records]     integer(kind=4)   :: bloc     !  1   : block read from index
    integer(kind=4)  :: num        !  2   : observation number                  integer(kind=4)  :: word       !  3   : address offset      [4-bytes]     integer(kind=4)   :: num      !  2   : number read
    integer(kind=4)  :: ver        !  3   : observation version                 integer(kind=4)  :: ver        !  4   : observation version               integer(kind=4)   :: ver      !  3   : version read from index
    integer(kind=4)  :: sourc(3)   !  4- 6: source name                         integer(kind=8)  :: num        !  5- 6: observation number                character(len=12) :: csour    !  4- 6: source read from index
    integer(kind=4)  :: line(3)    !  7- 9: line name                           integer(kind=4)  :: sourc(3)   !  7- 9: source name                       character(len=12) :: cline    !  7- 9: line read from index
    integer(kind=4)  :: teles(3)   ! 10-12: telescope name                      integer(kind=4)  :: line(3)    ! 10-12: line name                         character(len=12) :: ctele    ! 10-12: telescope read from index
    integer(kind=4)  :: dobs       ! 13   : observation date    [class_date]    integer(kind=4)  :: teles(3)   ! 13-15: telescope name                    integer(kind=4)   :: dobs     ! 13   : date obs. read from index
    integer(kind=4)  :: dred       ! 14   : reduction date      [class_date]    integer(kind=4)  :: dobs       ! 16   : observation date    [class_date]  integer(kind=4)   :: dred     ! 14   : date red. read from index
    real(kind=4)     :: off1       ! 15   : lambda offset       [radian]        integer(kind=4)  :: dred       ! 17   : reduction date      [class_date]  real(kind=4)      :: off1     ! 15   : read offset 1
    real(kind=4)     :: off2       ! 16   : beta offset         [radian]        real(kind=4)     :: off1       ! 18   : lambda offset       [radian]      real(kind=4)      :: off2     ! 16   : read offset 2
    integer(kind=4)  :: typec      ! 17   : coordinates types                   real(kind=4)     :: off2       ! 19   : beta offset         [radian]      integer(kind=4)   :: type     ! 17   : type of read offsets
    integer(kind=4)  :: kind       ! 18   : data kind                           integer(kind=4)  :: typec      ! 20   : coordinates types                 integer(kind=4)   :: kind     ! 18   : type of observation
    integer(kind=4)  :: qual       ! 19   : data quality                        integer(kind=4)  :: kind       ! 21   : data kind                         integer(kind=4)   :: qual     ! 19   : Quality read from index
    integer(kind=4)  :: scan       ! 20   : scan number                         integer(kind=4)  :: qual       ! 22   : data quality                      integer(kind=4)   :: scan     ! 20   : Scan number read from index
    integer(kind=4)  :: proc       ! 21   : procedure type                      integer(kind=4)  :: scan       ! 23   : scan number                       real(kind=4)      :: posa     ! 21   : Position angle
    integer(kind=4)  :: itype      ! 22   : observation type                    integer(kind=4)  :: proc       ! 24   : procedure type                    integer(kind=4)   :: subscan  ! 22   : Subscan number
    real(kind=4)     :: houra      ! 23   : hour angle          [radian]        integer(kind=4)  :: itype      ! 25   : observation type                  integer(kind=4)   :: pad(10)  ! 23-32: Pad to 32 words
    integer(kind=4)  :: project    ! 24   : project name                        real(kind=4)     :: houra      ! 26   : hour angle          [radian]
    integer(kind=4)  :: pad1       ! 25   : unused word                         integer(kind=4)  :: project(2) ! 27   : project name
    integer(kind=4)  :: bpc        ! 26   : baseline bandpass cal status        integer(kind=4)  :: bpc        ! 29   : baseline bandpass cal status
    integer(kind=4)  :: ic         ! 27   : instrumental cal status             integer(kind=4)  :: ic         ! 30   : instrumental cal status
    integer(kind=4)  :: recei      ! 28   : receiver number                     integer(kind=4)  :: recei      ! 31   : receiver number
    real(kind=4)     :: ut         ! 29   : UT                  [s]             real(kind=4)     :: ut         ! 32   : UT                  [s]
    integer(kind=4)  :: pad2(3)    ! 30-32: padding to 32 4-bytes word

equivalently

 integer(kind=obsnum_length) :: num      ! [         ] Observation number
 integer(kind=4)             :: ver      ! [         ] Version number
 integer(kind=4)             :: teles(3) ! [         ] Telescope name
 integer(kind=4)             :: dobs     ! [MJD-60549] Date of observation
 integer(kind=4)             :: dred     ! [MJD-60549] Date of reduction
 integer(kind=4)             :: typec    ! [     code] Type of coordinates
 integer(kind=4)             :: kind     ! [     code] Type of data
 integer(kind=4)             :: qual     ! [     code] Quality of data
 integer(kind=4)             :: subscan  ! [         ] Subscan number
 integer(kind=obsnum_length) :: scan     ! [         ] Scan number
"""

"""
index.f90:

  call conv%read%i8(data(1), indl%bloc,   1)  ! bloc
  call conv%read%i4(data(3), indl%word,   1)  ! word
  call conv%read%i8(data(4), indl%num,    1)  ! num
  call conv%read%i4(data(6), indl%ver,    1)  ! ver
  call conv%read%cc(data(7), indl%csour,  3)  ! csour
  call conv%read%cc(data(10),indl%cline,  3)  ! cline
  call conv%read%cc(data(13),indl%ctele,  3)  ! ctele
  call conv%read%i4(data(16),indl%dobs,   1)  ! dobs
  call conv%read%i4(data(17),indl%dred,   1)  ! dred
  call conv%read%r4(data(18),indl%off1,   1)  ! off1
  call conv%read%r4(data(19),indl%off2,   1)  ! off2
  call conv%read%i4(data(20),indl%type,   1)  ! type
  call conv%read%i4(data(21),indl%kind,   1)  ! kind
  call conv%read%i4(data(22),indl%qual,   1)  ! qual
  call conv%read%r4(data(23),indl%posa,   1)  ! posa
  call conv%read%i8(data(24),indl%scan,   1)  ! scan
  call conv%read%i4(data(26),indl%subscan,1)  ! subscan
  if (isv3) then
    call conv%read%r8(data(27),indl%ut,   1)  ! ut
  else
"""

def _read_indices(f, file_description):
    #if file_description['version'] in (1,2):
    #    extension_positions = (file_description['aex']-1)*file_description['reclen']*4
    #    all_indices = {extension:
    #                   [_read_index(f,
    #                                filetype=file_description['version'],
    #                                entry=ii,
    #                                #position=position,
    #                               )
    #                       for ii in range(file_description['lex1'])]
    #                   for extension,position in enumerate(extension_positions)
    #                   if position > 0
    #                  }

    #elif file_description['version'] == 1:
    extension_positions = ((file_description['aex'].astype('int64')-1)
                           *file_description['reclen']*4)
    all_indices = [_read_index(f,
                               filetype=file_description['version'],
                               # 1-indexed files
                               entry_number=ii+1,
                               file_description=file_description,
                              )
                       for ii in range(file_description['xnext']-1)]
    #else:
    #    raise ValueError("Invalid file version {0}".format(file_description['version']))


    return all_indices


def _find_index(entry_number, file_description, return_position=False):
    if file_description['gex'] == 10:
        kex=(entry_number-1)//file_description['lex1'] + 1
    else:
        # exponential growth:
        #kex = gi8_dicho(file_description['nex'], file_description['lexn'], entry_number) - 1
        kex = len([xx for xx in file_description['lexn'] if xx<entry_number])

    ken = entry_number - file_description['lexn'][kex-1]
    #! Find ken (relative entry number in the extension, starts from 1)
    #ken = entry_num - file%desc%lexn(kex-1)

    kb = ((ken-1)*file_description['lind'])//file_description['reclen']
    #kb = ((ken-1)*file%desc%lind)/file%desc%reclen  ! In the extension, the
    #    ! relative record position (as an offset, starts from 0) where the
    #    ! Entry Index starts. NB: there can be a non-integer number of Entry
    #    ! Indexes per record

    # Subtract 1: 'aex' is 1-indexed
    kbl = (file_description['aex'][kex-1]+kb)-1
    # kbl = file%desc%aex(kex)+kb  ! The absolute record number where the Entry Index goes

    k = ((ken-1)*file_description['lind'])  % file_description['reclen']
    #k = mod((ken-1)*file%desc%lind,file%desc%reclen)+1  ! = in the record, the
    #  ! first word of the Entry Index of the entry number 'entry_num'


    if return_position:
        return (kbl*file_description['reclen']+k)*4
    else:
        return kbl,k


def _read_index(f, filetype='v1', DEBUG=False, clic=False, position=None,
                entry_number=None, file_description=None):
    """
    Read one Entry Index from an open CLASS file.

    Parameters
    ----------
    f : file
        Open file to read from.  Parts of the v1 layout are read with
        ``numpy.fromfile``.
    filetype : str or int
        ``'1A  '``, ``'v1'`` or ``1`` selects the version-1 layout;
        ``'2A  '``, ``'v2'`` or ``2`` selects the version-2 layout.
    DEBUG : bool
        If True, print a message when padding bytes had to be skipped at the
        end of a version-1 index.
    clic : bool
        Version 1 only: read the trailing fields using the CLIC layout
        instead of the CLASS one.
    position : int or None
        If given, seek to this offset before reading.
    entry_number : int or None
        If given, the offset is computed with `_find_index` and the file is
        moved there (this seek happens after, and so overrides, `position`).
    file_description : dict or None
        File descriptor passed on to `_find_index`; only used together with
        `entry_number`.

    Returns
    -------
    index : dict
        The fields of the index.  A number of keys are duplicated under both
        the v1 ("X"-prefixed) and v2 names, and 'MJD' and a rescaled 'DOBS'
        are added at the end.

    Raises
    ------
    ValueError
        If the source, line or telescope name fails the ASCII check (and, for
        version 2, if any of them is all-null).
    NotImplementedError
        If `filetype` is not one of the recognized values.
    """

    if position is not None:
        f.seek(position)
    if entry_number is not None:
        indpos = _find_index(entry_number, file_description, return_position=True)
        f.seek(indpos)

    # start of the index; used for the size check and in error messages
    x0 = f.tell()

    if filetype in ('1A  ','v1', 1):
        log.debug('Index filetype 1A')
        # The values are read in the order in which they are written here.
        index = {
                   "XBLOC":_read_int32(f),
                   "XNUM":_read_int32(f),
                   "XVER":_read_int32(f),
                   "XSOURC":_read_word(f,12),
                   "XLINE":_read_word(f,12),
                   "XTEL":_read_word(f,12),
                   "XDOBS":_read_int32(f),
                   "XDRED":_read_int32(f),
                   "XOFF1":_read_float32(f),# first offset (real, radians)
                   "XOFF2":_read_float32(f),# second offset (real, radians)
                   "XTYPE":_read_int32(f),# coordinate system ('EQ'', 'GA', 'HO')
                   "XKIND":_read_int32(f),# Kind of observation (0: spectral, 1: continuum, )
                   "XQUAL":_read_int32(f),# Quality (0-9)
                   "XSCAN":_read_int32(f),# Scan number
                }
        # Duplicate the v1 fields under the names used by the v2 layout.
        index['BLOC'] = index['XBLOC'] # v2 compatibility
        index['WORD'] = 1 # v2 compatibility
        index['SOURC'] = index['CSOUR'] = index['XSOURC']
        index['DOBS'] = index['CDOBS'] = index['XDOBS']
        index['CTELE'] = index['XTEL']
        index['LINE'] = index['XLINE']
        index['OFF1'] = index['XOFF1']
        index['OFF2'] = index['XOFF2']
        index['QUAL'] = index['XQUAL']
        index['SCAN'] = index['XSCAN']
        index['KIND'] = index['XKIND']
        if clic: # use header set up in clic
            nextchunk = {
                        "XPROC":_read_int32(f),# "procedure type"
                        "XITYPE":_read_int32(f),#
                        "XHOURANG":_read_float32(f),#
                        "XPROJNAME":_read_int32(f),#
                        "XPAD1":_read_int32(f),
                        "XBPC" :_read_int32(f),
                        "XIC" :_read_int32(f),
                        "XRECEI" :_read_int32(f),
                        "XUT":_read_float32(f),
                        "XPAD2":numpy.fromfile(f,count=3,dtype='int32') # BLANK is NOT ALLOWED!!! It is a special KW
            }
        else:
            nextchunk = {"XPOSA":_read_float32(f),
                         "XSUBSCAN":_read_int32(f),
                         'XPAD2': numpy.fromfile(f,count=10,dtype='int32'),
                         }
            nextchunk['SUBSCAN'] = nextchunk['XSUBSCAN']
            nextchunk['POSA'] = nextchunk['XPOSA']
        index.update(nextchunk)
        # A version-1 index occupies 128 bytes.  If a different number of
        # bytes was consumed, read the difference; despite its name,
        # 'missed_bits' holds the number of bytes consumed so far.
        if (f.tell() - x0 != 128):
            missed_bits = (f.tell()-x0)
            X = f.read(128-missed_bits)
            if DEBUG: print("read_index missed %i bits: %s" % (128-missed_bits,X))
            #raise IndexError("read_index did not successfully read 128 bytes at %i.  Read %i bytes." % (x0,f.tell()-x0))
        if any(not is_ascii(index[x]) for x in ('XSOURC','XLINE','XTEL')):
            raise ValueError("Invalid index read from {0}.".format(x0))
    elif filetype in ('2A  ','v2', 2):
        log.debug('Index filetype 2A')
        # The values are read in the order in which they are written here;
        # the trailing comments give the corresponding call in index.f90.
        index = {
            "BLOC"   : _read_int64(f)  ,  #(data(1),  1)  ! bloc
            "WORD"   : _read_int32(f)  ,  #(data(3),  1)  ! word
            "NUM"    : _read_int64(f)  ,  #(data(4),  1)  ! num
            "VER"    : _read_int32(f)  ,  #(data(6),  1)  ! ver
            "CSOUR"  : _read_word(f,12),  #(data(7),  3)  ! csour
            "CLINE"  : _read_word(f,12),  #(data(10), 3)  ! cline
            "CTELE"  : _read_word(f,12),  #(data(13), 3)  ! ctele
            "DOBS"   : _read_int32(f)  ,  #(data(16), 1)  ! dobs
            "DRED"   : _read_int32(f)  ,  #(data(17), 1)  ! dred
            "OFF1"   : _read_float32(f),  #(data(18), 1)  ! off1
            "OFF2"   : _read_float32(f),  #(data(19), 1)  ! off2
            "TYPE"   : _read_int32(f)  ,  #(data(20), 1)  ! type
            "KIND"   : _read_int32(f)  ,  #(data(21), 1)  ! kind
            "QUAL"   : _read_int32(f)  ,  #(data(22), 1)  ! qual
            "POSA"   : _read_float32(f),  #(data(23), 1)  ! posa
            "SCAN"   : _read_int64(f)  ,  #(data(24), 1)  ! scan
            "SUBSCAN": _read_int32(f)  ,  #(data(26), 1)  ! subscan
        }
        #last24bits = f.read(24)
        #log.debug("Read 24 bits: '{0}'".format(last24bits))
        if any((is_all_null(index[x]) or not is_ascii(index[x]))
               for x in ('CSOUR','CLINE','CTELE')):
            raise ValueError("Invalid index read from {0}.".format(x0))
        # Duplicate the v2 fields under the names used by the v1 layout.
        index['SOURC'] = index['XSOURC'] = index['CSOUR']
        index['LINE'] = index['XLINE'] = index['CLINE']
        index['XKIND'] = index['KIND']
        # 'CDOBS' is not among the keys read above, so the lookup below
        # raises KeyError and the except branch fills in CDOBS/XDOBS from DOBS.
        try:
            index['DOBS'] = index['XDOBS'] = index['CDOBS']
        except KeyError:
            index['CDOBS'] = index['XDOBS'] = index['DOBS']

    else:
        raise NotImplementedError("Filetype {0} not implemented.".format(filetype))

    # from kernel/lib/gsys/date.f90: gag_julda
    index['MJD'] = index['DOBS'] + 60549
    class_dobs = index['DOBS']
    # NOTE(review): this overwrites the integer CLASS date with what looks
    # like a decimal year -- confirm the intended unit.
    index['DOBS'] = ((class_dobs + 365*2025)/365.2425 + 1)
    # SLOW
    #index['DATEOBS'] = Time(index['DOBS'], format='jyear')
    #index['DATEOBSS'] = index['DATEOBS'].iso

    log.debug("Indexing finished at {0}".format(f.tell()))
    return index

def _read_header(f, type=0, position=None):
    """
    Read a header entry from a CLASS file
    (helper function)

    Parameters
    ----------
    f : file
        Open file to read from (read with ``numpy.fromfile``).
    type : str
        Section name; must be a key of ``keys_lengths`` for anything to be
        read.
    position : int or None
        If given, seek to this offset before reading.

    Returns
    -------
    dict
        ``{field name: value}`` for the fields listed in
        ``keys_lengths[type]``, read in that order, one item per field.  An
        empty dict is returned for a section name with no known layout.
    """
    if position is not None:
        f.seek(position)
    if type not in keys_lengths:
        # Unknown sections have always produced an empty dict: the ValueError
        # that used to follow the return statement could never be raised.
        return {}
    # Only the name and dtype of each entry are needed; the middle element of
    # the tuple is not used when reading.
    return {name: numpy.fromfile(f, count=1, dtype=dtype)[0]
            for name, _nwords, dtype in keys_lengths[type]}

def _read_first_record(f):
    """
    Read the file descriptor at the start of a CLASS file.

    The first four bytes of the file are the file code; it selects the
    version-1 or version-2 parser through ``fileversion_dict``.

    Parameters
    ----------
    f : file
        Open CLASS file.  It is rewound to the start before reading.

    Returns
    -------
    dict
        The file description produced by `_read_first_record_v1` or
        `_read_first_record_v2`.

    Raises
    ------
    ValueError
        If the file code is not listed in ``fileversion_dict``.
    """
    f.seek(0)
    filetype = f.read(4)
    # .get() rather than [] so that an unknown code reaches the ValueError
    # below instead of failing with a bare KeyError on the lookup.
    version = fileversion_dict.get(filetype)
    if version == 'v1':
        return _read_first_record_v1(f)
    elif version == 'v2':
        return _read_first_record_v2(f)
    else:
        raise ValueError("Unrecognized filetype {0}".format(filetype))

def _read_first_record_v1(f, record_length_words=128):
    r"""
    Parse the file descriptor of a version-1 CLASS file.

    The descriptor spans the first two records of the file:

    Position           & Parameter    & Fortran Kind & Purpose \\
    \hline
    1                & {\tt code}   & Character*4  & File code                           \\
    2                & {\tt next}   & Integer*4    & Next free record                    \\
    3                & {\tt lex}    & Integer*4    & Length of first extension (number of entries) \\
    4                & {\tt nex}    & Integer*4    & Number of extensions                \\
    5                & {\tt xnext}  & Integer*4    & Next available entry number         \\
    6:2*{\tt reclen} & {\tt ex(:)}  & Integer*4    & Array of extension addresses

    from classic_mod.f90:
     integer(kind=4) :: code         ! 1     File code
     integer(kind=4) :: next         ! 2     Next free record
     integer(kind=4) :: lex          ! 3     Extension length (number of entries)
     integer(kind=4) :: nex          ! 4     Number of extensions
     integer(kind=4) :: xnext        ! 5     Next available entry number
     integer(kind=4) :: aex(mex_v1)  ! 6:256 Extension addresses

    from old (<dec2013) class, file.f90:
     read(ilun,rec=1,err=11,iostat=ier) ibx%code,ibx%next,   &
          &      ibx%ilex,ibx%imex,ibx%xnext

    also uses filedesc_v1tov2 from classic/lib/file.f90

    Parameters
    ----------
    f : file
        Open CLASS file (read with ``numpy.fromfile``); rewound to the start
        before reading.
    record_length_words : int
        Record length in 4-byte words; should be 128w = 512 bytes.

    Returns
    -------
    dict
        The file description.  Besides the values read from the file it
        carries fixed entries ('gex', 'vind', 'version', 'lind', ...) so that
        it offers the same keys as a version-2 description.
    """
    f.seek(0)

    # the five leading words, in file order
    code = f.read(4)
    next_free = _read_int32(f)
    lex = _read_int32(f)
    nex = _read_int32(f)
    xnext = _read_int32(f)

    # the remainder of the two records holds the extension addresses;
    # zero entries are dropped
    ex = np.fromfile(f, count=(record_length_words*2-5), dtype='int32')
    ex = ex[ex!=0]

    # cumulative entry limits of the extensions (every extension has 'lex'
    # entries)
    lexn = np.arange(nex+1) * lex

    file_description = {
        'code': code,
        'next': next_free,
        'lex': lex,
        'nex': nex,
        'xnext': xnext,
        'gex': 10.,
        'vind': 1, # classic_vind_v1 packages/classic/lib/classic_mod.f90
        'version': 1,
        'nextrec': next_free, # this can't be...
        'nextword': 1,
        'lind': 32, #classic_lind_v1 packages/classic/lib/classic_mod.f90
        'kind': 'unknown',
        'flags': 0,
        'reclen': record_length_words,
        'ex': ex,
        'lex1': lex, # number of entries
        'lexn': lexn,
        'nentries': np.sum(lexn),
        'aex': ex[:nex],
    }
    # the whole descriptor (two 512-byte records) must have been consumed
    assert f.tell() == 1024
    # Something is not quite right with the 'ex' parsing
    #assert len(file_description['ex']) == file_description['nex']
    return file_description

def _read_first_record_v2(f):
    r"""
    Parse the file descriptor (first record) of a version-2 CLASS file.

    The file is rewound to offset 0 and the fields are read sequentially in
    the order given by the table below (from packages/classic/lib/file.f90):

    Position        & Parameter      & Fortran Kind & Purpose                               & Unit    \\
    \hline
    1               & {\tt code}     & Character*4  & File code                             &  -      \\
    2               & {\tt reclen}   & Integer*4    & Record length                         & words   \\
    3               & {\tt kind}     & Integer*4    & File kind                             &  -      \\
    4               & {\tt vind}     & Integer*4    & Index version                         &  -      \\
    5               & {\tt lind}     & Integer*4    & Index length                          & words   \\
    6               & {\tt flags}    & Integer*4    & Bit flags. \#1: single or multiple,   &  -      \\
                    &                &              & \#2-32: provision (0-filled)          &         \\
    \hline
    7:8             & {\tt xnext}    & Integer*8    & Next available entry number           &  -      \\
    9:10            & {\tt nextrec}  & Integer*8    & Next record which contains free space & record  \\
    11              & {\tt nextword} & Integer*4    & Next free word in this record         & word    \\
    \hline
    12              & {\tt lex1}     & Integer*4    & Length of first extension index       & entries \\
    13              & {\tt nex}      & Integer*4    & Number of extensions                  &  -      \\
    14              & {\tt gex}      & Integer*4    & Extension growth rule                 &  -      \\
    15:{\tt reclen} & {\tt aex(:)}   & Integer*8    & Array of extension addresses          & record

    Returns
    -------
    file_description: dict
        The fields above plus the derived entries ``lexn`` (cumulative
        extension lengths, starting with 0), ``nentries`` (sum of ``lexn``),
        ``aex`` (non-zero extension addresses) and ``version`` (always 2).

    Raises
    ------
    AssertionError
        If the number of non-zero extension addresses differs from ``nex``.
    """
    f.seek(0)

    # The reads below must stay in on-disk order: each one advances the file.
    file_description = {'code': f.read(4)}
    for name in ('reclen', 'kind', 'vind', 'lind', 'flags'):
        file_description[name] = _read_int32(f)
    for name in ('xnext', 'nextrec'):
        file_description[name] = _read_int64(f)
    for name in ('nextword', 'lex1', 'nex', 'gex'):
        file_description[name] = _read_int32(f)

    first_length = file_description['lex1']
    n_extensions = file_description['nex']

    cumulative = [0]
    if file_description['gex'] == 10:
        # Linear growth: every extension holds lex1 entries
        for _ in range(1, n_extensions+1):
            cumulative.append(cumulative[-1] + first_length)
    else:
        #! Exponential growth. Only growth with mantissa 2.0 is supported
        # Fortran reference:
        #   nent = int(file%desc%lex1,kind=8) * 2_8**(iex-1)
        #   file%desc%lexn(iex) = file%desc%lexn(iex-1) + nent
        # NOTE(review): this loop stops one extension earlier than the linear
        # branch above -- confirm against the Fortran loop bounds.
        for iex in range(1, n_extensions):
            nent = int(first_length * 2**(iex-1))
            cumulative.append(cumulative[-1] + nent)
    file_description['lexn'] = cumulative
    file_description['nentries'] = np.sum(cumulative)

    # Remaining words of the record hold the int64 extension addresses
    n_addresses = (file_description['reclen']-15)//2
    addresses = numpy.fromfile(f, count=n_addresses, dtype='int64')
    file_description['aex'] = addresses[addresses != 0]
    assert len(file_description['aex']) == n_extensions

    file_description['version'] = 2
    return file_description


[docs]
def gi8_dicho(ninp,lexn,xval,ceil=True):
    """
    Dichotomic (bisection) search in an ordered array, ported from GILDAS.

    Original Fortran description:

    ! @ public
    !  Find ival such as
    !    X(ival-1) < xval <= X(ival)     (ceiling mode)
    !  or
    !    X(ival) <= xval < X(ival+1)     (floor mode)
    ! for input data ordered. Use a dichotomic search for that.
    call gi8_dicho(nex,file%desc%lexn,entry_num,.true.,kex,error)

    Fortran argument declarations, for reference:

    integer(kind=size_length), intent(in)    :: np     ! Number of input points
    integer(kind=8),           intent(in)    :: x(np)  ! Input ordered Values
    integer(kind=8),           intent(in)    :: xval   ! The value we search for
    logical,                   intent(in)    :: ceil   ! Ceiling or floor mode?
    integer(kind=size_length), intent(out)   :: ival   ! Position in the array
    logical,                   intent(inout) :: error  ! Logical error flag

    Parameters
    ----------
    ninp: int
        Number of input points; the search runs over 1-based positions
        1..ninp.
    lexn: sequence
        Ordered values; 1-based position ``i`` is read from ``lexn[i-1]``.
    xval:
        The value searched for.
    ceil: bool
        Accepted for parity with the Fortran signature but not used: only
        the ceiling-mode search is implemented here.

    Returns
    -------
    ival: int
        The 1-based upper bracket position found by the search.
    """
    lower = 1
    upper = ninp
    #! Ceiling mode
    while upper > (lower + 1):
        # Midpoint of the current 1-based bracket, rounded down
        middle = int((upper + lower) // 2)
        if lexn[middle-1] < xval:
            lower = middle
        else:
            upper = middle
    return upper


def _read_obshead(f, file_description, position=None, verbose=False):
    if file_description['version'] == 1:
        return _read_obshead_v1(f, position=position, verbose=verbose)
    if file_description['version'] == 2:
        return _read_obshead_v2(f, position=position)
    else:
        raise ValueError("Invalid file version {0}.".
                         format(file_description['version']))

def _read_obshead_v2(f, position=None):
    """
    ! Version 2 (public)
    integer(kind=4), parameter :: entrydescv2_nw1=11  ! Number of words, in 1st part
    integer(kind=4), parameter :: entrydescv2_nw2=5   ! Number of words for 1 section in 2nd part
    type classic_entrydesc_t
     sequence
     integer(kind=4) :: code     !  1   : code observation icode
     integer(kind=4) :: version  !  2   : observation version
     integer(kind=4) :: nsec     !  3   : number of sections
     integer(kind=4) :: pad1     !  -   : memory padding (not in data)
     integer(kind=8) :: nword    !  4- 5: number of words
     integer(kind=8) :: adata    !  6- 7: data address
     integer(kind=8) :: ldata    !  8- 9: data length
     integer(kind=8) :: xnum     ! 10-11: entry number
     ! Out of the 'sequence' block:
     integer(kind=4) :: msec     ! Not in data: maximum number of sections the
                                 ! Observation Index can hold
     integer(kind=4) :: pad2     ! Memory padding for 8 bytes alignment
     integer(kind=4) :: seciden(classic_maxsec)  ! Section Numbers (on disk: 1 to ed%nsec)
     integer(kind=8) :: secleng(classic_maxsec)  ! Section Lengths (on disk: 1 to ed%nsec)
     integer(kind=8) :: secaddr(classic_maxsec)  ! Section Addresses (on disk: 1 to ed%nsec)
    end type classic_entrydesc_t
    """
    if position is not None:
        f.seek(position)
    else:
        position = f.tell()
    IDcode = f.read(4)
    if IDcode.strip() != b'2':
        raise IndexError("Observation Header reading failure at {0}.  "
                         "Record does not appear to be an observation header.".
                         format(position))
    f.seek(position)

    entrydescv2_nw1 = 11
    entrydescv2_nw2 = 5
    obshead = {
        'CODE': f.read(4),
        'VERSION': _read_int32(f),
        'NSEC': _read_int32(f),
        #'_blank': _read_int32(f),
        'NWORD': _read_int64(f),
        'ADATA': _read_int64(f),
        'LDATA': _read_int64(f),
        'XNUM': _read_int64(f),
        #'MSEC': _read_int32(f),
        #'_blank2': _read_int32(f),
    }
    section_numbers = np.fromfile(f, count=obshead['NSEC'], dtype='int32')
    section_lengths = np.fromfile(f, count=obshead['NSEC'], dtype='int64')
    section_addresses = np.fromfile(f, count=obshead['NSEC'], dtype='int64')

    return obshead['XNUM'],obshead,dict(zip(section_numbers,section_addresses))

def _read_obshead_v1(f, position=None, verbose=False):
    """
    Read the observation header of a CLASS file
    (helper function for read_class; should not be used independently)
    """
    if position is not None:
        f.seek(position)
    IDcode = f.read(4)
    if IDcode.strip() != b'2':
        raise IndexError("Observation Header reading failure at {0}.  "
                         "Record does not appear to be an observation header.".
                         format(f.tell() - 4))
    (nblocks, nbyteob, data_address, nheaders, data_length, obindex, nsec,
     obsnum) = numpy.fromfile(f, count=8, dtype='int32')
    if verbose:
        print("nblocks,nbyteob,data_address,data_length,nheaders,obindex,nsec,obsnum",nblocks,nbyteob,data_address,data_length,nheaders,obindex,nsec,obsnum)
        print("DATA_LENGTH: ",data_length)

    seccodes = numpy.fromfile(f,count=nsec,dtype='int32')
    # Documentation says addresses then length: It is apparently wrong
    seclen = numpy.fromfile(f,count=nsec,dtype='int32')
    secaddr = numpy.fromfile(f,count=nsec,dtype='int32')
    if verbose:
        print("Section codes, addresses, lengths: ",seccodes,secaddr,seclen)

    hdr = {'NBLOCKS':nblocks, 'NBYTEOB':nbyteob, 'DATAADDR':data_address,
           'DATALEN':data_length, 'NHEADERS':nheaders, 'OBINDEX':obindex,
           'NSEC':nsec, 'OBSNUM':obsnum}

    #return obsnum,seccodes
    return obsnum,hdr,dict(zip(seccodes,secaddr))

# THIS IS IN READ_OBSHEAD!!!
# def _read_preheader(f):
#     """
#     Not entirely clear what this is, but it is stuff that precedes the actual data
#
#     Looks something like this:
#     array([          1,          -2,          -3,          -4,         -14,
#                  9,          17,          18,          25,          55,
#                 64,          81,          99, -1179344801,   979657591,
#
#     -2, -3, -4, -14 indicate the 4 header types
#     9,17,18,25 *MAY* indicate the number of bytes in each
#
#
#     HOW is it indicated how many entries there are?
#     """
#     # 13 comes from counting 1, -2,....99 above
#     numbers = np.fromfile(f, count=13, dtype='int32')
#     sections = [n for n in numbers if n in header_id_numbers]
#     return sections


[docs]
def downsample_1d(myarr,factor,estimator=np.mean, weight=None):
    """
    Downsample a 1D array by averaging over *factor* pixels.
    Crops right side if the shape is not a multiple of factor.

    This code is pure numpy and should be fast.

    Parameters
    ----------
    myarr: np.ndarray
        The 1D data to downsample.
    factor: int
        Number of consecutive samples combined into one output sample.
        Converted with ``int()``.
    estimator: callable
        default to mean.  You can downsample by summing or
        something else if you want a different estimator
        (e.g., downsampling error: you want to sum & divide by sqrt(n)).
        It is called as ``estimator(array, axis=0)``.
    weight: np.ndarray
        An array of weights to use for the downsampling.  If None,
        assumes uniform 1.  Must have the same shape as ``myarr``; it is
        cropped in the same way as the data.

    Returns
    -------
    dsarr: np.ndarray
        Array of length ``myarr.size // factor``.  With weights, this is
        ``estimator(data*weight) / estimator(weight)`` per bin.

    Raises
    ------
    ValueError
        If ``myarr`` is not 1D or ``weight`` does not match its shape.
    """
    if myarr.ndim != 1:
        raise ValueError("Only works on 1d data.  Says so in the title.")
    factor = int(factor)
    # Number of samples kept so that the length is a multiple of factor
    nkept = myarr.size - (myarr.size % factor)

    # Shape (factor, nbins): row i holds the i'th sample of every bin
    binned = myarr[:nkept].reshape(-1, factor).T
    if weight is None:
        return estimator(binned, axis=0)

    weight = np.asarray(weight)
    if weight.shape != myarr.shape:
        raise ValueError("weight must have the same shape as the data.")
    # The weights must be cropped exactly like the data, otherwise the
    # per-bin products below cannot be formed for ragged inputs
    wbinned = weight[:nkept].reshape(-1, factor).T
    dsarr = estimator(binned*wbinned, axis=0)
    warr = estimator(wbinned, axis=0)
    return dsarr/warr


# unit test
def test_downsample1d():
    """
    Check weighted downsampling by 2 when one sample has zero weight:
    the bin containing it must reduce to its other sample.
    """
    weight = np.ones(10)
    weight[5]=0
    data = np.arange(10)
    expected = np.array([0.5,  2.5,  4.0,  6.5,  8.5])
    result = downsample_1d(data, 2, weight=weight, estimator=np.mean)
    assert np.all(result == expected)

def read_observation(f, obsid, file_description=None, indices=None,
                     my_memmap=None, memmap=True, verbose=False):
    """
    Read one observation (spectrum and header) from a CLASS file.

    Parameters
    ----------
    f: str or file object
        A filename, which is opened here and closed again before returning
        (also on error), or an already open binary file object.
    obsid: int
        Position of the observation in ``indices``.
    file_description: dict or None
        Result of ``_read_first_record``; read from ``f`` if None.
    indices: list or None
        Result of ``_read_indices``; read from ``f`` if None.
    my_memmap: np.memmap or None
        float32 memory map of the whole file.  Required when ``f`` is a file
        object and ``memmap`` is True; replaced when ``f`` is a filename.
    memmap: bool
        Take the spectrum from the memory map rather than reading it from
        the file object.
    verbose: bool
        Passed on to the observation header reader.

    Returns
    -------
    spec, hdr
        The spectrum and the merged header dictionary (observation header,
        section headers, index entry and derived keywords).

    Raises
    ------
    ValueError
        If a file object is passed with ``memmap=True`` but no ``my_memmap``,
        or if the header describes no channels (no ``NCHAN`` and not
        continuum).
    """
    opened = isinstance(f, str)
    if opened:
        f = open(f,'rb')
    elif my_memmap is None and memmap:
        raise ValueError("Must pass in a memmap object if passing in a file object.")

    try:
        if opened:
            if memmap:
                my_memmap = numpy.memmap(f, offset=0, dtype='float32',
                                         mode='r')
            else:
                my_memmap = None

        if file_description is None:
            file_description = _read_first_record(f)

        if indices is None:
            indices = _read_indices(f, file_description)

        index = indices[obsid]

        # BLOC and WORD are 1-based record / word numbers; words are 4 bytes
        obs_position = (index['BLOC']-1)*file_description['reclen']*4 + (index['WORD']-1)*4
        log.debug("Reading observation at position {0}".format(obs_position))
        obsnum,obshead,sections = _read_obshead(f, file_description,
                                                position=obs_position,
                                                verbose=verbose)
        # The section headers are merged directly into the observation
        # header dictionary.
        hdr = obshead

        datastart = 0
        for section_id,section_address in iteritems(sections):
            # Section addresses are 1-indexed byte addresses
            # in the current "block"
            sec_position = obs_position + (section_address-1)*4
            temp_hdr = _read_header(f, type=header_id_numbers[section_id],
                                    position=sec_position)
            hdr.update(temp_hdr)
            # The data are taken to start after the last section read
            datastart = max(datastart,f.tell())

        hdr.update({'OBSNUM':obsnum,'RECNUM':obsid})
        hdr.update({'RA':hdr['LAM']/pi*180,'DEC':hdr['BET']/pi*180})
        hdr.update({'RAoff':hdr['LAMOF']/pi*180,'DECoff':hdr['BETOF']/pi*180})
        hdr.update({'OBJECT':hdr['SOURC'].strip()})
        hdr.update({'BUNIT':'Tastar'})
        hdr.update({'EXPOSURE':float(hdr['TIME'])})
        hdr['HDRSTART'] = obs_position
        hdr['DATASTART'] = datastart
        hdr.update(indices[obsid])
        # Define MJD as mid-exposure time in MJD
        hdr.update({'OBSDATE': hdr['MJD'] + hdr['UT']/2./pi})

        # Apparently the data are still valid in this case?
        #if hdr['XNUM'] != obsid+1:
        #    log.error("The spectrum read was {0} but {1} was requested.".
        #              format(hdr['XNUM']-1, obsid))

        if hdr['KIND'] == 1: # continuum
            nchan = hdr['NPOIN']
        elif 'NCHAN' in hdr:
            nchan = hdr['NCHAN']
        else:
            log.error("No NCHAN in header.  This is not a spectrum.")
            raise ValueError("No NCHAN in header.  This is not a spectrum.")
        # There may be a 1-channel offset?  CHECK!!!
        # (changed by 1 pixel - October 14, 2014)
        # (changed back - October 21, 2014 - I think the ends are just bad, but not
        # zero.)
        # NOTE(review): the offset below is one *byte*, not one 4-byte
        # channel, and hdr['DATASTART'] stores the un-shifted value -- confirm.
        f.seek(datastart-1)
        spec = _read_spectrum(f, position=datastart-1, nchan=nchan,
                              memmap=memmap, my_memmap=my_memmap)

        return spec, hdr
    finally:
        # Only close what this function opened itself
        if opened:
            f.close()

def _read_spectrum(f, position, nchan, my_memmap=None, memmap=True):
    if position != f.tell():
        log.warning("Reading data from {0}, but the file is wound "
                 "to {1}.".format(position, f.tell()))
    if memmap:
        here = position
        #spectrum = numpy.memmap(filename, offset=here, dtype='float32',
        #                        mode='r', shape=(nchan,))
        spectrum = my_memmap[here//4:here//4+nchan]
        f.seek(here+nchan*4)
    else:
        f.seek(position)
        spectrum = numpy.fromfile(f,count=nchan,dtype='float32')

    return spectrum

def _spectrum_from_header(fileobj, header, memmap=None):
    """
    Read the spectrum described by an already-parsed header.

    Parameters
    ----------
    fileobj: file object
        Open CLASS file.
    header: dict
        Must contain ``'DATASTART'`` and either ``'NCHAN'`` or ``'NPOIN'``
        (the latter is used when ``'NCHAN'`` is absent).
    memmap: array-like or None
        float32 view of the whole file.  If None, the data are read from
        ``fileobj`` instead.
    """
    nchan = header['NCHAN'] if 'NCHAN' in header else header['NPOIN']
    return _read_spectrum(fileobj, position=header['DATASTART'],
                          nchan=nchan,
                          my_memmap=memmap,
                          # without a memmap the only option is the file
                          memmap=memmap is not None)

def clean_header(header):
    """
    Build a flat copy of ``header`` with keys truncated to 8 characters.

    * ``int``, ``float`` and ``str`` values are copied unchanged.
    * numpy arrays with 2 to 10 elements are expanded into one entry per
      element, keyed by the first 7 characters of the key plus the element
      index.
    * ``bytes`` values are decoded to text (undecodable bytes are replaced).
    * Anything else is stored as ``str(value)``.

    Raises
    ------
    ValueError
        If an array value has more than 10 elements.
    """
    newheader = {}
    for k in header:
        value = header[k]
        if isinstance(value, (int, float, str)):
            newheader[k[:8]] = value
        elif isinstance(value, np.ndarray) and value.size > 1:
            if value.size > 10:
                raise ValueError("Large array being put in header.  That's no good.  key={0}".format(k))
            for ii,val in enumerate(value):
                newheader[k[:7]+str(ii)] = val
        elif isinstance(value, bytes):
            # str() on bytes yields the repr ("b'...'") under Python 3
            newheader[k[:8]] = value.decode('utf-8', 'replace')
        else:
            newheader[k[:8]] = str(value)

    return newheader

class ClassObject(object):
    """
    Handle on a GILDAS CLASS file opened for reading.

    On construction the file is opened, its first record and index entries
    are read (``self.file_description``, ``self.allind``) and the whole file
    is memory-mapped as float32 (``self._data``).  Observations are accessed
    through ``self._spectra``, a ``LazyItem`` (presumably loading spectra on
    demand -- see its definition).
    """
    def __init__(self, filename, verbose=False):
        """
        Parameters
        ----------
        filename: str
            Path of the CLASS file.  It is opened in binary read mode and
            kept open on ``self._file``.
        verbose: bool
            Log progress and a timing breakdown with severity INFO.
        """
        t0 = time.time()
        self._file = open(filename, 'rb')
        self.file_description = _read_first_record(self._file)
        self.allind = _read_indices(self._file, self.file_description)
        self._data = np.memmap(self._file, dtype='float32', mode='r')
        if verbose: log.info("Setting _spectra")
        self._spectra = LazyItem(self)
        t1 = time.time()
        if verbose: log.info("Setting posang.  t={0}".format(t1-t0))
        self.set_posang()
        t2 = time.time()
        if verbose: log.info("Identifying otf scans.  t={0}".format(t2-t1))
        self._identify_otf_scans(verbose=verbose)
        t3 = time.time()
        #self._load_all_spectra()
        if verbose:
            log.info("Loaded CLASS object with {3} indices.  Time breakdown:"
                     " {0}s for indices, "
                     "{1}s for posang, and {2}s for OTF scan identification"
                     .format(t1-t0, t2-t1, t3-t2, len(self.allind)))


    def __repr__(self):
        s = "\n".join(["{k}: {v}".format(k=k,v=v)
                       for k,v in iteritems(self.getinfo())])
        return "ClassObject({id}) with {nspec} entries\n".format(id=id(self),
                                                                 nspec=len(self.allind)) + s

    def getinfo(self, allsources=False):
        """
        Summarize the file contents as a dict with the sets of telescopes,
        lines, scans and sources.  Calibration-like sources are left out
        unless ``allsources`` is True (see ``sci_sources``).
        """
        info = dict(
                    tels = self.tels,
                    lines = self.lines,
                    scans = self.scans,
                    sources = self.sources if allsources else self.sci_sources,
                   )
        return info

    def set_posang(self):
        """
        Store in each header, as ``'COMPPOSA'``, the angle in degrees of the
        offset step (OFF1, OFF2) from the previous header to this one.  The
        first header is compared with itself.
        """
        h0 = self.headers[0]
        for h in self.headers:
            dx = h['OFF1'] - h0['OFF1']
            dy = h['OFF2'] - h0['OFF2']
            h['COMPPOSA'] = np.arctan2(dy,dx)*180/np.pi
            h0 = h


    def _identify_otf_scans(self, verbose=False):
        """
        Annotate the index entries with ``'OTFSCAN'``, ``'SCANPOSA'`` and
        ``'FIRSTSCAN'`` based on changes of SCAN / SOURC relative to the
        first entry.

        NOTE(review): ``h0`` is never advanced inside the loop, so every
        entry is compared with the very first one, and the entries after the
        last detected change never receive ``'SCANPOSA'`` -- confirm whether
        this is intended.
        """
        h0 = self.allind[0]
        st = 0
        otfscan = 0
        posangs = [h['COMPPOSA'] for h in self.allind]
        if verbose:
            pb = ProgressBar(len(self.allind))

        for ii,h in enumerate(self.allind):
            if (h['SCAN'] != h0['SCAN']
                or h['SOURC'] != h0['SOURC']):

                h0['FIRSTSCAN'] = st
                # Median step angle over the group that just ended
                cpa = np.median(posangs[st:ii])
                for hh in self.allind[st:ii]:
                    hh['SCANPOSA'] = cpa % 180
                st = ii
                if h['SCAN'] == h0['SCAN']:
                    h0['OTFSCAN'] = otfscan
                    otfscan += 1
                    h['OTFSCAN'] = otfscan
                else:
                    otfscan = 0
                    h['OTFSCAN'] = otfscan
            else:
                h['OTFSCAN'] = otfscan

            if verbose:
                pb.update(ii)

    def listscans(self, source=None, telescope=None, out=sys.stdout):
        """
        Print one summary line per group of consecutive headers that share
        SCAN and SOURC, and return the summaries.

        Parameters
        ----------
        source: str, list or tuple of str, or None
            Regular expression(s) searched in the group's ``'SOURC'``; groups
            matching none are skipped.
        telescope: str or None
            Regular expression searched in the group's ``'XTEL'``.
        out: file object
            Where the table is printed.

        Returns
        -------
        data_rows: list of dict
            One dict per printed line, holding the formatted quantities and
            the header of the last entry of the group.

        Notes
        -----
        A group is only emitted when the following group starts, so the
        final group in the file is not listed.
        """
        minid=0
        scan = -1
        sourc = ""
        #tel = ''
        minoff1,maxoff1 = np.inf,-np.inf
        minoff2,maxoff2 = np.inf,-np.inf
        ttlangle,nangle = 0.0,0
        print("{entries:15s} {SOURC:12s} {XTEL:12s} {SCAN:>8s} {SUBSCAN:>8s} "
              "[ {RAmin:>12s}, {RAmax:>12s} ] "
              "[ {DECmin:>12s}, {DECmax:>12s} ] "
              "{angle:>12s} {SCANPOSA:>12s} {OTFSCAN:>8s} {TSYS:>8s} {UTD:>12s}"
              .format(entries='Scans', SOURC='Source', XTEL='Telescope',
                      SCAN='Scan', SUBSCAN='Subscan',
                      RAmin='min(RA)', RAmax='max(RA)',
                      DECmin='min(DEC)', DECmax='max(DEC)',
                      SCANPOSA='Scan PA',
                      angle='Angle', OTFSCAN='OTFscan',
                      TSYS='TSYS', UTD='UTD'),
             file=out)

        data_rows = []

        for ii,row in enumerate(self.headers):
            if (row['SCAN'] == scan
                and row['SOURC'] == sourc
                #and row['XTEL'] == tel
               ):
                # Same group: accumulate offset extrema and step angles
                minoff1 = min(minoff1, row['OFF1'])
                maxoff1 = max(maxoff1, row['OFF1'])
                minoff2 = min(minoff2, row['OFF2'])
                maxoff2 = max(maxoff2, row['OFF2'])
                ttlangle += np.arctan2(row['OFF2'] - prevrow['OFF2'],
                                       row['OFF1'] - prevrow['OFF1'])%np.pi
                nangle += 1
                prevrow = row

            else:
                if scan == -1:
                    # Very first row: just start the first group
                    scan = row['SCAN']
                    sourc = row['SOURC']
                    #tel = row['XTEL']
                    prevrow = row
                    continue

                ok = True
                if source is not None:
                    if isinstance(source, (list,tuple)):
                        ok = ok and any(re.search((s), prevrow['SOURC'])
                                        for s in source)
                    else:
                        ok = ok and re.search((source), prevrow['SOURC'])
                if telescope is not None:
                    ok = ok and re.search((telescope), prevrow['XTEL'])
                if ok:
                    # Offsets are converted from radians to arcseconds
                    data = dict(RAmin=minoff1*180/np.pi*3600,
                                RAmax=maxoff1*180/np.pi*3600,
                                DECmin=minoff2*180/np.pi*3600,
                                DECmax=maxoff2*180/np.pi*3600,
                                angle=(ttlangle/nangle)*180/np.pi if nangle>0 else 0,
                                e0=minid,
                                e1=ii-1,
                                #TSYS=row['TSYS'] if 'TSYS' in row else '--',
                                UTD=row['DOBS']+row['UT'] if 'UT' in row else -99,
                                **prevrow)
                    print("{e0:7d}-{e1:7d} {SOURC:12s} {XTEL:12s} {SCAN:8d} {SUBSCAN:8d} "
                          "[ {RAmin:12f}, {RAmax:12f} ] "
                          "[ {DECmin:12f}, {DECmax:12f} ] "
                          "{angle:12.1f} {SCANPOSA:12.1f} {OTFSCAN:8d}"
                          " {TSYS:>8.1f} {UTD:12f}".
                          format(**data),
                         file=out)

                    data_rows.append(data)

                # Reset the accumulators for the group starting at this row
                minoff1,maxoff1 = np.inf,-np.inf
                minoff2,maxoff2 = np.inf,-np.inf
                ttlangle,nangle = 0.0,0
                scan = row['SCAN']
                sourc = row['SOURC']
                #tel = row['XTEL']
                minid = ii

        # Return every collected row (not just the last one); this is also
        # well defined when no group was printed.
        return data_rows

    @property
    def tels(self):
        """Set of ``'XTEL'`` values in the index (cached)."""
        if hasattr(self,'_tels'):
            return self._tels
        else:
            self._tels = set([h['XTEL'] for h in self.allind])
            return self._tels

    @property
    def sources(self):
        """Set of ``'SOURC'`` values in the index (cached)."""
        if hasattr(self,'_source'):
            return self._source
        else:
            self._source = set([h['SOURC'] for h in self.allind])
            return self._source

    @property
    def scans(self):
        """Set of ``'SCAN'`` values in the index (cached)."""
        if hasattr(self,'_scan'):
            return self._scan
        else:
            self._scan = set([h['SCAN'] for h in self.allind])
            return self._scan

    @property
    def sci_sources(self):
        """
        Sources whose first four characters are not one of the
        calibration-like prefixes listed below.
        """
        return set([s for s in self.sources
                    if s[:4] not in ('SKY-', 'TSYS', 'TCAL', 'TREC', 'HOT-',
                                     'COLD')])

    @property
    def lines(self):
        """Set of ``'LINE'`` values in the index (cached)."""
        if hasattr(self,'_lines'):
            return self._lines
        else:
            self._lines = set([h['LINE'] for h in self.allind])
            return self._lines

    def _load_all_spectra(self, indices=None):
        """
        Load spectra into ``self._spectra`` and remember which indices have
        been requested.

        On the first call everything is loaded through
        ``self._spectra.load_all()``; on later calls only the indices not
        requested before are accessed one by one.

        Parameters
        ----------
        indices: iterable of int or None
            Observation indices; defaults to ``range(xnext - 1)`` from the
            file description.
        """
        if indices is None:
            indices = range(self.file_description['xnext']-1)

        if hasattr(self, '_loaded_indices'):
            indices_set = set(indices)
            indices_to_load = (indices_set.difference(self._loaded_indices))
            self._loaded_indices = self._loaded_indices.union(indices_set)

            # Test for emptiness, not truthiness of the members: index 0 is
            # a valid observation.
            if indices_to_load:
                pb = ProgressBar(len(indices_to_load))
                for ii,k in enumerate(sorted(indices_to_load)):
                    # Item access is what triggers the load
                    self._spectra[k]
                    pb.update(ii)

        else:
            self._loaded_indices = set(indices)

            self._spectra.load_all()


    @property
    def spectra(self):
        """The first element (the data) of every item in ``self._spectra``."""
        return [x[0] for x in self._spectra]

    @property
    def headers(self):
        """
        One header per index entry: the header stored with the spectrum when
        the index is present in ``self._spectra``, otherwise the index entry
        itself.
        """
        return [self._spectra[ii][1]
                if ii in self._spectra else x
                for ii,x in enumerate(self.allind)]

    def select_spectra(self,
                       all=None,
                       line=None,
                       linere=None,
                       linereflags=re.IGNORECASE,
                       number=None,
                       scan=None,
                       offset=None,
                       source=None,
                       sourcere=None,
                       sourcereflags=re.IGNORECASE,
                       range=None,
                       quality=None,
                       telescope=None,
                       telescopere=None,
                       telescopereflags=re.IGNORECASE,
                       subscan=None,
                       entry=None,
                       posang=None,
                       #observed=None,
                       #reduced=None,
                       frequency=None,
                       section=None,
                       user=None,
                       include_old_versions=False,
                      ):
        """
        Return the indices of the observations matching every given
        criterion.  Criteria left as None are not applied.

        Parameters
        ----------
        line, source, telescope: str
            Literal text searched (case-insensitively) in the ``'LINE'``,
            ``'CSOUR'`` and ``'CTELE'`` header entries respectively.
        linere, sourcere, telescopere: str
            Regular expressions searched in the same entries, using the
            matching ``*reflags``.
        number: sequence of 2
            Keep ``number[0] <= NUM < number[1]``.
        scan, subscan, quality:
            Must equal ``'SCAN'``, ``'SUBSCAN'``, ``'QUAL'``.
        offset:
            Must equal ``'OFF1'`` or ``'OFF2'``.
        range: sequence of 4
            Exclusive bounds (OFF1 min, OFF1 max, OFF2 min, OFF2 max).
        entry: sequence of 2
            If given, all other criteria are ignored and
            ``range(entry[0], entry[1])`` is returned.
        posang: sequence of 2
            Exclusive bounds on ``COMPPOSA % 180``.
        frequency: sequence of 2
            Exclusive bounds on ``'RESTF'``; triggers loading of all
            spectra first.
        all, section, user:
            Accepted but not used.
        include_old_versions: bool
            Include spectra with XVER numbers <0?  These are CLASS spectra that
            have been "overwritten" (re-reduced?)
        """
        if entry is not None and len(entry)==2:
            return irange(entry[0], entry[1])

        if frequency is not None:
            self._load_all_spectra()

        sel = [(re.search(re.escape(ensure_bytes(line)), h['LINE'], re.IGNORECASE)
                if line is not None else True) and
               (re.search(ensure_bytes(linere), h['LINE'], linereflags)
                if linere is not None else True) and
               (h['SCAN'] == scan if scan is not None else True) and
               ((h['OFF1'] == offset or
                 h['OFF2'] == offset) if offset is not None else True) and
               (re.search(re.escape(ensure_bytes(source)), h['CSOUR'], re.IGNORECASE)
                if source is not None else True) and
               (re.search(ensure_bytes(sourcere), h['CSOUR'], sourcereflags)
                if sourcere is not None else True) and
               (h['OFF1']>range[0] and h['OFF1'] < range[1] and
                h['OFF2']>range[2] and h['OFF2'] < range[3]
                if range is not None and len(range)==4 else True) and
               (h['QUAL'] == quality if quality is not None else True) and
               (re.search(re.escape(ensure_bytes(telescope)), h['CTELE'], re.IGNORECASE)
                if telescope is not None else True) and
               (re.search(ensure_bytes(telescopere), h['CTELE'], telescopereflags)
                if telescopere is not None else True) and
               (h['SUBSCAN']==subscan if subscan is not None else True) and
               (h['NUM'] >= number[0] and h['NUM'] < number[1]
                if number is not None else True) and
               ('RESTF' in h and # Need to check that it IS a spectrum: continuum data can't be accessed this way
                h['RESTF'] > frequency[0] and
                h['RESTF'] < frequency[1]
                if frequency is not None and len(frequency)==2
                else True) and
               (h['COMPPOSA']%180 > posang[0] and
                h['COMPPOSA']%180 < posang[1]
                if posang is not None and len(posang)==2
                else True) and
               # 1A uses XVER, 2A uses VER.  If neither are present, it's
               # probably not a valid spectrum?
               (h.get('XVER', h.get('VER', -999)) > 0
                if not include_old_versions else True)
               for h in self.headers
              ]

        return [ii for ii,k in enumerate(sel) if k]

    def get_spectra(self, progressbar=True, **kwargs):
        """
        Select observations with ``select_spectra(**kwargs)``, load them and
        return the corresponding items of ``self._spectra``.

        Raises
        ------
        ValueError
            If nothing was selected.
        """
        selected_indices = self.select_spectra(**kwargs)

        # Check the length: a selection consisting of index 0 alone is not
        # empty.
        if len(selected_indices) == 0:
            raise ValueError("Selection yielded empty.")

        self._spectra.load(selected_indices, progressbar=progressbar)
        return [self._spectra[ii] for ii in selected_indices]

    def get_pyspeckit_spectra(self, progressbar=True, **kwargs):
        """
        Like ``get_spectra``, but wrap every (data, header) pair in a
        ``pyspeckit.Spectrum`` with an axis built by ``make_axis`` and a
        header passed through ``clean_header``.
        """

        spdata = self.get_spectra(progressbar=progressbar, **kwargs)

        spectra = [pyspeckit.Spectrum(data=data,
                                      xarr=make_axis(header),
                                      header=clean_header(header))
                   for data,header in spdata]

        return spectra


    def read_observations(self, observation_indices, progressbar=True):
        """
        Load the given observation indices and return the corresponding
        items of ``self._spectra``, in the order requested.
        """
        self._spectra.load(observation_indices, progressbar=progressbar)
        return [self._spectra[ii] for ii in observation_indices]



[docs]
@print_timing
def read_class(filename, downsample_factor=None, sourcename=None,
               telescope=None, line=None, posang=None, verbose=False,
               flag_array=None):
    """
    Read a binary class file.
    Based on the
    `GILDAS CLASS file type Specification
    <http://iram.fr/IRAMFR/GILDAS/doc/html/class-html/node58.html>`_

    Every combination of the given source, telescope and line criteria is
    selected in turn and the matching observations are read.

    Parameters
    ----------
    filename: str
    downsample_factor: None or int
        Factor by which to downsample data by averaging.  Useful for
        overresolved data.
    sourcename: str or list of str
        Source names to match to the data (uses regex)
    telescope: str or list of str
        'XTEL' or 'TELE' parameters: the telescope & instrument
    line: str or list of str
        The line name
    posang: tuple of 2 floats
        The first float is the minimum value for the position angle. The second
        float is the maximum value for the position angle.
    verbose: bool
        Log messages with severity INFO
    flag_array: np.ndarray
        An array with the same shape as the data used to flag out
        (remove) data when downsampling.  True = flag out

    Returns
    -------
    spectra, headers, indexes
        Lists of the data arrays and of their headers, plus the header list
        as it was before downsampling (the header dictionaries themselves
        are shared).  ``None`` is returned if nothing was selected.
    """
    def _listify(criterion):
        # A single value (including None) becomes a one-element list
        if isinstance(criterion, (list,tuple)):
            return criterion
        return [criterion]

    classobj = ClassObject(filename)

    sourcename = _listify(sourcename)
    telescope = _listify(telescope)
    line = _listify(line)

    if verbose:
        log.info("Reading...")

    selection = []
    for source in sourcename:
        for tel in telescope:
            for li in line:
                selection.extend(classobj.select_spectra(sourcere=source,
                                                         telescope=tel,
                                                         line=li,
                                                         posang=posang))

    sphdr = classobj.read_observations(selection)
    if len(sphdr) == 0:
        return None

    # Split the (spectrum, header) pairs into two parallel lists
    spec,hdr = zip(*sphdr)
    spectra = list(spec)
    headers = list(hdr)

    indexes = headers

    # Flagged samples get zero weight
    if flag_array is not None:
        weight = ~flag_array
    else:
        weight = None

    if downsample_factor is not None:
        if verbose:
            log.info("Downsampling...")
        spectra = [downsample_1d(spec, downsample_factor,
                                 weight=weight)
                   for spec in ProgressBar(spectra)]
        headers = [downsample_header(h, downsample_factor)
                   for h in ProgressBar(headers)]

    for hdr in headers:
        stringify_header(hdr)

    return spectra,headers,indexes


def stringify_header(header):
    """
    Make the values of ``header`` FITS-friendly, in place.

    ``bytes`` values are decoded; values that are not strings, integers,
    floats, complex numbers or booleans (Python or numpy) are replaced by
    their ``str()`` with line-break-like characters removed.  Nothing is
    returned.
    """
    import string
    from six import string_types, integer_types

    scalar_types = (float, complex, bool, np.floating, np.integer,
                    np.complexfloating, np.bool_)
    FITS_allowed_types = string_types + integer_types + scalar_types

    # The tail of string.printable: the last few whitespace characters
    bad_chars = string.printable[96:]
    badcharre = re.compile("[{0}]".format(bad_chars))

    for key, value in header.items():
        if isinstance(value, bytes):
            header[key] = value.decode()
            continue
        if isinstance(value, FITS_allowed_types):
            continue
        header[key] = badcharre.sub("", str(header[key]))

def downsample_header(hdr, downsample_factor):
    """
    Update, in place, the channel bookkeeping of ``hdr`` for data that have
    been reduced by ``downsample_factor``, and return that same header.

    * ``NCHAN``, ``NPOIN`` and ``DATALEN`` (each only if present) are divided
      by the factor and truncated with ``int``.
    * ``RCHAN`` (must be present) is rescaled as described below.
    * ``FRES`` and ``VRES`` (each only if present) are multiplied by the
      factor.
    """
    for count_key in ('NCHAN', 'NPOIN', 'DATALEN'):
        if count_key not in hdr:
            continue
        hdr[count_key] = int(hdr[count_key] / downsample_factor)

    # New reference channel: algebraically (RCHAN - 0.5)/factor + 0.5.
    # The original author also noted, as possibly wrong, the alternative
    #   RCHAN = (RCHAN - 1) / downsample_factor + 1
    inverse_factor = 1./downsample_factor
    old_refchan = hdr['RCHAN']
    hdr['RCHAN'] = (old_refchan-1)*inverse_factor + 0.5 + inverse_factor/2.

    for resolution_key in ('FRES', 'VRES'):
        if resolution_key not in hdr:
            continue
        hdr[resolution_key] *= downsample_factor

    return hdr


# [docs]
def make_axis(header,imagfreq=False):
    """
    Create a :class:`pyspeckit.spectrum.units.SpectroscopicAxis` from the CLASS "header"

    Parameters
    ----------
    header : dict-like
        CLASS header.  ``NCHAN``, ``RCHAN`` and ``FRES`` are always read;
        ``RESTF`` (plus the optional ``FOFF``/``VOFF`` offsets) is used for
        the signal axis and ``IMAGE`` for the image axis.
    imagfreq : bool
        If True, build the axis around the image frequency (``IMAGE``), with
        the channel increment reversed, instead of around ``RESTF``.

    Returns
    -------
    XAxis : :class:`pyspeckit.spectrum.units.SpectroscopicAxis`
        Axis in MHz, with ``refX`` set to the frequency it was built around.
    """
    from .. import units

    rest_frequency = header.get('RESTF')
    nchan = header.get('NCHAN')
    voff = header.get('VOFF')
    foff = header.get('FOFF')
    fres = header.get('FRES')
    refchan = header.get('RCHAN')
    imfreq = header.get('IMAGE')

    if foff in (None, 0.0):
        if voff not in (None, 0.0):
            # Radio convention
            # (the constant is c in km/s, so VOFF is presumably in km/s)
            foff = -voff/2.997924580e5 * rest_frequency
        else:
            # Neither offset is usable: treat it as zero instead of letting
            # a missing FOFF (None) break the arithmetic below.
            foff = 0.0

    # Channels are numbered from 1, and RCHAN is the (possibly fractional)
    # channel that sits at the reference frequency.
    channel_offsets = numpy.arange(1, nchan+1) - refchan

    if not imagfreq:
        xarr = rest_frequency + foff + channel_offsets * fres
        XAxis = units.SpectroscopicAxis(xarr,unit='MHz',refX=rest_frequency*u.MHz)
    else:
        xarr = imfreq - channel_offsets * fres
        XAxis = units.SpectroscopicAxis(xarr,unit='MHz',refX=imfreq*u.MHz)

    return XAxis



# [docs]
@print_timing
def class_to_obsblocks(filename, telescope, line, datatuple=None, source=None,
                       imagfreq=False, DEBUG=False,  **kwargs):
    """
    Load an entire CLASS observing session into a list of ObsBlocks based on
    matches to the 'telescope', 'line' and 'source' names

    Parameters
    ----------
    filename : string
        The Gildas CLASS data file to read the spectra from.
    telescope : list
        List of telescope names to be matched.
    line : list
        List of line names to be matched.
    datatuple : tuple (optional)
        A ``(spectra, headers, indexes)`` tuple as returned by `read_class`.
        If given, it is used instead of reading ``filename``.
    source : list (optional)
        List of source names to be matched. Defaults to None.
    imagfreq : bool
        Create a SpectroscopicAxis with the image frequency.
    DEBUG : bool
        Currently unused.
    kwargs :
        Passed to `read_class` when ``datatuple`` is not given.

    Returns
    -------
    obslist : list
        One ``pyspeckit.ObsBlock`` per run of consecutive selected spectra
        that share the same ``SCAN`` number.  Empty if nothing was read or
        nothing matched.
    """
    if datatuple is None:
        datatuple = read_class(filename, **kwargs)
    if datatuple is None:
        # read_class returns None when its selection is empty
        return []
    spectra,header,indexes = datatuple

    obslist = []
    lastscannum = -1
    spectrumlist = None
    for sp,hdr,ind in zip(spectra,header,indexes):
        hdr.update(ind)

        # Reject non-matching spectra first, so the slow FITS header
        # conversion below is only done for spectra that are kept.
        if 'XTEL' in hdr and hdr['XTEL'].strip() not in telescope:
            continue
        if hdr['LINE'].strip() not in line:
            continue
        if (source is not None) and (hdr['SOURC'].strip() not in source):
            continue

        scannum = hdr['SCAN']
        H = _class_header_to_fits(hdr)
        hdr['RESTFREQ'] = hdr.get('RESTF')
        H['RESTFREQ'] = hdr.get('RESTF')

        # A new scan number (or the very first accepted spectrum, whatever
        # its scan number) starts a new block and a new spectral axis.
        if spectrumlist is None or scannum != lastscannum:
            lastscannum = scannum
            if spectrumlist is not None:
                obslist.append(pyspeckit.ObsBlock(spectrumlist))
            xarr = make_axis(hdr,imagfreq=imagfreq)
            spectrumlist = []

        spectrumlist.append(
            pyspeckit.Spectrum(xarr=xarr,
                               header=H,
                               data=sp))

    # The block still being accumulated when the loop ends must be emitted
    # too; otherwise the last scan of the session is lost.
    if spectrumlist is not None:
        obslist.append(pyspeckit.ObsBlock(spectrumlist))

    return obslist


def _class_header_to_fits(hdr):
    """
    Build a ``pyfits.Header`` holding the entries of the CLASS header ``hdr``.

    Values with a length (other than ``str``) are spread over numbered
    keywords: the key is truncated to 7 characters for fewer than 10 entries,
    or to 6 characters for fewer than 100, and the entry index is appended.
    A one-element value is stored under the unmodified key.

    Raises
    ------
    ValueError
        If a value has 100 or more entries.
    """
    # this is slow but necessary...
    H = pyfits.Header()
    for k,v in iteritems(hdr):
        if hasattr(v,"__len__") and not isinstance(v,str):
            if len(v) > 1:
                if len(v) < 10:
                    keystem = k[:7]
                elif len(v) < 100:
                    keystem = k[:6]
                else:
                    raise ValueError("Too many entries for {0}".format(k))
                for ii,vv in enumerate(v):
                    H[keystem+str(ii)] = vv
            else:
                H[k] = v[0]
        else:
            H[k] = v
    return H



# [docs]
class LazyItem(object):
    """
    Simple lazy spectrum-retriever wrapper

    Wraps a ``parent`` object (which must provide ``allind``, ``_file``,
    ``_data`` and ``file_description``) and reads each observation only when
    it is first requested, caching the resulting (spectrum, header) pair.

    * ``item[i]`` returns the pair for observation ``i``, reading it if
      needed.
    * ``item[a:b:c]`` returns a list of pairs.
    * Iteration and ``in`` only consider observations that are already
      loaded.
    """
    def __init__(self, parent):
        self.parent = parent
        # cache: observation index -> (spectrum, header)
        self.sphdr = {}
        self.nind = len(self.parent.allind)
        self.nloaded = 0

    def __repr__(self):
        # An empty index would otherwise divide by zero
        percent = (float(self.nloaded)/self.nind)*100 if self.nind else 0.0
        return ("Set of {0} spectra & headers, {1} loaded"
                " ({2:0.2f}%)".format(self.nind, self.nloaded, percent))

    def load_all(self, progressbar=True):
        """Load every observation; see `load` for ``progressbar``."""
        self.load(range(self.nind), progressbar=progressbar)

    def load(self, indices, progressbar=True):
        """
        Load (and cache) the observations with the given indices.

        Parameters
        ----------
        indices : iterable of int
            Observation indices to load.
        progressbar : bool
            Show a progress bar while loading.
        """
        indices = list(indices)
        # The ProgressBar fallback used when astropy's is unavailable
        # returns None, hence the None checks.
        pb = ProgressBar(len(indices)) if progressbar else None
        for counter, k in enumerate(indices, 1):
            self[k]
            if pb is not None:
                pb.update(counter)

    def __getitem__(self, key):
        # Slices go first: they cannot be looked up in the cache dictionary
        # (and are not even hashable on older Pythons).
        if isinstance(key, slice):
            nobs = len(self.parent.allind)
            return [self[k] for k in range(*key.indices(nobs))]

        if key in self.sphdr:
            return self.sphdr[key]

        sphd = read_observation(self.parent._file, key,
                                file_description=self.parent.file_description,
                                indices=self.parent.allind,
                                my_memmap=self.parent._data)
        # Update the header with OTFSCAN and POSANG info
        sphd[1].update(self.parent.allind[key])
        self.sphdr[key] = sphd
        self.nloaded += 1
        return sphd

    def __iter__(self):
        # Iterate over a snapshot of the keys so that loading more
        # observations while iterating does not invalidate the loop.
        for k in list(self.sphdr):
            yield self.sphdr[k]

    def __next__(self):
        # Kept for backward compatibility: as before, this returns an
        # iterator over the loaded (spectrum, header) pairs.
        return iter(self)

    def __contains__(self, key):
        return key in self.sphdr





# [docs]
@print_timing
def class_to_spectra(filename, datatuple=None, **kwargs):
    """
    Turn every spectrum of a CLASS file into its own Spectrum object and
    return them bundled as a ``pyspeckit.Spectra``.

    If ``datatuple`` (a ``(spectra, headers, indexes)`` tuple) is supplied it
    is used as-is; otherwise ``filename`` is read with `read_class`, to which
    ``kwargs`` are forwarded.
    """
    loaded = read_class(filename, **kwargs) if datatuple is None else datatuple
    spectra,header,indexes = loaded

    def _as_spectrum(data, hdr, index_entry):
        # Merge the index entry into the header before the axis is built
        hdr.update(index_entry)
        return pyspeckit.Spectrum(xarr=make_axis(hdr),
                                  header=hdr,
                                  data=data)

    return pyspeckit.Spectra([_as_spectrum(sp, hdr, ind)
                              for sp,hdr,ind in zip(spectra,header,indexes)])



# [docs]
def tests():
    """
    Smoke test that loads two line selections from one CLASS file.

    The file path is specific to the machine on which this code was
    developed, so this will not run elsewhere.
    """
    class_file = '/Users/adam/work/bolocam/hht/class_003.smt'
    # Other local files that were tried during development:
    #   /Users/adam/work/bolocam/hht/class_001.smt
    #   /Users/adam/work/bolocam/hht/test_SMT-F1M-VU-20824-073.cls
    #   /Users/adam/work/bolocam/hht/test_SMT-F1M-VU-79472+203.cls
    n2hp_selection = dict(telescope=['SMT-F1M-HU','SMT-F1M-VU'],
                          line=['N2HP(3-2)','N2H+(3-2)'])
    hcop_selection = dict(telescope=['SMT-F1M-HL','SMT-F1M-VL'],
                          line=['HCOP(3-2)','HCO+(3-2)'])
    n2hp = class_to_obsblocks(class_file, **n2hp_selection)
    hcop = class_to_obsblocks(class_file, **hcop_selection)
# Source code for pyspeckit.spectrum.readers.read_class
#
# Table Of Contents
#
# Search