# -*- coding: utf-8 -*-
"""
Created on Fri Jul 16 11:48:28 2010

@author: P. Thibault
"""
import h5py
import numpy as np
import cPickle
import time
import os.path
import glob

__all__ = ['h5write', 'h5append', 'h5read', 'h5info', 'h5options']

h5options=dict(
H5RW_VERSION = '0.1',
H5PY_VERSION = h5py.version.version,
#UNSUPPORTED = 'pickle'
#UNSUPPORTED = 'ignore'
UNSUPPORTED = 'fail',
SLASH_ESCAPE = '_SLASH_')

def sdebug(f):
    """ 
    debugging decorator for _store functions
    """
    def newf(*args, **kwds):
        print '{0:20} {1:20}'.format(f.func_name, args[2])
        return f(*args, **kwds)
    newf.__doc__ = f.__doc__
    return newf

# Helper functions to load slices
class Str_to_Slice(object):
    def __getitem__(self, x):
        return x
    def __call__(self, s):
        return eval('self' + s)
str_to_slice = Str_to_Slice()

def _h5write(filename, mode, *args, **kwargs):
    """\
    _h5write(filename, mode, {'var1'=..., 'var2'=..., ...})
    _h5write(filename, mode, var1=..., var2=..., ...)
    _h5write(filename, mode, dict, var1=..., var2=...)
    
    Writes variables var1, var2, ... to file filename. The file mode
    can be chosen according to the h5py documentation. The key-value
    arguments have precedence on the provided dictionnary.

    supported variable types are:
    * scalars
    * numpy arrays
    * strings
    * lists
    * dictionaries

    (if the option UNSUPPORTED is equal to 'pickle', any other type
    is pickled and saved. UNSUPPORTED = 'ignore' silently eliminates
    unsupported types. Default is 'fail', which raises an error.) 
    
    The file mode can be chosen according to the h5py documentation.
    It defaults to overwriting an existing file.
    """

    filename = os.path.abspath(os.path.expanduser(filename))

    ctime = time.asctime()
    mtime = ctime

    # Update input dictionnary
    if args:
        d = args[0].copy() # shallow copy
    else:
        d = {}
    d.update(kwargs)

    # List of object ids to make sure we are not saving something twice.
    ids = []
    
    # This is needed to store strings
    dt = h5py.new_vlen(str)

    def check_id(id):
        if id in ids:
            raise RuntimeError('Circular reference detected! Aborting save.')
        else:
            ids.append(id)

    def pop_id(id):
        ids[:] = [x for x in ids if x!=id]

    #@sdebug
    def _store_numpy(group, a, name, compress=True):
        if compress:
            dset = group.create_dataset(name, data=a, compression='gzip')
        else:
            dset = group.create_dataset(name, data=a)
        dset.attrs['type'] = 'array'
        return dset

    #@sdebug
    def _store_string(group, s, name):
        dset = group.create_dataset(name, data=np.asarray(s), dtype=dt)
        dset.attrs['type'] = 'string'
        return dset

    #@sdebug
    def _store_unicode(group, s, name):
        dset = group.create_dataset(name, data=np.asarray(s.encode('utf8')), dtype=dt)
        dset.attrs['type'] = 'unicode'
        return dset


    #@sdebug    
    def _store_list(group, l, name):
        check_id(id(l))
        arrayOK = len(set([type(x) for x in l])) == 1
        if arrayOK:
            try:
                # Try conversion to a numpy array
                la = np.array(l)
                if la.dtype.type is np.string_:
                    arrayOK = False
                else:
                    dset = _store_numpy(group,la,name)
                    dset.attrs['type'] = 'arraylist'
            except:
                arrayOK = False
        if not arrayOK:
            # inhomogenous list. Store all elements individually
            dset = group.create_group(name)
            for i,v in enumerate(l):
                _store(dset, v, '%05d' % i)
            dset.attrs['type'] = 'list'
        pop_id(id(l))
        return dset

    #@sdebug
    def _store_tuple(group, t, name):
        dset = _store_list(group, list(t), name)
        dset_type = dset.attrs['type']
        dset.attrs['type'] = 'arraytuple' if dset_type == 'arraylist' else 'tuple'
        return dset
    
    #@sdebug
    def _store_dict(group, d, name):
        check_id(id(d))
        if any([type(k) not in [str, unicode] for k in d.keys()]):
            raise RuntimeError('Only dictionaries with string keys are supported.')
        dset = group.create_group(name)
        dset.attrs['type'] = 'dict'
        for k,v in d.iteritems():
            if k.find('/') > -1:
                k = k.replace('/',h5options['SLASH_ESCAPE'])
                ndset = _store(dset, v, k)
                if ndset is not None:
                    ndset.attrs['escaped'] = '1'
            else:
                _store(dset, v, k)
        pop_id(id(d))
        return dset

    def _store_dict_new(group, d, name):
        check_id(id(d))
        dset = group.create_group(name)
        dset.attrs['type'] = 'dict'
        for i,kv in enumerate(d.iteritems()):
            _store(dset, kv, '%05d' % i)
        pop_id(id(d))
        return dset
    
    #@sdebug
    def _store_None(group, a, name):
        dset = group.create_dataset(name, data = np.zeros((1,)))
        dset.attrs['type'] = 'None' 
        return dset
        
    #@sdebug
    def _store_pickle(group, a, name):
        apic = cPickle.dumps(a)
        dset = group.create_dataset(name, data=np.asarray(apic), dtype=dt)
        dset.attrs['type'] = 'pickle'
        return dset
    
    #@sdebug
    def _store(group, a, name):
        if type(a) is str:
            dset = _store_string(group,a,name)
        elif type(a) is unicode:
            dset = _store_unicode(group,a,name)
        elif type(a) is dict:
            dset = _store_dict(group,a,name)
        elif type(a) is list:
            dset = _store_list(group,a,name)
        elif type(a) is tuple:
            dset = _store_tuple(group,a,name)
        elif type(a) is np.ndarray:
            dset = _store_numpy(group,a,name)
        elif np.isscalar(a):
            dset = _store_numpy(group,np.asarray(a),name, compress=False)
            dset.attrs['type'] = 'scalar'
        elif a is None:
            dset = _store_None(group, a, name)
        else:
            if h5options['UNSUPPORTED']=='fail':
                raise RuntimeError('Unsupported data type : %s' % type(a))
            elif h5options['UNSUPPORTED']=='pickle':
                dset = _store_pickle(group,a,name)
            else:
                dset = None 
        return dset
 
    # Open the file and save everything
    with h5py.File(filename,mode) as f:
        f.attrs['h5rw_version'] = h5options['H5RW_VERSION']
        f.attrs['ctime'] = ctime
        f.attrs['mtime'] = mtime
        for k,v in d.iteritems():
            _store(f,v,k)
    
    return
    
def h5write(filename, *args, **kwargs):
    """\
    h5write(filename, {'var1'=..., 'var2'=..., ...})
    h5write(filename, var1=..., var2=..., ...)
    h5write(filename, dict, var1=..., var2=...)
    
    Writes variables var1, var2, ... to file filename. The key-value
    arguments have precedence on the provided dictionnary.

    supported variable types are:
    * scalars
    * numpy arrays
    * strings
    * lists
    * dictionaries

    (if the option UNSUPPORTED is equal to 'pickle', any other type
    is pickled and saved. UNSUPPORTED = 'ignore' silently eliminates
    unsupported types. Default is 'fail', which raises an error.) 
    
    The file mode can be chosen according to the h5py documentation.
    It defaults to overwriting an existing file.
    """

    _h5write(filename, 'w', *args, **kwargs)
    return

def h5append(filename, *args, **kwargs):
    """\
    h5append(filename, {'var1'=..., 'var2'=..., ...})
    h5append(filename, var1=..., var2=..., ...)
    h5append(filename, dict, var1=..., var2=...)
    
    Appends variables var1, var2, ... to file filename. The 
    key-value arguments have precedence on the provided dictionnary.

    supported variable types are:
    * scalars
    * numpy arrays
    * strings
    * lists
    * dictionaries

    (if the option UNSUPPORTED is equal to 'pickle', any other type
    is pickled and saved. UNSUPPORTED = 'ignore' silently eliminates
    unsupported types. Default is 'fail', which raises an error.) 
    
    The file mode can be chosen according to the h5py documentation.
    It defaults to overwriting an existing file.
    """

    _h5write(filename, 'a', *args, **kwargs)
    return

def h5read(filename, *args, **kwargs):
    """\
    h5read(filename)
    h5read(filename, s1, s2, ...)
    h5read(filename, (s1,s2, ...))
    
    Read variables from a hdf5 file created with h5write and returns them as 
    a dictionary.
    
    If specified, only variable named s1, s2, ... are loaded.
    
    Variable names support slicing and group access. For instance, provided 
    that the file contains the appropriate objects, the following syntax is 
    valid:
        
    a = h5read('file.h5', 'myarray[2:4]')
    a = h5read('file.h5', 'adict.thekeyIwant')
    
    Another way of slicing, is with the slice keyword argument, which will take
    the provided slice object and apply it on the last variable name:

    a = h5read('file.h5', 'array1', 'array2', slice=slice(1,2))
    # Will read array2[1:2]

    h5read(filename_with_wildcard, ... , doglob=True)
    Reads sequentially all globbed filenames. 
 
    """
    doglob = kwargs.get('doglob', None)

    # Used if we read a list of files
    fnames = []
    if not isinstance(filename, str):
        # We have a list
        fnames = filename
    else:
        if doglob is None:
            # glob only if there is a wildcard in the filename
            doglob = glob.has_magic(filename)
        if doglob:
            fnames = sorted(glob.glob(filename))
            if not fnames:
                raise IOError('%s : no match.' % filename)
      
    if fnames:
        # We are here only if globbing was allowed.
        dl = []
        # Loop over file names
        for f in fnames:
            # Call again, but this time without globbing.
            d = h5read(f, *args, doglob=False, **kwargs)
            dl.append(d) 
        return dl
    
    # We are here only if there was no globbing (fnames is empty)
    filename = os.path.abspath(os.path.expanduser(filename))

    # Define helper functions
    def _load_dict_new(dset):
        d = {}
        keys = dset.keys()
        keys.sort()
        for k in keys:
            dk, dv = _load(dset[k])
            d[dk] = dv
        return d

    def _load_dict(dset):
        d = {}
        for k,v in dset.items():
            if v.attrs.get('escaped', None) is not None:
                k = k.replace(h5options['SLASH_ESCAPE'], '/')
            d[k] = _load(v)
        return d
    
    def _load_list(dset):
        l = []
        keys = dset.keys()
        keys.sort()
        for k in keys:
            l.append(_load(dset[k]))
        return l
    
    def _load_numpy(dset,sl=None):
        if sl is not None:
            return dset[sl]
        else:
            return dset[...]
        
    def _load_scalar(dset):
        try:
            return dset[...].item()
        except:
            return dset[...]
       
    def _load_str(dset):
        return dset.value

    def _load_unicode(dset):
        return dset.value.decode('utf8')
        
    def _load_pickle(dset):
        return cPickle.loads(dset[...])

    def _load(dset, sl=None):
        dset_type = dset.attrs.get('type',None)
      
        # Treat groups as dicts
        if (dset_type is None) and (type(dset) is h5py.Group):
            dset_type = 'dict'    

        if dset_type == 'dict':
            if sl is not None:
                raise RuntimeError('Dictionaries do not support slicing')
            val = _load_dict(dset)
        elif dset_type == 'list':
            val = _load_list(dset)
            if sl is not None:
                val = val[sl]
        elif dset_type == 'array':
            val = _load_numpy(dset,sl)
        elif dset_type == 'arraylist':
            val = [x for x in _load_numpy(dset)]
            if sl is not None:
                val = val[sl]
        elif dset_type == 'tuple':
            val = tuple(_load_list(dset))
            if sl is not None:
                val = val[sl]
        elif dset_type == 'arraytuple':
            val = tuple(_load_numpy(dset).tolist())
            if sl is not None:
                val = val[sl]
        elif dset_type == 'string':
            val = _load_str(dset)
            if sl is not None:
                val = val[sl]
        elif dset_type == 'unicode':
            val = _load_str(dset)
            if sl is not None:
                val = val[sl]
        elif dset_type == 'scalar':
            val = _load_scalar(dset)
        elif dset_type == 'None':
            # 24.4.13 : B.E. commented due to hr5read not being able to return None type
            #try:
            #   val = _load_numpy(dset)
            #except:
            #    val = None
            val = None
        elif dset_type == 'pickle':
            val = _load_pickle(dset)
        elif dset_type is None:
            val = _load_numpy(dset,sl)
        else:
            raise RuntimeError('Unsupported data type : %s' % dset_type)
        return val

    # Read file content
    outdict = {}

    slice = kwargs.get('slice', None)

    try:
        f = h5py.File(filename,'r')
    except:
        print 'Error when opening file %s.' % filename
        raise
    else:
        with f:
            h5rw_version = f.attrs.get('h5rw_version',None)
            if h5rw_version is None:
                print('Warning: this file does not seem to follow h5read format.')
            ctime = f.attrs.get('ctime', None)
            if ctime is not None:
                print('File created : ' + ctime)
            if len(args) == 0:
                # no input arguments - load everything
                if slice is not None:
                    raise RuntimeError('A variable name must be given when slicing.')
                key_list = f.keys()
            else:
                if (len(args) == 1) and (type(args[0]) is list):
                    # input argument is a list of object names
                    key_list = args[0]
                else:
                    # arguments form a list
                    key_list = list(args)

            last_k = key_list[-1]
            for k in key_list:
                if k == last_k and slice is not None: 
                    sl = slice
                else:
                    # detect slicing
                    if '[' in k:
                        k,slice_string = k.split('[')
                        slice_string = slice_string.split(']')[0]
                        sl = str_to_slice('[' + slice_string + ']')
                    else:
                        sl = None
                
                # detect group access
                if '.' in k:
                    glist = k.split('.')
                    k = glist[-1]
                    gr = f[glist[0]]
                    for gname in glist[1:-1]:
                        gr = gr[gname]
                    outdict[k] = _load(gr[k],sl)
                else:
                    outdict[k] = _load(f[k],sl)
      
    return outdict      

    
def h5info(filename, output=None, print_on=True):
    """\
    h5info(filename)

    Prints out a tree structure of given h5 file.
    
    [17/01/2012 guillaume potdevin]
    added optional argument output:
    	if output is set to 1, then the printed string is returned
    	
    optional argument "print_on":
    Set to false to suppress output of function to stdout.
    """

    indent = 4
    filename = os.path.abspath(os.path.expanduser(filename))

    def _format_dict(key, dset):
        stringout = ' '*key[0] + ' * %s [dict]:\n' % key[1] 
        for k,v in dset.items():
            if v.attrs.get('escaped', None) is not None:
                k = k.replace(h5options['SLASH_ESCAPE'], '/')
            stringout += _format((key[0]+indent, k), v)
        return stringout
    
    def _format_list(key, dset):
        stringout = ' '*key[0] + ' * %s [list]:\n' % key[1] 
        keys = dset.keys()
        keys.sort()
        for k in keys:
            stringout += _format((key[0]+indent, ''), dset[k])
        return stringout

    def _format_tuple(key, dset):
        stringout = ' '*key[0] + ' * %s [tuple]:\n' % key[1] 
        keys = dset.keys()
        keys.sort()
        for k in keys:
            stringout += _format((key[0]+indent, ''), dset[k])
        return stringout

    def _format_arraytuple(key, dset):
        a = dset[...]
        if len(a) < 5:
            stringout = ' '*key[0] + ' * ' + key[1] + ' [tuple = ' + str(tuple(a.ravel())) + ']\n'
        else:
            try:
                float(a.ravel()[0])
                stringout = ' '*key[0] + ' * ' + key[1] + ' [tuple = (' + (('%f, '*4) % tuple(a.ravel()[:4]) ) + ' ...)]\n'
            except ValueError:
                stringout = ' '*key[0] + ' * ' + key[1] + ' [tuple = (%d x %s objects)]\n' % (a.size, str(a.dtype))
        return stringout

    def _format_arraylist(key, dset):
        a = dset[...]
        if len(a) < 5:
            stringout = ' '*key[0] + ' * ' + key[1] + ' [list = ' + str(a.tolist()) + ']\n'
        else:
            try:
                float(a.ravel()[0])
                stringout = ' '*key[0] + ' * ' + key[1] + ' [list = [' + (('%f, '*4) % tuple(a.ravel()[:4]) ) + ' ...]]\n'
            except ValueError:
                stringout = ' '*key[0] + ' * ' + key[1] + ' [list = [%d x %s objects]]\n' % (a.size, str(a.dtype))
        return stringout
    
    def _format_numpy(key, dset):
        a = dset[...]
        if len(a) < 5 and a.ndim == 1:
            stringout = ' '*key[0] + ' * ' + key[1] + ' [array = ' + str(a.ravel()) + ']\n'
        else:
            stringout = ' '*key[0] + ' * ' + key[1] + ' [' + (('%dx'*(a.ndim-1) + '%d') % a.shape) + ' ' + str(a.dtype) + ' array]\n'
        return stringout
        
    def _format_scalar(key, dset):
        stringout = ' '*key[0] + ' * ' + key[1] + ' [scalar = ' + str(dset[...]) + ']\n'
        return stringout
       
    def _format_str(key, dset):
        s = str(dset[...])
        if len(s) > 40:
            s = s[:40] + '...'
        stringout = ' '*key[0] + ' * ' + key[1] + ' [string = "' + s + '"]\n'
        return stringout

    def _format_unicode(key, dset):
        s = str(dset[...]).decode('utf8')
        if len(s) > 40:
            s = s[:40] + '...'
        stringout = ' '*key[0] + ' * ' + key[1] + ' [unicode = "' + s + '"]\n'
        return stringout
       
    def _format_pickle(key, dset):
        stringout = ' '*key[0] + ' * ' + key[1] + ' [pickled object]\n'
        return stringout

    def _format_None(key, dset):
        stringout = ' '*key[0] + ' * ' + key[1] + ' [None]\n'
        return stringout

    def _format_unknown(key, dset):
        stringout = ' '*key[0] + ' * ' + key[1] + ' [unknown]\n'
        return stringout

    def _format(key, dset):
        dset_type = dset.attrs.get('type',None)
      
        # Treat groups as dicts
        if (dset_type is None) and (type(dset) is h5py.Group):
            dset_type = 'dict'

        if dset_type == 'dict':
            stringout = _format_dict(key, dset)
        elif dset_type == 'list':
            stringout = _format_list(key, dset)
        elif dset_type == 'array':
            stringout = _format_numpy(key, dset)
        elif dset_type == 'arraylist':
            stringout = _format_arraylist(key, dset)
        elif dset_type == 'tuple':
            stringout = _format_tuple(key, dset)
        elif dset_type == 'arraytuple':
            stringout = _format_arraytuple(key, dset)
        elif dset_type == 'string':
            stringout = _format_str(key, dset)
        elif dset_type == 'unicode':
            stringout = _format_unicode(key, dset)
        elif dset_type == 'scalar':
            stringout = _format_scalar(key, dset)
        elif dset_type == 'None':
            try:
                stringout = _format_numpy(key, dset)
            except:
                stringout = _format_None(key, dset)
        elif dset_type == 'pickle':
            stringout = _format_pickle(dset)
        elif dset_type is None:
            stringout = _format_numpy(key, dset)
        else:
            stringout = _format_unknown(key, dset)
        return stringout
        
    with h5py.File(filename,'r') as f:
        h5rw_version = f.attrs.get('h5rw_version',None)
        if h5rw_version is None:
            print('Warning: this file does not seem to follow h5read format.')
        ctime = f.attrs.get('ctime', None)
        if ctime is not None:
            print('File created : ' + ctime)
        key_list = f.keys()
        outstring = ''
        for k in key_list:
            outstring += _format((0,k),f[k])
    
    if print_on:
        print outstring
    
    # return string if output variable passed as option
    if output != None:
	    return outstring