#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 26 15:17:03 2017

@author: christian
"""

from __future__ import division, print_function   # if running Python 2.7, run this line


import numpy as np                # numerical analysis
import matplotlib.pyplot as plt   # plotting package
import pandas as pd               # excel-like dataframes 
import seaborn as sns
from mpl_toolkits.basemap import Basemap


#==============================================================================
#  initial parameters
#==============================================================================


CreatePlots = True 
SavePlots = False
PrintAll = False
Find_Dist = False
Find_Dist_ALL = False
plt.close('all') # close all previously open figures

pd.set_option('display.max_rows', 10) # number of max rows to print for a DataFrame  
sns.set_context("poster") # change some standard settings of plots
current_palette = sns.color_palette() # some nicer colors. Can be accessed with current_palette[0] eg.

house_index = 0           # Lolland
#house_index = 12595      # Strandvejen
#house_index = 7895       # Strandvejen




#==============================================================================
#  Read data
#==============================================================================


print("\n\n  --------------------------------------------------------------  ")
print("      Housing prices analysis")
print("  ------------------------------------------------------------------ \n\n ")

# read csv file:
df = pd.read_csv('HousingPrices_Cleaned.csv')  # other often used options: sep=';', decimal=".", na_values=['NA', 'NAN', '.']
print("Dataframe loaded with dimension: ", df.shape, "\n")
if PrintAll:
    print(df.head(), "\n")

# pandas recognizes floats and integers automatically, but needs some help with the dates:
dates_columns = ['DATE_OF_SALES_PRICE', 'DATE_OF_PREVIOUS_SALES_PRICE_FIRST', 'OMREGNINGS_DATO']
for column in dates_columns:
    df[column] = pd.to_datetime(df[column], format="%Y-%m-%d", errors='coerce')

#types of data in the different columns
types = df.dtypes
if PrintAll:
    print("Types of data:", types, "\n")


df = df[(~df.N_COORDINATE.isnull()) & (~df.E_COORDINATE.isnull())]
GPS = pd.read_csv('GPS_data.csv')
df = pd.concat([df, GPS], axis=1)
appartment_x = df.loc[house_index].copy()


## =============================================================================
## Adding coordinates
## =============================================================================
#
#
#import googlemaps
#gmaps = googlemaps.Client(key='AIzaSyC53gDzC9TbUIOqDrLOhi6P9BhTKb1hElY')
#
#address = appartment_x.ADDRESS # address might need to be string formatted
#postcode = appartment_x.POSTAL_CODE
#search_string = address + " " + str(int(postcode))
#geocode_result = gmaps.geocode(search_string)
#    
#location = geocode_result[0]["geometry"]["location"]
#appartment_x['gLAT'], appartment_x['gLON'] = np.array([location["lat"], location["lng"]])
#print("Google Maps Search, Coords:")
#print(appartment_x['gLAT'], appartment_x['gLON'])
#
#
## =============================================================================
## From UTM coordiantes to LAT / LON
## =============================================================================
#
#
#import utm # pip install utm --user
##utm.to_latlon(E, N, 32, northern=True)
#appartment_x['utmLAT'], appartment_x['utmLON'] = utm.to_latlon(appartment_x.E_COORDINATE, appartment_x.N_COORDINATE, 32, northern=True)
#print("UTM to LAT/LON, Coords:")
#print(appartment_x['utmLAT'], appartment_x['utmLON'])
#print("\n\n")


## =============================================================================
## Save coordinates to dataframe
## =============================================================================
#
#N = len(df)
#LAT_GPS = np.zeros(N)
#LON_GPS = np.zeros(N)
#for i in range(N):
#    LAT_GPS[i], LON_GPS[i] = utm.to_latlon(df['E_COORDINATE'].iloc[i], df['N_COORDINATE'].iloc[i], 32, northern=True)
#GPS = {'LAT': LAT_GPS, 'LON': LON_GPS}
#df_GPS = pd.DataFrame(GPS)
#df_GPS.to_csv('GPS_data.csv', index=False)

# =============================================================================
# Plot GPS coordinates
# =============================================================================


if CreatePlots:

    lllon = 7.8
    lllat = 54.5
    urlon = 13.0
    urlat = 57.8
    
    lat0 = 55.6815
    lon0 = 12.5697

    # We use the "intermediate" resolution (option "i"), simply to make things run faster.
    my_map = Basemap(projection='merc', lat_0 = lat0, lon_0 = lon0,
        resolution = 'i', area_thresh = 0.1,   # resolution either full (f), high (h), intermediate (i) or low(l)
        llcrnrlon=lllon, llcrnrlat=lllat,     # lattitude is up / down
        urcrnrlon=urlon, urcrnrlat=urlat)     # longitude is the left / right
    
    x_gps, y_gps = my_map(appartment_x.LON, appartment_x.LAT) # can be lists or arrays as well   
#    x_gps_lon, y_gps_lats = my_map(lons, lats) # can be lists or arrays as well   
    
    
    fig, ax = plt.subplots(figsize=(10*1.2, 8*1.2), facecolor='w')
#    ax = fig.add_subplot(111) #  
    
    # draw map details
    my_map.drawmapboundary(fill_color = 'white')
    coast = my_map.drawcoastlines()
    my_map.fillcontinents(color='#C0C0C0')
    my_map.drawcountries(
        linewidth=.75, linestyle='solid', color='#000073',
        antialiased=True,
        ax=ax, zorder=3)
    my_map.drawparallels(
        np.arange(round(lllat), round(urlat), 1.),
        color = 'black', linewidth = 0.5,
        labels=[True, False, False, False])
    my_map.drawmeridians(
        np.arange(round(lllon), round(urlon), 2.),
        color = '0.25', linewidth = 0.5,
        labels=[False, False, False, True])
    my_map.drawmapscale(
        8.5, 57.5, lon0, lat0,
        100,
        units='km', fontsize=10,
        yoffset=None,
        barstyle='simple', labelstyle='simple',
        fillcolor1='w', fillcolor2='#000000',
        fontcolor='#000000',
        zorder=5)
    
    my_map.plot(x_gps, y_gps, 'o', color=current_palette[2])
#    my_map.plot(x_gps_lon, y_gps_lats, '-', color=current_palette[0], lw=1)
    
    plt.title("House Prices in Denmark")
    if SavePlots:
        plt.savefig("./figures/HousePrices_overview.pdf", format="pdf", dpi=600, transparent=True)
    plt.show()

    # "coast" contains the segments copied from "Basemap", which is a Python package containing a world map.
    # However, the coordinates are in pixel values, and hence needs to be converted to latitude and longitude.
    coordinates_coast = np.vstack(coast.get_segments())
    lons_coast, lats_coast = my_map(coordinates_coast[:,0], coordinates_coast[:,1], inverse=True)
    # Alternatively, one can read a local file with a map, see below.
    
    
    
# =============================================================================
# How to work with shapefiles
# 
# taken from http://www.eurogeographics.org/products-and-services/euroglobalmap
# =============================================================================


def haversine(lat1, lon1, lat2, lon2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = np.radians([lon1, lat1, lon2, lat2])
    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arcsin(np.sqrt(a)) 
    km = 6367 * c
    return km

def min_dist(lat0, lon0, lats, lons):
    dist = np.sqrt( (lat0-lats)**2 + (lon0-lons)**2  )
    index_min = np.argmin(dist)
    dist_min = haversine(lat0, lon0, lats[index_min], lons[index_min])
    return dist_min
   
def interpolation(iteratable):
    from scipy.interpolate import interp1d 
    x = []
    y = []
    for i, shape in enumerate(iteratable):
        xx, yy = zip(*shape)
        xx = np.array(xx)
        yy = np.array(yy)
        if xx.min() > 0 and yy.min() > 0: # if inside map of Denmark
            for ii in range(len(xx)-1):
                xx2 = xx[ii:ii+2]
                yy2 = yy[ii:ii+2]
                p = xx2.argsort()
                xx3 = xx2[p]
                yy3 = yy2[p]
                f = interp1d(xx3, yy3)
                xnew = np.linspace(xx3[0], xx3[1], N_interpolation)
                ynew = f(xnew)
                x.append(xnew)        
                y.append(ynew)
    x = np.array(x).flatten()
    y = np.array(y).flatten()
    return x, y


def get_shapefile_coords(shapefile, N_interpolation=10):
    lllon, lllat, urlon, urlat, lat0, lon0 = [7.8, 54.5, 13.0, 57.8, 55.68150, 12.5697]
    print('Loading My Map') 

    mmap = Basemap(projection='merc', lat_0 = lat0, lon_0 = lon0,
        resolution = 'i', area_thresh = 0.1,   # resolution either full (f), high (h), intermediate (i) or low(l)
        llcrnrlon=lllon, llcrnrlat=lllat,     # lattitude is up / down
        urcrnrlon=urlon, urcrnrlat=urlat)     # longitude is the left / right    
    
    mmap.readshapefile(shapefile, 'shapefile', drawbounds = False)   
    
    x, y = interpolation(mmap.shapefile)
    lons, lats = mmap(x, y, inverse=True)
    return lats, lons 
    


# Here, we read the map from a local file (shape file) obtained from EuroGraphics (see link above):
# The below example is just for one location as a test:
if Find_Dist and CreatePlots:
    shapefile = './files/CoastL' # needs files: .dbf, .shp, .shx
#    shapefile = './files/LakeresA' # needs files: .dbf, .shp, .shx
    N_interpolation = 1

    # First find the latitute and longitude, then find the minimum distance to coast:
    # For the shape file:
    lats_shapefile, lons_shapefile = get_shapefile_coords(shapefile, N_interpolation)
    dist_shapefile = min_dist(appartment_x['LAT'], appartment_x['LON'], lats_shapefile, lons_shapefile)
    print("Distance from appartment x to the coast by shapefile: ", dist_shapefile)    

    # For the basemap:
    dist_basemap = min_dist(appartment_x['LAT'], appartment_x['LON'], lats_coast, lons_coast)
    print("Distance from appartment x to the coast by Basemap: ", dist_basemap)    

    # One may choose to use either, but having two versions of course allows for good validation.
    # In general, shape files also exist with other information (such as e.g. roads).



# =============================================================================
# Calculate minimum distance and save dists to dataframe for ALL locations:
# =============================================================================

if Find_Dist_ALL and Find_Dist and CreatePlots:

    N = len(df)
    dists = np.zeros(N)
    for i in range(N):
        dists[i] = min_dist(df['LAT'].iloc[i], df['LON'].iloc[i], lats_coast, lons_coast)
        
    df_dists = pd.DataFrame({'SEA_DIST': dists})
    df_dists.to_csv('SEA_DIST.csv', index=False)
