%matplotlib inline
import pandas as pd
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
import seaborn as sns 
sns.set(style="darkgrid")
import numpy as np


# Load Data
# Read the two recorded runs of every test cycle into their own DataFrame.
# NOTE(review): pd.read_table defaults to tab-separated input, so these
# '.csv' files are presumably tab-delimited — confirm against the data.
(
    _nedc1, _nedc2,
    _perm1, _perm2,
    _sine1, _sine2,
    _double1, _double2,
) = (
    pd.read_table('{}{}.csv'.format(cycleName, runNumber))
    for cycleName in ('nedc', 'perm', 'sine', 'double')
    for runNumber in (1, 2)
)


# Remove data points before and after each test cycle
def trimData(df, cycleLen=1180):
    """Cut a recording down to a single test cycle.

    The cycle is taken to start at the first row whose ``Time`` value is
    greater than -0.5. That row and the rows following it, ``cycleLen`` rows
    in total, are returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Recording with a ``Time`` column.
    cycleLen : int, optional
        Number of rows to keep (default 1180).

    Returns
    -------
    pandas.DataFrame
        The selected rows with a fresh 0-based index. Contains fewer than
        ``cycleLen`` rows if the recording ends early.

    Raises
    ------
    ValueError
        If no row has ``Time`` greater than -0.5 (previously the frame was
        silently cut from its first row in that case).
    """
    afterStart = df['Time'].gt(-0.5).to_numpy()
    if not afterStart.any():
        raise ValueError('No row with Time > -0.5 found; cannot locate the cycle start!')
    # argmax on a boolean array yields the *position* of the first True, so
    # the slice below is correct whatever index labels the frame carries.
    firstRowPos = int(afterStart.argmax())
    return df.iloc[firstRowPos:firstRowPos + cycleLen].reset_index(drop=True)

# Trim every raw recording to its test cycle. The two "double" recordings
# are given twice the default cycle length.
nedc1, nedc2, perm1, perm2 = (
    trimData(rawFrame) for rawFrame in (_nedc1, _nedc2, _perm1, _perm2)
)
double1, double2 = (
    trimData(rawFrame, cycleLen=2*1180) for rawFrame in (_double1, _double2)
)
sine1, sine2 = (trimData(rawFrame) for rawFrame in (_sine1, _sine2))


# Constants
# Column holding the vehicle speed (divided by 3.6 to obtain m/s further
# down, so presumably recorded in km/h — TODO confirm).
VELOCITY_KEY = 'Velocity ECU'
# Column holding the NOx values that are plotted and accumulated below.
NOX_KEY = 'mdot_NOx DC'


# Merging and splitting experiments

def rename(additionalString):
    """Return a function that appends ``additionalString`` to its argument.

    Both the argument and ``additionalString`` are converted with ``str``
    before being concatenated.
    """
    def appendSuffix(x):
        return str(x) + str(additionalString)

    return appendSuffix

def renameBack(additionalString):
    """Return a function that strips the suffix ``additionalString`` again.

    The returned function is the inverse of ``rename(additionalString)`` for
    string inputs.

    Raises (from the returned function)
    -----------------------------------
    ValueError
        If the given string does not end with ``additionalString``.
    """
    def f(s):
        if not s.endswith(additionalString):
            raise ValueError(
                'String {!r} was not renamed by adding {!r} previously. '
                'Cannot rename back!'.format(s, additionalString)
            )
        # Slice by explicit length: ``s[:-0]`` would be empty, so an empty
        # suffix has to be handled without a negative index.
        return s[:len(s) - len(additionalString)]
    return f

# Default renaming pair: append / strip the ' Ref' suffix on column names.
# renameRef is the default renaming used by the join and retiming helpers.
renameRef = rename(' Ref')
renameBackRef = renameBack(' Ref')


def joinExperiments(df1, df2, f_rename=renameRef):
    """Place two experiments side by side in one frame.

    The columns of ``df1`` are renamed with ``f_rename`` and come first;
    the columns of ``df2`` follow under their original names.
    """
    renamedFirst = df1.rename(columns=f_rename)
    return pd.concat([renamedFirst, df2], axis=1)

def sortFrame(df, key, dropOldIndex=False):
    """Sort ``df`` by ``key`` and renumber the rows from 0.

    Unless ``dropOldIndex`` is true, the previous index is kept as a column.
    """
    ordered = df.sort_values(by=key)
    return ordered.reset_index(drop=dropOldIndex)

def splitExperiments(joinedDf):
    """Undo ``joinExperiments``: split a joined frame into its two halves.

    The first half of the columns must be the renamed counterparts of the
    second half, i.e. each renamed name starts with the matching original
    name and is strictly longer.

    Returns a tuple ``(df1, df2)``; ``df1`` holds the renamed half with the
    original column names restored, ``df2`` the untouched half.

    Raises ``Exception`` if the column count is odd or the names do not
    pair up.
    """
    columnCount = len(joinedDf.columns)
    if columnCount % 2 != 0:
        raise Exception('Joined DFs must have even number of columns!')

    half = columnCount // 2
    renamedColumns = joinedDf.columns[:half]
    originalColumns = joinedDf.columns[half:]

    for original, renamed in zip(originalColumns, renamedColumns):
        isExtension = renamed.startswith(original) and len(renamed) > len(original)
        if not isExtension:
            raise Exception("Column names don't match!")

    first = joinedDf.drop(columns=originalColumns)
    first.columns = originalColumns
    second = joinedDf.drop(columns=renamedColumns)
    return (first, second)


# Take average of NEDC1 and NEDC2 data
def averageCycles(df1, df2):
    """Average two recordings of the same cycle value by value.

    Every column is replaced by the mean of the two frames, aligned on the
    row index. A ``'Time'`` column, if present, is not averaged: it is
    copied from ``df2`` (this matches the previous row-wise implementation,
    which read the un-renamed, i.e. ``df2``, time value). ``'Time'`` keeps
    the dtype it has in ``df2``.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames with identical columns (same names, same order).

    Returns
    -------
    pandas.DataFrame
        New frame with the columns of ``df1``; the inputs are not modified.

    Raises
    ------
    ValueError
        If the two frames do not have identical columns. This now also
        covers frames with a different *number* of columns, which used to
        fail inside pandas with an unrelated length error.
    """
    if not df1.columns.equals(df2.columns):
        raise ValueError('Can only average cycles with identical columns!')
    # Vectorised mean over the whole frame instead of building one Series
    # per row.
    avg = (df1 + df2) / 2.0
    if 'Time' in avg.columns:
        avg['Time'] = df2['Time']
    return avg

# Reference cycle: the mean of both NEDC recordings.
nedc = averageCycles(nedc1, nedc2)
# There is no OBD data for NEDC1, hence we keep the data from NEDC2; averaging results in incorrect values
for col in (VELOCITY_KEY, 'Engine Speed'):
    nedc[col] = nedc2[col]


# Integrating data
def integrateData(df, key, step=lambda prev, this: prev + this):
    """Replace column ``key`` by its running accumulation.

    Starting from ``0.0``, ``step(previous, value)`` is applied to the
    values of the column in row order; each result is stored in place of
    the value and becomes ``previous`` for the next row. With the default
    ``step`` this is a cumulative sum.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    key : str
        Column to accumulate.
    step : callable, optional
        ``step(previous, value) -> new accumulated value``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with column ``key`` replaced by the accumulated
        values. Index and all other columns are unchanged.
    """
    previous = 0.0
    accumulated = []
    # Walk the values positionally so the function works for any index
    # labels, not only 0..n-1.
    for value in df[key].to_numpy():
        previous = step(previous, value)
        accumulated.append(previous)
    result = df.copy()
    # Assign the whole column at once: avoids writing floats cell by cell
    # into a column that may have an integer dtype.
    result[key] = accumulated
    return result

def accumulatedNOxPerTotalDistance(df):
    """Accumulate the NOx column, normalised by the total distance driven.

    The velocity column is integrated first to obtain the total distance;
    the NOx column is then accumulated with every value scaled by
    ``1000 / totalDistance``. Returns a copy of ``df`` in which only the
    NOx column is replaced.
    """
    def addDistance(travelled, speed):
        return travelled + speed/3.6  # convert speed to m/s

    distanceFrame = integrateData(df, VELOCITY_KEY, addDistance)
    lastRow = len(distanceFrame) - 1
    totalDistance = distanceFrame.loc[lastRow, VELOCITY_KEY] / 1000  # convert distance to km

    def addScaledNOx(accumulated, noxValue):
        return accumulated + noxValue*1000/totalDistance

    return integrateData(df, NOX_KEY, addScaledNOx)


# Distance-normalised, accumulated NOx for the reference and every test cycle.
nedcAccumulated = accumulatedNOxPerTotalDistance(nedc)
(
    perm1Accumulated, perm2Accumulated,
    double1Accumulated, double2Accumulated,
    sine1Accumulated, sine2Accumulated,
) = (
    accumulatedNOxPerTotalDistance(cycleFrame)
    for cycleFrame in (perm1, perm2, double1, double2, sine1, sine2)
)


# Find maximum value error
def getMaximumValueError(retimingDf, key, f_rename=renameRef):
    """Return the largest absolute row-wise difference between two columns.

    Compares column ``key`` with its renamed counterpart ``f_rename(key)``
    in a joined (retimed) frame.
    """
    def rowError(row):
        return abs(row[key] - row[f_rename(key)])

    errors = retimingDf.apply(rowError, axis=1)
    return max(errors)


# Tick positions for the NOx (right-hand) y-axis of plotCycles.
NOX_TICKS_DEFAULT = [0.0, 0.02, 0.04, 0.06, 0.08, 0.1]
# Ticks passed by the callers that plot accumulated NOx.
NOX_TICKS_ACCUMULATED = [0.0, 80.0, 182.0, 316.0, 483.0, 632.0]

# Tick positions for the time (x) axis of plotCycles; the last tick is
# presumably the final sample of a 1180-row cycle (2*1180 rows for DOUBLE).
TIME_TICKS_DEFAULT = [0, 200,400,600,800,1000,1179]
TIME_TICKS_DOUBLE = [0, 200,400,600,800,1000,1180, 2359]

def plotCycles(df1, df2, title, timeTicks=TIME_TICKS_DEFAULT, noxTicks=NOX_TICKS_DEFAULT, timeValues=None):
    """Plot speed and NOx of two cycles over time in one figure.

    Speed is drawn against the left y-axis (``df1`` blue, ``df2`` green),
    NOx against a twin right y-axis (``df1`` orange, ``df2`` red). Both
    frames are plotted against ``timeValues``, which defaults to
    ``df1['Time']``. ``timeTicks`` and ``noxTicks`` set the x ticks and the
    right-hand y ticks.
    """
    if timeValues is None:
        timeValues = df1['Time']

    # Pull all series out first so a missing column fails before a figure
    # is created.
    speedSeries = [frame[VELOCITY_KEY] for frame in (df1, df2)]
    noxSeries = [frame[NOX_KEY] for frame in (df1, df2)]

    fig, speedAxis = plt.subplots(nrows=1)

    # Left axis: speed.
    speedAxis.set_title(title)
    speedAxis.set_xlabel("Time")
    speedAxis.set_ylabel("Speed")
    for series, colour in zip(speedSeries, ('tab:blue', 'tab:green')):
        speedAxis.plot(timeValues, series, color=colour)
    speedAxis.margins(x=0, y=0)
    speedAxis.get_yaxis().tick_left()
    speedAxis.tick_params(grid_linestyle='', labelleft=True, labelright=False, left=True, right=False)
    speedAxis.set_xticks(timeTicks)

    # Right axis: NOx, sharing the x-axis with the speed plot.
    noxAxis = speedAxis.twinx()
    noxAxis.set_ylabel("NOx")
    for series, colour in zip(noxSeries, ('tab:orange', 'tab:red')):
        noxAxis.plot(timeValues, series, color=colour)
    noxAxis.set_yticks(noxTicks)
    noxAxis.tick_params(grid_linestyle='', labelleft=False, labelright=True, left=False, right=True)
    noxAxis.get_yaxis().tick_right()
    noxAxis.set_xticks(timeTicks)
    noxAxis.margins(x=0)

    fig.set_size_inches(16, 5)
    fig.set_dpi(144.0)


# Raw (un-retimed, non-accumulated) comparison of the averaged NEDC with
# the first PermNEDC run.
plotCycles(nedc, perm1, "Perm1 and averaged NEDC")


# For correctness of the following algorithms, it is necessary that the data is sampled by 1Hz
def assertEquiDistantDiscreteTime(df):
    """Check that consecutive ``Time`` values are exactly one unit apart.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric ``Time`` column.

    Raises
    ------
    Exception
        If any two consecutive time stamps differ from a step of 1.0 by
        more than 10E-9 (i.e. 1e-8).
    """
    # Work on the raw values so the check is positional and does not depend
    # on the frame's index labels.
    timestamps = df['Time'].to_numpy(dtype=float)
    steps = np.diff(timestamps)
    if (np.abs(steps - 1.0) > 10E-9).any():
        raise Exception('Data not sampled as expected!')

# Run the sampling check on every trimmed recording.
for trimmedCycle in (nedc1, nedc2, perm1, perm2, double1, double2, sine1, sine2):
    assertEquiDistantDiscreteTime(trimmedCycle)


# r_p
# Lengths of the seven segments (euc0..euc6) that make up one urban cycle.
# Each sum lists the sub-phase lengths of the segment (presumably seconds,
# given the 1 Hz sampling — TODO confirm).
euc0 = 6+5
euc1 = 0+4+8+2+3
euc2 = 16+5
euc3 = 0+5+2+5+24+8+3
euc4 = 16+5
euc5 = 0+5+2+9+2+8+12+8+13+2+7+3
euc6 = 7
ops = [euc0, euc1, euc2, euc3, euc4, euc5, euc6]
euc = sum(ops)
# Remainder of the cycle after the four urban cycles.
extra = 20+5+2+9+2+8+2+13+50+4+4+69+13+50+35+30+20+10+16+8+10+20
nedcLen = 4*euc + extra

# NEDC ops: four urban cycles in their original segment order.
nedcOps = ops * 4
nedcOpsi = list(range(len(nedcOps)))

# PermNEDC ops: per urban cycle, the order in which the segments are driven.
# opsiK holds global segment indices (cycle K-1 offsets them by 7*(K-1));
# opsK holds the matching segment lengths.
opsi1 = [0, 3, 2, 5, 4, 1, 6]
opsi2 = [segment + 7 for segment in (0, 5, 2, 1, 4, 3, 6)]
opsi3 = [segment + 14 for segment in (0, 5, 2, 3, 4, 1, 6)]
opsi4 = [segment + 21 for segment in (0, 3, 2, 1, 4, 5, 6)]
ops1, ops2, ops3, ops4 = (
    [ops[segment % 7] for segment in order]
    for order in (opsi1, opsi2, opsi3, opsi4)
)
ops1234 = ops1 + ops2 + ops3 + ops4
opsi1234 = opsi1 + opsi2 + opsi3 + opsi4

import functools
import operator

def foldl(func, acc, xs):
    """Left fold: ``functools.reduce`` with the accumulator given second."""
    return functools.reduce(func, xs, acc)


def _appendRunningTotal(totals, length):
    # Extend the list of running totals by the previous total plus `length`.
    return totals + [totals[-1] + length]


# Start offsets of every segment (plus the final end offset) for the
# permuted and the original segment order.
totalOps1234 = foldl(_appendRunningTotal, [0], ops1234)
totalNedcOps = foldl(_appendRunningTotal, [0], nedcOps)

def getOpsLength(idx):
    """Return the length of segment ``idx``; indices repeat every 7 segments."""
    positionInCycle = idx % 7
    return ops[positionInCycle]

def findOpsIndexForTime(t, totalOps):
    """Locate the segment that contains time ``t``.

    ``totalOps`` lists the start offsets of all segments followed by the
    end offset of the last one. Returns ``(segmentIndex, offset)`` where
    ``offset`` is ``t`` relative to the start of that segment.

    Raises ``Exception`` if ``t`` is not below the last offset.
    """
    for segment, upperBound in enumerate(totalOps[1:]):
        if t < upperBound:
            return (segment, t - totalOps[segment])
    raise Exception("Time beyond ops")

def findOpsIndexForEUCIndex(idx, opsi):
    """Return the position of the first entry of ``opsi`` equal to ``idx``.

    Raises ``Exception`` if ``idx`` does not occur in ``opsi``.
    """
    for position, segmentIndex in enumerate(opsi):
        if segmentIndex == idx:
            return position
    raise Exception("Ops index not found")
    
def nedc2perm(t):
    """Map a time point of the NEDC onto the matching time in PermNEDC.

    Times at or beyond the four urban cycles (``4*euc``) map to themselves.
    Otherwise the segment containing ``t`` is looked up in the NEDC order,
    located in the permuted order, and the offset within the segment is
    carried over.
    """
    urbanEnd = 4*euc
    if t >= urbanEnd:
        # Extra urban -> identity
        return t

    segment, offset = findOpsIndexForTime(t, totalNedcOps)
    eucIndex = nedcOpsi[segment]  # trivial for NEDC, because nedcOpsi is the identity
    permSegment = findOpsIndexForEUCIndex(eucIndex, opsi1234)
    permStart = totalOps1234[permSegment]
    return permStart + offset
    
def perm2nedc(t):
    """Map a time point of PermNEDC onto the matching time in the NEDC.

    Inverse direction of ``nedc2perm``: times at or beyond ``4*euc`` map to
    themselves; otherwise the permuted segment containing ``t`` is
    translated to its position in the NEDC order, keeping the offset within
    the segment.
    """
    urbanEnd = 4*euc
    if t >= urbanEnd:
        # Extra urban -> identity
        return t

    segment, offset = findOpsIndexForTime(t, totalOps1234)
    eucIndex = opsi1234[segment]
    nedcSegment = findOpsIndexForEUCIndex(eucIndex, nedcOpsi)
    nedcStart = totalNedcOps[nedcSegment]
    return nedcStart + offset

# Verify correctness
# Round-trip every time point of the four urban cycles (780 == 4*euc)
# through perm2nedc and back through nedc2perm.
for i in range(780):
    roundTrip = nedc2perm(perm2nedc(i))
    if roundTrip != i:
        raise Exception("Oh no! Something is wrong at index " + str(i))


# Applying retiming function `nedc2perm` to transform PermNEDC results back into NEDC order
def doPermRetiming1(nedc, perm, nedcRenaming=renameRef):
    """Join every NEDC row with the PermNEDC row that ``nedc2perm`` assigns.

    The NEDC columns are renamed with ``nedcRenaming`` and come first; the
    row of ``perm`` at label ``nedc2perm(<NEDC row label>)`` is appended.
    The result is in NEDC row order.
    """
    reference = nedc.rename(columns=nedcRenaming)

    def pairWithPermRow(referenceRow):
        permRow = perm.loc[nedc2perm(referenceRow.name)]
        return pd.concat([referenceRow, permRow])

    return reference.apply(pairWithPermRow, axis=1)

# Applying retiming function `perm2nedc` to transform NEDC results into PermNEDC order
def doPermRetiming2(nedc, perm, nedcRenaming=renameRef):
    """Join every PermNEDC row with the NEDC row that ``perm2nedc`` assigns.

    The NEDC columns are renamed with ``nedcRenaming`` and come first; for
    each row of ``perm`` the NEDC row at label ``perm2nedc(<perm row
    label>)`` is placed in front of it. The result is in PermNEDC row order.
    """
    reference = nedc.rename(columns=nedcRenaming)

    def pairWithReferenceRow(permRow):
        referenceRow = reference.loc[perm2nedc(permRow.name)]
        return pd.concat([referenceRow, permRow])

    return perm.apply(pairWithReferenceRow, axis=1)

# Direction 1 (nedc2perm): pair each NEDC row with its PermNEDC counterpart,
# keep the PermNEDC half (now in NEDC order), accumulate and plot it against
# the accumulated NEDC.
perm1Retiming1 = doPermRetiming1(nedc, perm1)
recoveredPerm1 = splitExperiments(perm1Retiming1)[1]
recoveredPerm1Accumulated = accumulatedNOxPerTotalDistance(recoveredPerm1)
plotCycles(nedcAccumulated, recoveredPerm1Accumulated, "Perm1 and averaged NEDC (nedc2perm)", noxTicks=NOX_TICKS_ACCUMULATED)

perm2Retiming1 = doPermRetiming1(nedc, perm2)
recoveredPerm2 = splitExperiments(perm2Retiming1)[1]
recoveredPerm2Accumulated = accumulatedNOxPerTotalDistance(recoveredPerm2)
plotCycles(nedcAccumulated, recoveredPerm2Accumulated, "Perm2 and averaged NEDC (nedc2perm)", noxTicks=NOX_TICKS_ACCUMULATED)

# Direction 2 (perm2nedc): keep the NEDC half of the join (now in PermNEDC
# order); the same permuted NEDC is plotted against both PermNEDC runs.
nedcPerm1Retiming = doPermRetiming2(nedc, perm1)  # Doesn't matter if we pick perm1 or perm2 as both use the same retiming function
permutedNEDC = splitExperiments(nedcPerm1Retiming)[0]
permutedNEDCAccumulated = accumulatedNOxPerTotalDistance(permutedNEDC)
plotCycles(permutedNEDCAccumulated, perm1Accumulated, "Perm1 and averaged NEDC (perm2nedc)", noxTicks=NOX_TICKS_ACCUMULATED, timeValues=perm1['Time'])

nedcPerm2Retiming = doPermRetiming2(nedc, perm2)  # Provide for future applications
plotCycles(permutedNEDCAccumulated, perm2Accumulated, "Perm2 and averaged NEDC (perm2nedc)", noxTicks=NOX_TICKS_ACCUMULATED, timeValues=perm2['Time'])


# Maximum value error for PermNEDC1
# Larger of the velocity errors of both retiming directions.
max(getMaximumValueError(perm1Retiming1, VELOCITY_KEY), getMaximumValueError(nedcPerm1Retiming, VELOCITY_KEY))

# NOTE(review): the bare literal below looks like the notebook output
# recorded for the expression above; it has no effect when run as a script.
16.0


# Maximum value error for PermNEDC2
max(getMaximumValueError(perm2Retiming1, VELOCITY_KEY), getMaximumValueError(nedcPerm2Retiming, VELOCITY_KEY))

# NOTE(review): presumably the recorded notebook output, as above.
11.0


# Ret_a (a.k.a. anarchy retiming)
# Picks random retiming functions (r_1, r_2) that minimise value error
# This is done by sorting each df by velocity and joining them thereafter
# The joined table is sorted by either df1.Time or df2.Time, to reflect r_1 or r_2, respectively. We only demonstrate r_1.
def doAnarchyRetiming(df1, df2, df1Renaming=renameRef):
    """Pair the rows of two cycles by velocity rank.

    Both frames are sorted by velocity and joined position by position, so
    the k-th slowest row of ``df1`` sits next to the k-th slowest row of
    ``df2``. The columns of ``df1`` are renamed with ``df1Renaming``. The
    joined frame is finally ordered by the (renamed) time of ``df1``.
    """
    def byVelocity(frame):
        return frame.sort_values(VELOCITY_KEY).reset_index(drop=True)

    rankedDf1 = byVelocity(df1).rename(columns=df1Renaming)
    rankedDf2 = byVelocity(df2)
    paired = pd.concat([rankedDf1, rankedDf2], axis=1)

    df1TimeKey = df1Renaming('Time')
    return paired.sort_values(df1TimeKey).reset_index(drop=True)


# Anarchy retiming of both PermNEDC runs against the averaged NEDC: keep the
# PermNEDC half of each join and accumulate its NOx.
permAnarchyRet1 = doAnarchyRetiming(nedc, perm1)
anarchyPerm1 = splitExperiments(permAnarchyRet1)[1]
anarchyPerm1Accumulated = accumulatedNOxPerTotalDistance(anarchyPerm1)

permAnarchyRet2 = doAnarchyRetiming(nedc, perm2)
anarchyPerm2 = splitExperiments(permAnarchyRet2)[1]
anarchyPerm2Accumulated = accumulatedNOxPerTotalDistance(anarchyPerm2)

# One figure per run, each against the accumulated NEDC.
plotCycles(nedcAccumulated, anarchyPerm1Accumulated, "Anarchy Perm1 and averaged NEDC (NOx is accumulated)", noxTicks=NOX_TICKS_ACCUMULATED)
plotCycles(nedcAccumulated, anarchyPerm2Accumulated, "Anarchy Perm2 and averaged NEDC (NOx is accumulated)", noxTicks=NOX_TICKS_ACCUMULATED)


# Maximum value error for PermNEDC1 with anarchy retiming
# Largest velocity difference between the paired rows.
getMaximumValueError(permAnarchyRet1, VELOCITY_KEY)

# NOTE(review): the bare literal below looks like the notebook output
# recorded for the expression above; it has no effect when run as a script.
3.0


# Maximum value error for PermNEDC2 with anarchy retiming
getMaximumValueError(permAnarchyRet2, VELOCITY_KEY)

# NOTE(review): presumably the recorded notebook output, as above.
3.0


# Retiming functions
def nedc2double(t):
    """Map an NEDC time point into DoubleNEDC: the identity."""
    doubleTime = t
    return doubleTime

def double2nedc(t):
    """Map a DoubleNEDC time point into the NEDC by wrapping at 1180."""
    cycleLength = 1180
    return t % cycleLength


# Applying retiming function `nedc2double` to remove the second half of DoubleNEDC
def doDoubleRetiming1(nedc, double, nedcRename=renameRef):
    """Join every NEDC row with the DoubleNEDC row that ``nedc2double`` assigns.

    The NEDC columns are renamed with ``nedcRename`` and come first; the
    row of ``double`` at label ``nedc2double(<NEDC row label>)`` is
    appended. The result has one row per NEDC row.
    """
    reference = nedc.rename(columns=nedcRename)

    def pairWithDoubleRow(referenceRow):
        doubleRow = double.loc[nedc2double(referenceRow.name)]
        return pd.concat([referenceRow, doubleRow])

    return reference.apply(pairWithDoubleRow, axis=1)

# Applying retiming function `double2nedc` to concat two NEDC executions
def doDoubleRetiming2(nedc, double, nedcRename=renameRef):
    """Join every DoubleNEDC row with the NEDC row that ``double2nedc`` assigns.

    The NEDC columns are renamed with ``nedcRename`` and come first; for
    each row of ``double`` the NEDC row at label ``double2nedc(<double row
    label>)`` is placed in front of it. The result has one row per
    DoubleNEDC row.
    """
    reference = nedc.rename(columns=nedcRename)

    def pairWithReferenceRow(doubleRow):
        referenceRow = reference.loc[double2nedc(doubleRow.name)]
        return pd.concat([referenceRow, doubleRow])

    return double.apply(pairWithReferenceRow, axis=1)

# Direction 1 (nedc2double): keep the DoubleNEDC half of the join, which has
# one row per NEDC row, accumulate it and plot it against the accumulated
# NEDC.
double1Retiming1 = doDoubleRetiming1(nedc, double1)
shortenedDouble1 = splitExperiments(double1Retiming1)[1]
shortenedDouble1Accumulated = accumulatedNOxPerTotalDistance(shortenedDouble1)
plotCycles(nedcAccumulated, shortenedDouble1Accumulated, "Double1 and averaged NEDC (nedc2double)", noxTicks=NOX_TICKS_ACCUMULATED)

double2Retiming1 = doDoubleRetiming1(nedc, double2)
shortenedDouble2 = splitExperiments(double2Retiming1)[1]
shortenedDouble2Accumulated = accumulatedNOxPerTotalDistance(shortenedDouble2)
plotCycles(nedcAccumulated, shortenedDouble2Accumulated, "Double2 and averaged NEDC (nedc2double)", noxTicks=NOX_TICKS_ACCUMULATED)


# Direction 2 (double2nedc): keep the NEDC half of the join, which has one
# row per DoubleNEDC row; the same doubled NEDC is plotted against both runs.
nedcDouble1Retiming = doDoubleRetiming2(nedc, double1)  # Doesn't matter if we pick double1 or double2 as both use the same retiming function
doubledNEDC = splitExperiments(nedcDouble1Retiming)[0]
doubledNEDCAccumulated = accumulatedNOxPerTotalDistance(doubledNEDC)
plotCycles(doubledNEDCAccumulated, double1Accumulated, "Double1 and averaged NEDC (double2nedc)", timeTicks=TIME_TICKS_DOUBLE, noxTicks=NOX_TICKS_ACCUMULATED, timeValues=double1['Time'])

nedcDouble2Retiming = doDoubleRetiming2(nedc, double2)  # Provide for future applications
plotCycles(doubledNEDCAccumulated, double2Accumulated, "Double2 and averaged NEDC (double2nedc)", timeTicks=TIME_TICKS_DOUBLE, noxTicks=NOX_TICKS_ACCUMULATED, timeValues=double2['Time'])


# Maximum value error for DoubleNEDC1
# Larger of the velocity errors of both retiming directions.
max(getMaximumValueError(double1Retiming1, VELOCITY_KEY), getMaximumValueError(nedcDouble1Retiming, VELOCITY_KEY))

# NOTE(review): the bare literal below looks like the notebook output
# recorded for the expression above; it has no effect when run as a script.
15.0


# Maximum value error for DoubleNEDC2
max(getMaximumValueError(double2Retiming1, VELOCITY_KEY), getMaximumValueError(nedcDouble2Retiming, VELOCITY_KEY))

# NOTE(review): presumably the recorded notebook output, as above.
25.0


# Functions to compute minimal epsilon for hybrid conformance, as explained in the paper
def localMin(t1, df1, df2, tau):
    """Smallest velocity difference within a time window around ``t1``.

    Compares the velocity of ``df1`` at label ``t1`` with every velocity of
    ``df2`` whose label lies in ``[t1 - tau, t1 + tau]`` and returns the
    minimum absolute difference.
    """
    reference = df1.loc[t1, VELOCITY_KEY]
    window = df2.loc[t1-tau:t1+tau, VELOCITY_KEY]
    return min((window - reference).abs())

def globalMin(df1, df2, tau):
    """Largest of the per-row ``localMin`` values of ``df1`` against ``df2``.

    For every row of ``df1`` the rounded ``Time`` value is used as the label
    passed to ``localMin``; the maximum over all rows is returned.
    """
    def closestMatch(row):
        return localMin(round(row['Time']), df1, df2, tau)

    perRowMinima = df1.apply(closestMatch, axis=1)
    return max(perRowMinima)

def hybridValueError(df1, df2, tau):
    """Symmetric value error: the larger ``globalMin`` of both directions."""
    forward = globalMin(df1, df2, tau)
    backward = globalMin(df2, df1, tau)
    return max(forward, backward)


# Functions for PermNEDC and DoubleNEDC with hybrid conformance, as explained in the paper
# We use the precomputed retimed (Perm)NEDC from above, hence we omit `nedc` and `perm` arguments from the function
def perm1HybridValueError(tau):
    """Hybrid value error of PermNEDC-1 for window ``tau``.

    Uses the precomputed retimed frames: NEDC against the recovered Perm1,
    and Perm1 against the permuted NEDC.
    """
    nedcSide = globalMin(nedcAccumulated, recoveredPerm1Accumulated, tau)
    permSide = globalMin(perm1Accumulated, permutedNEDCAccumulated, tau)
    return max(nedcSide, permSide)

def perm2HybridValueError(tau):
    """Hybrid value error of PermNEDC-2 for window ``tau``.

    Uses the precomputed retimed frames: NEDC against the recovered Perm2,
    and Perm2 against the permuted NEDC.
    """
    nedcSide = globalMin(nedcAccumulated, recoveredPerm2Accumulated, tau)
    permSide = globalMin(perm2Accumulated, permutedNEDCAccumulated, tau)
    return max(nedcSide, permSide)

def double1HybridValueError(tau):
    """Hybrid value error of DoubleNEDC-1 for window ``tau``.

    Uses the precomputed retimed frames: NEDC against the shortened
    Double1, and Double1 against the doubled NEDC.
    """
    nedcSide = globalMin(nedcAccumulated, shortenedDouble1Accumulated, tau)
    doubleSide = globalMin(double1Accumulated, doubledNEDCAccumulated, tau)
    return max(nedcSide, doubleSide)

def double2HybridValueError(tau):
    """Hybrid value error of DoubleNEDC-2 for window ``tau``.

    Uses the precomputed retimed frames: NEDC against the shortened
    Double2, and Double2 against the doubled NEDC.
    """
    nedcSide = globalMin(nedcAccumulated, shortenedDouble2Accumulated, tau)
    doubleSide = globalMin(double2Accumulated, doubledNEDCAccumulated, tau)
    return max(nedcSide, doubleSide)


# Compute Table 1 in the paper

# Window sizes (tau) for which computeEpsilons evaluates the value error;
# they become the columns of the result table.
tauOfInterest = [0,1,2,3,5,10,15,20]

def pHybridValueError(df1, df2):
    """Bind ``df1`` and ``df2``; return ``hybridValueError`` as a function of ``tau``."""
    def errorForTau(tau):
        return hybridValueError(df1, df2, tau)

    return errorForTau

def computeEpsilons(f_hybrid, cycleName):
    """Evaluate ``f_hybrid`` for every tau of interest.

    Returns a Series named ``cycleName`` whose entries are the strings
    ``'eps = <value>'``, labelled ``'tau = <tau>'``.
    """
    cells = ['eps = ' + str(f_hybrid(tau)) for tau in tauOfInterest]
    labels = ['tau = ' + str(tau) for tau in tauOfInterest]
    return pd.Series(cells, index=labels, name=cycleName)


def computeFullTable():
    """Build the epsilon table: one row per test cycle, one column per tau."""
    rowSpecs = [
        (perm1HybridValueError, 'PermNEDC-1'),
        (perm2HybridValueError, 'PermNEDC-2'),
        (double1HybridValueError, 'DoubleNEDC-1'),
        (double2HybridValueError, 'DoubleNEDC-2'),
        (pHybridValueError(nedcAccumulated, sine1Accumulated), 'SineNEDC-1'),
        (pHybridValueError(nedcAccumulated, sine2Accumulated), 'SineNEDC-2'),
    ]
    tableRows = [computeEpsilons(errorFn, label) for errorFn, label in rowSpecs]
    return pd.DataFrame(tableRows)


# Compute the table once and keep it for display.
fullTable = computeFullTable()


# Bare expression: displays the table in a notebook, no effect in a script.
fullTable

	tau = 0	tau = 1	tau = 2	tau = 3	tau = 5	tau = 10	tau = 15	tau = 20
PermNEDC-1	eps = 16.0	eps = 16.0	eps = 16.0	eps = 11.0	eps = 8.0	eps = 8.0	eps = 8.0	eps = 8.0
PermNEDC-2	eps = 11.0	eps = 10.0	eps = 7.0	eps = 7.0	eps = 7.0	eps = 7.0	eps = 7.0	eps = 7.0
DoubleNEDC-1	eps = 15.0	eps = 12.0	eps = 11.0	eps = 9.0	eps = 6.0	eps = 6.0	eps = 6.0	eps = 6.0
DoubleNEDC-2	eps = 25.0	eps = 18.0	eps = 10.0	eps = 8.0	eps = 8.0	eps = 8.0	eps = 8.0	eps = 8.0
SineNEDC-1	eps = 18.0	eps = 16.0	eps = 15.0	eps = 12.0	eps = 9.0	eps = 7.0	eps = 6.0	eps = 6.0
SineNEDC-2	eps = 13.0	eps = 11.0	eps = 9.0	eps = 9.0	eps = 7.0	eps = 7.0	eps = 7.0	eps = 7.0

Conformance Relations and Hyperproperties for Doping Detection in Time and Space — Case Study Details

Retiming Experiments

NEDC Permutations

Double NEDC

SineNEDC & Hybrid Conformance