[13]:
# Remove input cells at runtime (nbsphinx)
import IPython.core.display as d
d.display_html('<script>jQuery(function() {if (jQuery("body.notebook_app").length == 0) { jQuery(".input_area").toggle(); jQuery(".prompt").toggle();}});</script>', raw=True)

Image cleaning

Author(s): - Dr. Michele Peresano (CEA-Saclay/IRFU/DAp/LEPCHE), 2020

Description:

This notebook contains DL1-image-cleaning plots and benchmark proposals from the protopipe pipeline.
This was mainly triggered by the step-by-step comparison against CTA-MARS, but it can be extended to other pipelines as well.
NOTE Let’s try to follow this document by adding those benchmarks or proposing new ones.

Requirements:

To run this notebook you will need a DL1 file generated using protopipe.scripts.data_training.py .
Reference simtel-files, plots, values and settings can be found here (please, always refer to the latest version) until we have a more automatic and fancy approach (aka cta-benchmarks+ctaplot).

The data format required to run the notebook is the current one used by protopipe . Soon it will be the same as in ctapipe .

Development and testing:

For the moment this notebook is optimized to work only on files produced from LSTCam + NectarCam telescope configurations.
As with any other part of protopipe and being part of the official repository, this notebook can be further developed by any interested contributor.
The execution of this notebook is not currently automatic, it must be done locally by the user - preferably before pushing a pull-request.

TODO:

Table of contents

[14]:
import os
import tables
from pathlib import Path
import numpy as np
import pandas
import uproot
from scipy.stats import binned_statistic, binned_statistic_2d, cumfreq, percentileofscore
from astropy import units as u
from astropy.table import Table
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from mpl_toolkits.mplot3d import Axes3D

from ctapipe.instrument import OpticsDescription
#from ctapipe.image.hillas import camera_to_shower_coordinates
[15]:
def get_camera_names(inputPath = None, fileName = None):
    """Read the names of the cameras.

    Parameters
    ==========
    infile : str
        Full path of the input DL1 file.
    fileName : str
        Name of the input DL1 file.

    Returns
    =======
    camera_names : list(str)
        Table names as a list.
    """
    if (inputPath is None) or (fileName is None):
        print("ERROR: check input")
    h5file = tables.open_file(os.path.join(inputPath, fileName), mode='r')
    group = h5file.get_node("/")
    camera_names = [x.name for x in group._f_list_nodes()]
    h5file.close()
    return camera_names
[16]:
def load_reset_dl1_pandas(inputPath = None, fileName = None, camera_names=None):
    """(Re)load the file containing DL1(a) data and extract the data per telescope type.

    Parameters
    ==========
    inputPath : str
        Full path of the input DL1 file.
    fileName : str
        Name of the input DL1 file.

    Returns
    =======
    dataFrames : dict(pandas.DataFrame)
        Dictionary of tables per camera.
    """
    if (inputPath is None) or (fileName is None):
        print("ERROR: check input")
    if camera_names is None:
        print("ERROR: no cameras specified")
    # load DL1 images
    dataFrames = {camera : pandas.read_hdf(os.path.join(inputPath, fileName), f"/{camera}") for camera in camera_names}
    return dataFrames
[17]:
def read_dl1_quantities(data):
    """Get DL1 quantities as arrays from a camera table."""

    dictionary = {

            "Intensity [#phe]"   : data["hillas_intensity_reco"],
            "Width [m]"          : data["hillas_width_reco"],
            "Length [m]"         : data["hillas_length_reco"],
            "Skewness"           : data["hillas_skewness"],
            "Kurtosis"           : data["hillas_kurtosis"],
            "H_max [m]"          : data["h_max"],
            "n_pixel"            : data["pixels"],
            "Ellipticity"        : data["hillas_ellipticity"],
            "Leakage 1"          : data["leakage_intensity_width_1_reco"],
            "psi"                : data["hillas_psi_reco"],
            "cog_x"              : data["hillas_x_reco"],
            "cog_y"              : data["hillas_y_reco"],
            "cog_r"              : data["hillas_r_reco"],
        }

    return dictionary
[18]:
def add_stats(x, ax, fontsize):
    """Add a textbox containing statistical information."""
    mu = x.mean()
    median = x.median()
    sigma = x.std()
    textstr = '\n'.join((
        r'$\mu=%.2f$' % (mu, ),
        r'$\mathrm{median}=%.2f$' % (median, ),
        r'$\sigma=%.2f$' % (sigma, )))

    # these are matplotlib.patch.Patch properties
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)

    # place a text box in upper left in axes coords
    ax.text(0.70, 0.85,
            textstr,
            transform=ax.transAxes,
            fontsize=fontsize,
            horizontalalignment='left',
            verticalalignment='center',
            bbox=props)
[19]:
# Modify these variables according to your local setup outside of the Vagrant Box
parentDir = "" # path to 'shared_folder'
analysisName = "v0.4.0_dev1"
[20]:
indir = os.path.join(parentDir,
                      "shared_folder/analyses",
                      analysisName,
                      "data/TRAINING/for_energy_estimation")
infile = "TRAINING_energy_tail_gamma_merged.h5"
[21]:
cameras = get_camera_names(inputPath = indir, fileName = infile)
data = load_reset_dl1_pandas(inputPath = indir, fileName = infile, camera_names=cameras)
[22]:
average_camera_radii_deg = {
        "ASTRICam": 4.67,
        "CHEC": 3.93,
        "DigiCam": 4.56,
        "FlashCam": 3.95,
        "NectarCam": 4.05,
        "LSTCam": 2.31,
        "SCTCam": 4.0,  # dummy value
    }
[23]:
indir_refData = "" # path to CTAMARS ROOT file
mars_dl1b_fileName = "check_dl1b.root"
path_mars_dl1b = os.path.join(indir_refData, mars_dl1b_fileName)

# where is the CTAMARS ROOT file (do not change this information)
mars_LST1size_fileName = "LST1_SIZE_distro_gamma1sample.root"
path_mars_LST1size = os.path.join(indir_refData, mars_LST1size_fileName)
[24]:
# read CTAMARS ROOT file
ctamars_dl1b = uproot.open(path_mars_dl1b)
# ctamars_dl1b.allkeys()

ctamars_LST1size = uproot.open(path_mars_LST1size)
[25]:
mars_size_WL_LSTCam = ctamars_dl1b["log10Size_WL_type0"].to_numpy()
mars_size_WL_NectarCam = ctamars_dl1b["log10Size_WL_type1"].to_numpy()
mars_size_d80_LSTCam = ctamars_dl1b["log10Size_d80_type0"].to_numpy()
mars_size_d80_NectarCam = ctamars_dl1b["log10Size_d80_type1"].to_numpy()
mars_size_LST1Cam = ctamars_LST1size["h"].to_numpy()

CTAMARS = {}
CTAMARS["LSTCam"] = {"size_WL" : mars_size_WL_LSTCam, "size_d80" : mars_size_d80_LSTCam, "size_LST1" : mars_size_LST1Cam}
CTAMARS["NectarCam"] = {"size_WL" : mars_size_WL_NectarCam, "size_d80" : mars_size_d80_NectarCam}
[26]:
# First we check if a _plots_ folder exists already.
# If not, we create it.
Path("./plots").mkdir(parents=True, exist_ok=True)

Fraction of events (relative to telescope triggers) that survive a given intensity cut

back to top

[27]:
nbins = 250
xrange = [0,6]

for camera in cameras:

    fig = plt.figure(figsize=(6, 5), tight_layout=False)
    plt.xlabel("log10(intensity #p.e)")
    plt.ylabel("Telescope triggers fraction with log10(intensity #p.e) > x phe")
    plt.title(camera)

    tot_entries = len(data[camera]["hillas_intensity_reco"])

    # No cuts
    DL1_filtered = data[camera].loc[:]
    intensity_hist, xbins = np.histogram( np.log10(DL1_filtered["hillas_intensity_reco"]), bins=nbins, range=xrange)
    plt.plot(xbins[:-1], intensity_hist[::-1].cumsum()[::-1]/tot_entries, drawstyle="steps-post", label="No cuts", color="steelblue")

    # Cut in the number of pixels
    #DL1_filtered = data[camera].loc[data[camera]['pixels'] > 3]
    #intensity_hist, xbins = np.histogram( np.log10(DL1_filtered["hillas_intensity_reco"]), bins=nbins, range=xrange)
    #plt.plot(xbins[:-1], intensity_hist[::-1].cumsum()[::-1]/tot_entries, drawstyle="steps-post", label="n_pixel")

    # Cut in ellipticity
    DL1_filtered = DL1_filtered.loc[(data[camera]['hillas_ellipticity_reco'] > 0.1) & (data[camera]['hillas_ellipticity_reco'] < 0.6)]
    intensity_hist, xbins = np.histogram( np.log10(DL1_filtered["hillas_intensity_reco"]), bins=nbins, range=xrange)
    plt.plot(xbins[:-1], intensity_hist[::-1].cumsum()[::-1]/tot_entries, drawstyle="steps-post", label="ellipticity", color="green")

    # Cut in containment radius
    DL1_filtered = DL1_filtered.loc[data[camera]['hillas_r_reco'] < (average_camera_radii_deg[camera]*0.8)]
    intensity_hist, xbins = np.histogram( np.log10(DL1_filtered["hillas_intensity_reco"]), bins=nbins, range=xrange)
    plt.plot(xbins[:-1], intensity_hist[::-1].cumsum()[::-1]/tot_entries, drawstyle="steps-post", label="COG continment", color="red")

    # Plot CTAMARS data

    x = 0.5 * (CTAMARS[camera]["size_WL"][1][1:] + CTAMARS[camera]["size_WL"][1][:-1])
    plt.step(x, CTAMARS[camera]["size_WL"][0], where='mid', label='CTAMARS ellipticity', color="green", linestyle="--")
    plt.step(x, CTAMARS[camera]["size_d80"][0], where='mid', label='CTAMARS COG continment', color="red", linestyle="--")

    plt.ylim([0.,1.05])
    plt.minorticks_on()
    plt.grid()
    plt.legend()

    fig.savefig(f"./plots/image_cleaning_eventsAboveIntensity_{camera}_protopipe_{analysisName}.png")
../../../_images/contribute_benchmarks_TRAINING_benchmarks_DL1_image-cleaning_24_0.png
../../../_images/contribute_benchmarks_TRAINING_benchmarks_DL1_image-cleaning_24_1.png

Image-parameter distributions

back to top

Notes
- probably better to make bins in true energy - the parameters should be at least those that enter the estimators training (here only the pure DL1 are listed)
[28]:
# common settings
nbins = 100
range_histogram = [1.,6.]

LST-1 image intensity

back to top

[29]:
only_LST1 = data["LSTCam"]["tel_id"]==1
size_LST1 = data["LSTCam"]["hillas_intensity_reco"][only_LST1]

camera = "LSTCam"
key = "hillas_intensity_reco"

fig = plt.figure(figsize=(16, 5), tight_layout=False)

plt.subplot(1,2,1)

plt.xlabel(f"log10({key}) [#phe]")
plt.ylabel("Number of images")
plt.title(camera)
h_protopipe = plt.hist(np.log10(size_LST1), bins=nbins, range=[1.,6.], histtype="step", label="protopipe")

x = 0.5 * (CTAMARS[camera]["size_LST1"][1][1:] + CTAMARS[camera]["size_LST1"][1][:-1])
plt.step(x, CTAMARS[camera]["size_LST1"][0], where='mid', label='CTAMARS')

plt.yscale('log')
plt.minorticks_on()
plt.grid(which = "both")

plt.legend()

plt.subplot(1,2,2)

plt.xlabel(f"log10({key}) [#phe]")
plt.ylabel(f"Ratio protopipe / CTAMARS")
plt.step(x, h_protopipe[0]/CTAMARS[camera]["size_LST1"][0], where='mid')
ax = plt.gca()
xlims=ax.get_xlim()
plt.hlines(1., xlims[0], xlims[1], label="expectation", color='r')
plt.grid()
plt.legend()

plt.show()

fig.savefig(f"./plots/image_cleaning_hillas_intensity_reco_LST1_gamma1_{camera}_protopipe_{analysisName}.png")
/Users/michele/Applications/miniconda3/envs/protopipe/lib/python3.7/site-packages/ipykernel_launcher.py:29: RuntimeWarning: divide by zero encountered in true_divide
/Users/michele/Applications/miniconda3/envs/protopipe/lib/python3.7/site-packages/ipykernel_launcher.py:29: RuntimeWarning: invalid value encountered in true_divide
../../../_images/contribute_benchmarks_TRAINING_benchmarks_DL1_image-cleaning_29_1.png

DL1 Parameters used for energy training from all telecopes

back to top

[30]:
nbins = 100
parameters_to_plot = ["hillas_intensity_reco", "hillas_width", "hillas_length"]

fig, axes = plt.subplots(ncols=len(parameters_to_plot),
                         nrows=len(cameras),
                         constrained_layout=False,
                         figsize = (40, 20))
plt.subplots_adjust(hspace = 0.5)
fontsize=20

for i, camera in enumerate(cameras):

    for j, key in enumerate(parameters_to_plot):

        axes[i, j].set_ylabel("Number of events", fontsize=fontsize)
        axes[i, j].set_title(camera, fontsize=fontsize)

        if key == "hillas_intensity_reco":
            axes[i, j].set_xlabel(f"log10({key}) [#phe]", fontsize=fontsize)
            axes[i, j].hist(np.log10(data[camera][key]),
                            bins=nbins,
                            range=[1.,6.],
                            alpha = 0.5,
                            histtype="step",
                            linewidth=5)
            add_stats(np.log10(data[camera][key]), axes[i, j], fontsize=fontsize)
        else:
            axes[i, j].set_xlabel(f"{key} [deg]", fontsize=fontsize)
            axes[i, j].hist(data[camera][key],
                            bins=nbins,
                            alpha = 0.5,
                            histtype="step",
                            linewidth=5)
            add_stats(data[camera][key], axes[i, j], fontsize=fontsize)


        axes[i, j].set_yscale('log')
        axes[i, j].minorticks_on()
        axes[i, j].grid(which = "both")

        # Save just the portion _inside_ the second axis's boundaries
        extent = axes[i, j].get_window_extent().transformed(fig.dpi_scale_trans.inverted())
        fig.savefig(f"./plots/image_cleaning_{key}_gamma1_{camera}_protopipe_{analysisName}.png", bbox_inches=extent.expanded(1.2, 1.2))

fig.savefig(f"./plots/image_cleaning_gamma1_allKeysallCameras_protopipe_{analysisName}.png")
../../../_images/contribute_benchmarks_TRAINING_benchmarks_DL1_image-cleaning_31_0.png
[ ]: