[3]:
# Remove input cells at runtime (nbsphinx)
import IPython.core.display as d
d.display_html('<script>jQuery(function() {if (jQuery("body.notebook_app").length == 0) { jQuery(".input_area").toggle(); jQuery(".prompt").toggle();}});</script>', raw=True)

Direction reconstruction (TRAINING)

Author(s): - Dr. Michele Peresano (CEA-Saclay/IRFU/DAp/LEPCHE), 2020

Description:

This notebook contains plots and benchmarks proposals from the protopipe pipeline related to shower reconstruction.
It uses the training dataset used to train the energy model, which is when this information is used for the first time within the current pipeline workflow. This was mainly triggered by the step-by-step comparison against CTA-MARS, but it can be extended to other pipelines as well.

NOTES:

  • these benchmarks will be cross-validated and migrated in cta-benchmarks/ctaplot

  • Let’s try to follow this document by adding those benchmarks or proposing new ones.

Requirements:

To run this notebook you will need a TRAINING file generated using protopipe.scripts.data_training.py .
Reference simtel-files, plots, values and settings can be found here (please, always refer to the latest version).

The data format required to run the notebook is the current one used by protopipe . Soon it will be the same as in ctapipe (1 full DL1 file + 1 DL2 file with only shower geometry information).

Development and testing:

As with any other part of protopipe and being part of the official repository, this notebook can be further developed by any interested contributor.
Though I will soon start a cross-validation with ctaplot), so if you want to add something and it’s already there, please import those functions instead of creating new ones. The execution of this notebook is not currently automatic due to lack of proper test data, it must be done locally by the user - preferably before pushing a pull-request.

TODO:

Table of contents

[4]:
import os
from pathlib import Path

import tables
import pandas
import numpy as np
import uproot
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
[5]:
def get_camera_names(inputPath = None, fileName = None):
    """Read the names of the cameras.

    Parameters
    ==========
    infile : str
        Full path of the input DL1 file.
    fileName : str
        Name of the input DL1 file.

    Returns
    =======
    camera_names : list(str)
        Table names as a list.
    """
    if (inputPath is None) or (fileName is None):
        print("ERROR: check input")
    h5file = tables.open_file(os.path.join(inputPath, fileName), mode='r')
    group = h5file.get_node("/")
    camera_names = [x.name for x in group._f_list_nodes()]
    h5file.close()
    return camera_names
[6]:
def load_reset_infile_protopipe(inputPath = None, fileName = None, camera_names=None):
    """(Re)load the file containing DL1(a) data and extract the data per telescope type.

    Parameters
    ==========
    infile : str
        Full path of the input DL1 file.
    fileName : str
        Name of the input DL1 file.

    Returns
    =======
    dataFrames : dict(pandas.DataFrame)
        Dictionary of tables per camera.
    """
    if (inputPath is None) or (fileName is None):
        print("ERROR: check input")
    if camera_names is None:
        print("ERROR: no cameras specified")
    # load DL1 images
    dataFrames = {camera : pandas.read_hdf(os.path.join(inputPath, fileName), f"/{camera}") for camera in camera_names}
    return dataFrames
[7]:
def add_stats(x, ax):
    """Add a textbox containing statistical information."""
    mu = x.mean()
    median = np.median(x)
    sigma = x.std()
    textstr = '\n'.join((
        r'$\mu=%.2f$' % (mu, ),
        r'$\mathrm{median}=%.2f$' % (median, ),
        r'$\sigma=%.2f$' % (sigma, )))

    # these are matplotlib.patch.Patch properties
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)

    # place a text box in upper left in axes coords
    ax.text(0.70, 0.85,
            textstr,
            transform=ax.transAxes,
            fontsize=10,
            horizontalalignment='left',
            verticalalignment='center',
            bbox=props)
[8]:
# Modify these variables according to your local setup outside of the Vagrant Box
parentDir = "/Users/michele/Applications/ctasoft/dirac" # path to 'shared_folder'
analysisName = "v0.4.0_dev1"
[9]:
indir = os.path.join(parentDir,
                      "shared_folder/analyses",
                      analysisName,
                      "data/TRAINING/for_energy_estimation")
infile = "TRAINING_energy_tail_gamma_merged.h5"

cameras = get_camera_names(inputPath = indir,
                                   fileName = infile)
data = load_reset_infile_protopipe(inputPath = indir,
                                   fileName = infile,
                                   camera_names=cameras)
[10]:
# select only successfully reconstructed showers
reconstructed_showers = {}
for camera in cameras:
    reconstructed_showers[camera] = data[camera][(data[camera]["is_valid"]==True)]
[11]:
indir_refData = "/Volumes/DataCEA_PERESANO/Data/CTA/ASWG/Prod3b/Release_2019/CTAMARS_reference_data/TRAINING/DL2" # path to CTAMARS ROOT file
mars_dl2a_fileName = "CTA_check_dl2_4L15M.root"
path_mars_dl2a = os.path.join(indir_refData, mars_dl2a_fileName)
[12]:
with uproot.open(path_mars_dl2a) as CTAMARS:
    CTAMARS_angres = CTAMARS["angres"]
    CTAMARS_reco_fraction = CTAMARS["reco_fraction"]
    CTAMARS_histcore = CTAMARS["histcore"]
    CTAMARS_coreres = CTAMARS["coreres"]
[13]:
# TODO
[14]:
# First we check if a _plots_ folder exists already.
# If not, we create it.
Path("./plots").mkdir(parents=True, exist_ok=True)

Angular resolution

back to top

Note
This benchmark uses all events with >=2 valid images, and requiring LST-subarray stereo trigger, i.e. either 0 or >=2 triggered LSTs - since LST-subarray hardware trigger was not required in the simulation
[15]:
true_energy_bin_edges = CTAMARS_reco_fraction.member("fXaxis").edges()
true_energy_bin_centers = 0.5 * (true_energy_bin_edges[:-1] + true_energy_bin_edges[1:])
[16]:
reco_showers_stereo_LST = {}
for i, camera in enumerate(cameras):
    # for each camera select only images corresponing to events seen by either None or at least 2 LSTs (LST stereo trigger condition)
    reco_showers_stereo_LST[camera] = reconstructed_showers[camera][(reconstructed_showers[camera]["N_LST"] == 0) | (reconstructed_showers[camera]["N_LST"] >= 2)]
    # then merge the tables
    if i==0:
        all_reco_showers_stereo_LST = reco_showers_stereo_LST[camera]
    else:
        all_reco_showers_stereo_LST = all_reco_showers_stereo_LST.append(reco_showers_stereo_LST[camera])
# Finally drop duplicate showers (stereo information is the same for each event ID)
unique_all_reco_showers_stereo_LST = all_reco_showers_stereo_LST.drop_duplicates(subset=['event_id'])
[17]:
fig = plt.figure(figsize=(14, 7), tight_layout=False)

plt.subplot(1,2,1)

plt.xlabel("log10(True energy) [TeV]")
plt.ylabel("Angular distribution 68% containment [deg]")

# protopipe

theta = {}
ang_res = np.zeros(len(true_energy_bin_centers))

true_energy = np.log10(unique_all_reco_showers_stereo_LST["true_energy"])
offset = unique_all_reco_showers_stereo_LST["offset"]

for i in range(len(true_energy_bin_centers)):

    mask = (true_energy > true_energy_bin_edges[i]) & (true_energy <= true_energy_bin_edges[i + 1])
    selected_offsets = np.sort(offset[mask])
    ang_res[i] = np.percentile(selected_offsets, 68.0)

plt.plot(true_energy_bin_centers, ang_res, '-', label = "protopipe")

# CTAMARS
plt.plot(CTAMARS_angres.member("fX"), CTAMARS_angres.member("fY"), '-', label = "CTAMARS")

plt.grid(which="both")
plt.legend()

plt.subplot(1,2,2)

plt.xlabel("log10(True energy) [TeV]")
plt.ylabel("Ratio protopipe / CTAMARS")

plt.plot(CTAMARS_angres.member("fX"), ang_res/CTAMARS_angres.member("fY"), '-', label = "CTAMARS")
plt.hlines(1.0, plt.gca().get_xlim()[0], plt.gca().get_xlim()[1], linestyle= '--', color="red")
plt.ylim(0, 2)

plt.show()

fig.savefig(f"./plots/training_angular_resolution_protopipe_{analysisName}.png")
../../../_images/contribute_benchmarks_TRAINING_benchmarks_DL2_direction-reconstruction_26_0.png

Reconstruction efficiency relative to the number of stereoscopic triggers

back to top

Selection cuts: - all showers with >=2 valid images, - LST-subarray stereo trigger, i.e. either 0 or >=2 triggered LSTs (LST-subarray hardware trigger was not required in the simulation)

[18]:
all_cameras = data["LSTCam"].append(data["NectarCam"])
all_unique_showers = all_cameras.drop_duplicates(subset=['event_id'])
all_unique_showers_stereo_LST = all_unique_showers[(all_unique_showers["N_LST"] == 0) | (all_unique_showers["N_LST"] >= 2)]
reco_showers_stereo_LST = all_unique_showers_stereo_LST[all_unique_showers_stereo_LST["is_valid"]==True]
[19]:
H_all = np.histogram(np.log10(all_unique_showers_stereo_LST["true_energy"]), bins=true_energy_bin_edges)
H_reco = np.histogram(np.log10(reco_showers_stereo_LST["true_energy"]), bins=true_energy_bin_edges)
reconstruction_efficiency = H_reco[0] / H_all[0]
[20]:
fig = plt.figure(figsize=(14, 7), tight_layout=False)

plt.subplot(1,2,1)

plt.xlabel("log10(True energy) [TeV]")
plt.ylabel("Reconstruction efficiency relative to the number of stereoscopic triggers")

plt.errorbar(true_energy_bin_centers,
             reconstruction_efficiency,
             xerr=np.diff(true_energy_bin_edges)/2,
             yerr = None,
             ls='none',
             label = "protopipe")

plt.errorbar(x = true_energy_bin_centers,
             y = CTAMARS_reco_fraction.to_numpy()[0],
             xerr = np.diff(true_energy_bin_edges)/2,
             yerr = CTAMARS_reco_fraction.errors(),
             ls='none',
             label = "CTAMARS")

plt.grid(which="both")
plt.legend(loc="best")

plt.subplot(1,2,2)

plt.xlabel("log10(True energy) [TeV]")
plt.ylabel("Ratio protopipe / CTAMARS")

plt.plot(true_energy_bin_centers,
         reconstruction_efficiency/CTAMARS_reco_fraction.to_numpy()[0],
         '-')
plt.hlines(1.0, plt.gca().get_xlim()[0], plt.gca().get_xlim()[1], linestyle= '--', color="red")
plt.ylim(0, 2)

fig.savefig(f"./plots/training_reconstruction_efficiency_protopipe_{analysisName}.png")
../../../_images/contribute_benchmarks_TRAINING_benchmarks_DL2_direction-reconstruction_31_0.png

Distribution of true core positions for reconstructed events

back to top

Selection cuts: - all showers with >=2 valid images, - LST-subarray stereo trigger, i.e. either 0 or >=2 triggered LSTs (LST-subarray hardware trigger was not required in the simulation)

[21]:
# Not important which simtel you use, just be sure is from this simulation
# https://forge.in2p3.fr/projects/step-by-step-reference-mars-analysis/wiki#The-MC-sample
from ctapipe.io import event_source
indir_simtel = Path("/Volumes/DataCEA_PERESANO/Data/CTA/shared_folder/productions/CTA-S")
simtel = "gamma_20deg_180deg_run100___cta-prod3-demo-2147m-LaPalma-baseline.simtel.gz"
source = event_source(input_url=Path(indir_simtel, simtel), max_events=1)
tel_positions = source.subarray.positions
[22]:
plt.figure(figsize=(7, 7))
plt.gca().set_aspect('equal')

nbins = 600

core_distribution = plt.hist2d(x = reco_showers_stereo_LST["mc_core_x"],
           y = reco_showers_stereo_LST["mc_core_y"],
           bins=[nbins, nbins],
           range=[[-1.e3, 1.e3], [-1.e3, 1.e3]],
           cmap=plt.cm.rainbow,
           norm=LogNorm()
           )

plt.colorbar(core_distribution[3], ax=plt.gca())

# Superimpose telescopes positions
for tel_index in tel_positions.keys():
    plt.plot(tel_positions[tel_index].value[0], tel_positions[tel_index].value[1], 'o', color="black")

plt.xlabel("True core X [m]")
plt.ylabel("True core Y [m]")

plt.show()

fig.savefig(f"./plots/training_true_cores_distro_reco_events_protopipe_{analysisName}.png")
../../../_images/contribute_benchmarks_TRAINING_benchmarks_DL2_direction-reconstruction_35_0.png

Shower core reconstruction

back to top

All showers reconstructed by any 2 telescopes

[23]:
reco_showers_stereo = {}
for i, camera in enumerate(cameras):
    # for each camera table select only rows corresponing to events seen by any >=2 telescopes
    reco_showers_stereo[camera] = reconstructed_showers[camera][reconstructed_showers[camera]["n_tel_reco"] >= 2]
    # then merge the tables
    if i==0:
        allcameras_reco_showers_stereo = reco_showers_stereo[camera]
    else:
        allcameras_reco_showers_stereo = allcameras_reco_showers_stereo.append(reco_showers_stereo[camera])
# Finally drop duplicate showers (stereo information is the same for each event ID)
unique_allcameras_reco_showers_stereo = allcameras_reco_showers_stereo.drop_duplicates(subset=['event_id'])
[24]:
core_res = np.zeros(len(true_energy_bin_centers))

fig = plt.figure(figsize=(14, 7))

plt.subplot(1,2,1)

plt.xlabel("log10(True energy) [TeV]")
plt.ylabel("Reconstructed cores sigma 68% [m]")

true_energy = np.log10(unique_allcameras_reco_showers_stereo["true_energy"])

true_core_x = unique_allcameras_reco_showers_stereo["mc_core_x"]
true_core_y = unique_allcameras_reco_showers_stereo["mc_core_y"]
reco_core_x = unique_allcameras_reco_showers_stereo["reco_core_x"]
reco_core_y = unique_allcameras_reco_showers_stereo["reco_core_y"]

core_distances = np.sqrt((true_core_x - reco_core_x)**2 + (true_core_y - reco_core_y)**2)

for i in range(len(true_energy_bin_centers)):

    mask = (true_energy > true_energy_bin_edges[i]) & (true_energy <= true_energy_bin_edges[i + 1])
    selected_core_distances = np.sort(core_distances[mask])
    core_res[i] = np.percentile(selected_core_distances, 68.0)

plt.plot(true_energy_bin_centers, core_res, 'o', label = "protopipe")

# CTAMARS
plt.plot(CTAMARS_coreres.members["fX"], CTAMARS_coreres.member("fY"), '-', label = "CTAMARS")

plt.grid(which="both")
plt.legend()

plt.subplot(1,2,2)

plt.xlabel("log10(True energy) [TeV]")
plt.ylabel("Ratio protopipe / CTAMARS")

plt.plot(true_energy_bin_centers,
         core_res/CTAMARS_coreres.member("fY"),
         '-')
plt.hlines(1.0, plt.gca().get_xlim()[0], plt.gca().get_xlim()[1], linestyle= '--', color="red")
plt.ylim(0, 2)

plt.show()

fig.savefig(f"./plots/training_shower_core_reco_protopipe_{analysisName}.png")
../../../_images/contribute_benchmarks_TRAINING_benchmarks_DL2_direction-reconstruction_39_0.png

Shower maximum height reconstruction

back to top

All showers reconstructed by at least 2 telescopes.

[25]:
plt.figure(figsize=(17,7))

h_max = unique_allcameras_reco_showers_stereo["h_max"]
e_true = unique_allcameras_reco_showers_stereo["true_energy"]
nbins = 200

plt.subplot(1,2,1)

plt.hist(np.log10(h_max),
         bins=nbins,
         range=[0, 6],
         alpha=0.5,
         color="red")
plt.yscale("log")
plt.xlabel("log10(H_max) [m]")
plt.ylabel("Number of showers")
plt.grid(which="both")

plt.subplot(1,2,2)

plt.hist2d(np.log10(e_true), np.log10(h_max), bins=[nbins,nbins], norm=LogNorm())
plt.colorbar()
plt.xlabel("log10(true energy) [TeV]")
plt.ylabel("log10(H_max) [m]")
plt.grid(which="both")

plt.show()

fig.savefig(f"./plots/training_hmax_distro_protopipe_{analysisName}.png")
../../../_images/contribute_benchmarks_TRAINING_benchmarks_DL2_direction-reconstruction_42_0.png
[ ]: