import os
import pickle
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
from IPython.display import HTML, display
from sklearn.inspection import PartialDependenceDisplay, permutation_importance
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.utils import resample


with warnings.catch_warnings():
    warnings.filterwarnings("ignore", message=".*The 'nopython' keyword.*")
    import shap
    import ydata_profiling
    from alibi.explainers import AnchorTabular


# Load the diabetes dataset and discard exact duplicate rows before analysis.
df = pd.read_csv("data/input/diabetes_data.csv").drop_duplicates()
# Preview the first four rows (rendered as the cell output in a notebook).
df.head(4)


# Print column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 64020 entries, 0 to 70691
Data columns (total 18 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Age                   64020 non-null  float64
 1   Sex                   64020 non-null  float64
 2   HighChol              64020 non-null  float64
 3   CholCheck             64020 non-null  float64
 4   BMI                   64020 non-null  float64
 5   Smoker                64020 non-null  float64
 6   HeartDiseaseorAttack  64020 non-null  float64
 7   PhysActivity          64020 non-null  float64
 8   Fruits                64020 non-null  float64
 9   Veggies               64020 non-null  float64
 10  HvyAlcoholConsump     64020 non-null  float64
 11  GenHlth               64020 non-null  float64
 12  MentHlth              64020 non-null  float64
 13  PhysHlth              64020 non-null  float64
 14  DiffWalk              64020 non-null  float64
 15  Stroke                64020 non-null  float64
 16  HighBP                64020 non-null  float64
 17  Diabetes              64020 non-null  float64
dtypes: float64(18)
memory usage: 9.3 MB


# Summary statistics (count, mean, std, quartiles, min/max) for every column.
df.describe()


# The profiling report is slow to build, so it is generated only when the
# HTML file is not already on disk; the file is then embedded in the notebook.
filename = "output/report.html"
report_is_cached = os.path.exists(filename)
if not report_is_cached:
    profile = ydata_profiling.ProfileReport(df, title="EDA")
    profile.to_file(filename)
display(HTML(filename=filename))


# Separate the target column from the predictors.
y = df["Diabetes"]
X = df.drop(columns=["Diabetes"])
# Hold out 20% of the rows for testing, stratified on the target so both
# partitions keep the same class proportions; the seed makes it reproducible.
split_options = {"test_size": 0.2, "stratify": y, "random_state": 0}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


# Hyper-parameter search for the XGBoost classifier, cached on disk: the fitted
# GridSearchCV object is loaded from the pickle when it exists and is fitted
# and saved otherwise.
gridsearch_root = "output/gridsearch.pkl"
try:
    # NOTE: unpickling executes arbitrary code. Only load a cache file that was
    # produced by this notebook on a trusted machine.
    with open(gridsearch_root, "rb") as f:
        grid = pickle.load(f)
except FileNotFoundError:
    base_clf = xgb.XGBClassifier(random_state=0, n_jobs=5)
    param_grid = {
        "n_estimators": [100, 150, 200],
        "max_depth": [5, 6, 7],
        "learning_rate": [0.15, 0.3, 0.6],
    }
    grid = GridSearchCV(
        base_clf,
        param_grid,
        scoring="f1",
    )
    grid.fit(X_train, y_train)
    # Create the cache directory first so the write cannot fail with
    # FileNotFoundError right after the expensive fit.
    os.makedirs(os.path.dirname(gridsearch_root), exist_ok=True)
    with open(gridsearch_root, "wb") as f:
        pickle.dump(grid, f)


# Position of the best parameter combination within cv_results_.
index = grid.best_index_
# Show the cross-validation record (timings, per-split and mean scores) for it.
pd.DataFrame(grid.cv_results_).loc[index]

mean_fit_time                                                              1.027432
std_fit_time                                                                0.19584
mean_score_time                                                            0.015004
std_score_time                                                             0.002098
param_learning_rate                                                            0.15
param_max_depth                                                                   5
param_n_estimators                                                              100
params                 {'learning_rate': 0.15, 'max_depth': 5, 'n_estimators': 100}
split0_test_score                                                          0.765355
split1_test_score                                                          0.753739
split2_test_score                                                          0.767167
split3_test_score                                                          0.764041
split4_test_score                                                          0.759648
mean_test_score                                                             0.76199
std_test_score                                                             0.004814
rank_test_score                                                                   1
Name: 0, dtype: object


# Evaluate the best estimator found by the grid search on the held-out split
# and draw its confusion matrix.
clf = grid.best_estimator_
test_predictions = clf.predict(X_test)
conf = confusion_matrix(y_test, test_predictions)
class_labels = ["No diabetes", "Diabetes"]
disp = ConfusionMatrixDisplay(conf, display_labels=class_labels)
disp.plot();


# One panel per XGBoost importance definition, side by side.
importance_types = ("weight", "cover", "gain")
fig, axes = plt.subplots(ncols=3, figsize=(30, 10))
for panel, method in enumerate(importance_types):
    xgb.plot_importance(
        clf,
        ax=axes[panel],
        importance_type=method,
        title=method,
        show_values=False,
    )
plt.tight_layout()


# Compare how the three XGBoost importance definitions rank the features.
# Each column of markers is one ranking (position 0 = most important) and a
# segment joins the positions of the same feature in two adjacent rankings
# whenever that position changes.
booster = clf.get_booster()
# Feature names ordered from most to least important, per importance type.
importances = {
    method: pd.Series(booster.get_score(importance_type=method))
    .sort_values(ascending=False)
    .index
    for method in importance_types
}
weight = importances["weight"]
x = np.zeros(len(weight), dtype=np.float32)
ns = np.arange(len(weight))
fig, ax = plt.subplots(figsize=(10, 10))
# One marker column per ranking; the weight ranking is repeated at the end so
# the cover -> weight comparison can be drawn as well.
for i in range(len(importance_types) + 1):
    ax.scatter(x + i, ns, marker="o")

# Rank of every feature in each ordering, all aligned on the weight ordering:
# entry k of each array describes the feature weight[k]. Aligning the three
# arrays on the same feature order is what makes each segment join the same
# feature on both sides.
# Assumes the three rankings contain the same features (get_indexer returns -1
# for a missing one) -- TODO confirm for models with unused features.
weight_indices = np.arange(len(weight))
gain_indices = importances["gain"].get_indexer(weight)
cover_indices = importances["cover"].get_indexer(weight)

# Only draw a segment where the rank actually differs between two rankings.
weight_gain_mask = weight_indices != gain_indices
gain_cover_mask = gain_indices != cover_indices
cover_weight_mask = cover_indices != weight_indices

ax.plot(
    [0, 1],
    np.column_stack((weight_indices, gain_indices))[weight_gain_mask].T,
    color="k",
)
ax.plot(
    [1, 2], np.column_stack((gain_indices, cover_indices))[gain_cover_mask].T, color="k"
)
ax.plot(
    [2, 3],
    np.column_stack((cover_indices, weight_indices))[cover_weight_mask].T,
    color="k",
)

# Fix the tick positions before assigning their labels so the labels are bound
# to the intended positions (no warning suppression needed). The y labels
# follow the weight ranking, i.e. they name the markers of the weight columns.
ax.set_yticks(ns)
ax.set_yticklabels(weight)
ax.set_xticks([0, 1, 2, 3])
ax.set_xticklabels(["weight", "gain", "cover", "weight"])
ax.set_title("Comparación de rankings para cada método");


# Fit one decision tree per maximum depth (1..49) and record the value returned
# by its score() method on the held-out split.
# NOTE(review): the depth is chosen on the test split, so the reported score of
# the selected tree is optimistic -- consider a separate validation split.
scores = [
    DecisionTreeClassifier(max_depth=depth, random_state=0)
    .fit(X_train, y_train)
    .score(X_test, y_test)
    for depth in range(1, 50)
]
# scores[0] belongs to depth 1, hence the +1 when turning the index into a depth.
best_depth = np.argmax(scores) + 1


# Left panel: score as a function of tree depth, with the best depth marked.
# Right panel: the top levels of the tree refitted at that depth.
fig, ax = plt.subplots(ncols=2, figsize=(30, 15))
# scores[k] belongs to depth k + 1; plot against the real depths so the curve
# lines up with the vertical marker and the annotation below.
depths = np.arange(1, len(scores) + 1)
ax[0].plot(depths, scores)
ax[0].set_title("Precisión en función de la profundidad")
ax[0].set_xlabel("Profundidad")
ax[0].set_ylabel("Precisión")
ax[0].axvline(best_depth, color="k", linestyle="--")
ax[0].text(
    best_depth + 1,
    scores[best_depth - 1],
    f"Profundidad óptima: {best_depth}, precisión: {scores[best_depth - 1]:.2f}",
    va="center",
    ha="left",
)
tree = DecisionTreeClassifier(max_depth=best_depth, random_state=0)
tree.fit(X_train, y_train)
plot_tree(
    tree,
    max_depth=3,  # Only draw the top levels so the figure stays legible
    ax=ax[1],
    # A plain list is accepted by every scikit-learn release, whereas some
    # releases reject a pandas Index here.
    feature_names=list(X.columns),
    filled=True,
    fontsize=10,
    class_names=["No diabetes", "Diabetes"],
)
ax[1].set_title("Árbol de decisión de profundidad óptima");


# Permutation importance of every feature for the tuned classifier, measured
# on the held-out split and drawn as horizontal bars with their std as error.
perm_importance = permutation_importance(
    clf, X_test, y_test, n_repeats=30, random_state=0, n_jobs=-1
)
perm_importance_df = pd.DataFrame(
    {
        "importances_mean": perm_importance.importances_mean,
        "importances_std": perm_importance.importances_std,
    },
    index=X_test.columns,
).sort_values(by="importances_mean", ascending=True)
bar_positions = range(len(perm_importance_df))
plt.figure(figsize=(15, 15))
plt.barh(
    bar_positions,
    perm_importance_df.importances_mean,
    xerr=perm_importance_df.importances_std,
)
# The rows were reordered by sort_values, so the tick labels must come from the
# sorted frame's index; the original column order would mislabel the bars.
plt.yticks(bar_positions, perm_importance_df.index)
plt.title("Importancia de características para el predictor de diabetes")
plt.xlabel("Importancia");


# Number of distinct values per feature, largest first.
X.nunique().sort_values(ascending=False)

BMI                     80
PhysHlth                31
MentHlth                31
Age                     13
GenHlth                  5
Veggies                  2
Stroke                   2
DiffWalk                 2
HvyAlcoholConsump        2
Fruits                   2
Sex                      2
PhysActivity             2
HeartDiseaseorAttack     2
Smoker                   2
CholCheck                2
HighChol                 2
HighBP                   2
dtype: int64


# Compute SHAP values for every row of X with the tree explainer and preview
# them as a table that shares X's row index and column names.
explainer = shap.TreeExplainer(clf)
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    shap_values = explainer.shap_values(X)
shap_frame = pd.DataFrame(shap_values, index=X.index, columns=X.columns)
shap_frame.head()


# Draw a SHAP force plot for three rows selected by position.
indice_instancias = [1, 9, 150]
instancias = X.iloc[indice_instancias, :]
valores = shap_values[indice_instancias, :]
for (_, fila), contribuciones in zip(instancias.iterrows(), valores):
    shap.force_plot(
        explainer.expected_value,
        contribuciones,
        fila,
        link="logit",
        matplotlib=True,
    )


# SHAP summary plot over all rows of X, with warnings raised while plotting
# silenced.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    shap.summary_plot(shap_values, X)


def shap_to_probability(shap_value: np.ndarray) -> np.ndarray:
    """Map SHAP values to the (0, 1) range with the logistic function.

    Computes ``1 / (1 + exp(-shap_value))`` element-wise.

    Args:
        shap_value: SHAP value or array of SHAP values.

    Returns:
        Array with the same shape as ``shap_value`` holding the logistic
        transform of each entry.
    """
    # exp(x) / (1 + exp(x)) overflows to inf / inf = nan for large positive x.
    # log(1 + exp(-x)) computed by logaddexp stays finite at both extremes.
    return np.exp(-np.logaddexp(0.0, -np.asarray(shap_value, dtype=float)))


# For the `top` features with the largest mean absolute SHAP value, scatter the
# feature value against its SHAP value, coloured by the logistic transform of
# that SHAP value.
top = 5
mean_abs_shap = np.abs(shap_values).mean(0)
indexes = np.argsort(mean_abs_shap)[-top:]
fig, axes = plt.subplots(nrows=top, figsize=(10, 8 * top))
for panel, column in zip(axes, indexes):
    contributions = shap_values[:, column]
    points = panel.scatter(
        X.iloc[:, column],
        contributions,
        c=shap_to_probability(contributions),
        vmin=0.0,
        vmax=1.0,
    )
    panel.set_title("Importancia de cada característica")
    panel.set_ylabel("Valor de SHAP")
    panel.set_xlabel(X.columns[column])
    fig.colorbar(points)


# Partial dependence of the classifier on the top SHAP features (first row)
# above a histogram of each feature in the same sub-sample (second row).
categorical = ["HighChol", "HighBP", "GenHlth"]
best_five = X.columns[indexes]
# A stratified sub-sample of 1000 test rows is used for both rows of panels.
submuestra = resample(X_test, n_samples=1_000, random_state=0, stratify=y_test)
fig, ax = plt.subplots(
    # Follow the number of selected features instead of hard-coding 5.
    # squeeze=False keeps `ax` two-dimensional even for a single feature.
    ncols=len(best_five),
    nrows=2,
    figsize=(25, 10),
    constrained_layout=True,
    sharex="col",
    squeeze=False,
)
# Named pdp_display so it does not shadow IPython's display(), which is
# imported at the top of the file and used by the report cell.
pdp_display = PartialDependenceDisplay.from_estimator(
    clf, submuestra, best_five, ax=ax[0], categorical_features=categorical
)
for i, feature in enumerate(best_five):
    sns.histplot(
        data=submuestra,
        x=feature,
        ax=ax[1, i],
        multiple="stack",
        bins=10,
    )


# Build an anchor explainer around the classifier's predict function and fit
# it on the training features. This rebinds `explainer`, which previously held
# the SHAP explainer.
explainer = AnchorTabular(predictor=clf.predict, feature_names=X_train.columns, seed=0)
explainer.fit(X_train.values)

AnchorTabular(meta={
    'name': 'AnchorTabular',
    'type': ['blackbox'],
    'explanations': ['local'],
    'params': {'seed': 0, 'disc_perc': (25, 50, 75)}
})


# Print the anchor rule, its precision and its coverage for three rows of X
# selected by position.
indice_instancias = [1000, 3001, 5751]
instancias = X.iloc[indice_instancias, :]
predicciones = explainer.predictor(instancias.values)
# NOTE(review): `nombres` is never used below; presumably it was meant to turn
# the numeric prediction into a readable label -- confirm before removing.
nombres = ("No tiene diabetes", "Tiene diabetes")
separador = "-" * 14
for indice, instancia, prediccion in zip(
    indice_instancias, instancias.values, predicciones
):
    print(f"Instancia {indice}", separador, f"Predicción: {prediccion}", sep="\n")
    explicacion = explainer.explain(instancia)
    # Coverage is a fraction; scale it by the number of rows in X.
    poblacion = int(explicacion.coverage * len(X))
    print(
        "Regla: " + " y ".join(explicacion.anchor),
        f"Precisión: {explicacion.precision}",
        f"Cobertura: {explicacion.coverage}",
        f"Población explicada: {poblacion}",
        "",
        sep="\n",
    )

Instancia 1000
--------------
Predicción: 0
Regla: GenHlth <= 2.00 y HighBP <= 0.00 y Age <= 7.00
Precisión: 0.9958932238193019
Cobertura: 0.3132224304904717
Población explicada: 20052

Instancia 3001
--------------
Predicción: 0
Regla: GenHlth <= 2.00 y HighBP <= 0.00 y Age <= 7.00
Precisión: 0.9924078091106291
Cobertura: 0.3132224304904717
Población explicada: 20052

Instancia 5751
--------------
Predicción: 0
Regla: BMI <= 25.00 y HighBP <= 0.00 y Age <= 7.00
Precisión: 0.9720101781170484
Cobertura: 0.3132224304904717
Población explicada: 20052

	Age	Sex	HighChol	CholCheck	BMI	Smoker	HeartDiseaseorAttack	PhysActivity	Fruits	Veggies	HvyAlcoholConsump	GenHlth	MentHlth	PhysHlth	DiffWalk	Stroke	HighBP	Diabetes
count	64020.000000	64020.000000	64020.000000	64020.000000	64020.000000	64020.000000	64020.000000	64020.000000	64020.000000	64020.000000	64020.000000	64020.000000	64020.000000	64020.000000	64020.000000	64020.000000	64020.000000	64020.000000
mean	8.607420	0.455983	0.539550	0.973211	30.178554	0.490706	0.159950	0.678366	0.590862	0.771587	0.046345	2.921103	4.130537	6.385020	0.277007	0.068510	0.581756	0.521681
std	2.860717	0.498063	0.498437	0.161466	7.287730	0.499918	0.366563	0.467107	0.491679	0.419813	0.210233	1.107991	8.466301	10.368493	0.447524	0.252621	0.493275	0.499534
min	1.000000	0.000000	0.000000	0.000000	12.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	1.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
25%	7.000000	0.000000	0.000000	1.000000	25.000000	0.000000	0.000000	0.000000	0.000000	1.000000	0.000000	2.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
50%	9.000000	0.000000	1.000000	1.000000	29.000000	0.000000	0.000000	1.000000	1.000000	1.000000	0.000000	3.000000	0.000000	0.000000	0.000000	0.000000	1.000000	1.000000
75%	11.000000	1.000000	1.000000	1.000000	34.000000	1.000000	0.000000	1.000000	1.000000	1.000000	0.000000	4.000000	3.000000	7.000000	1.000000	0.000000	1.000000	1.000000
max	13.000000	1.000000	1.000000	1.000000	98.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	5.000000	30.000000	30.000000	1.000000	1.000000	1.000000	1.000000

Number of variables	18
Number of observations	64020
Missing cells	0
Missing cells (%)	0.0%
Duplicate rows	0
Duplicate rows (%)	0.0%
Total size in memory	11.3 MiB
Average record size in memory	185.0 B

`CholCheck` is highly imbalanced (82.2%)	Imbalance
`HvyAlcoholConsump` is highly imbalanced (72.9%)	Imbalance
`Stroke` is highly imbalanced (64.0%)	Imbalance
`MentHlth` has 41555 (64.9%) zeros	Zeros
`PhysHlth` has 33432 (52.2%) zeros	Zeros

Analysis started	2023-06-29 20:54:28.398598
Analysis finished	2023-06-29 20:54:36.531913
Duration	8.13 seconds
Software version	ydata-profiling vv4.1.2
Download configuration	config.json

Distinct	13
Distinct (%)	< 0.1%
Missing	0
Missing (%)	0.0%
Infinite	0
Infinite (%)	0.0%
Mean	8.6074196

	Age	Sex	HighChol	CholCheck	BMI	Smoker	PhysActivity	Fruits	Veggies	GenHlth	MentHlth	PhysHlth	Stroke	HighBP
0	4.0	1.0	0.0	1.0	26.0	0.0	1.0	0.0	1.0	3.0	5.0	30.0	0.0	1.0
1	12.0	1.0	1.0	1.0	26.0	1.0	0.0	1.0	0.0	3.0	0.0	0.0	1.0	1.0
2	13.0	1.0	0.0	1.0	26.0	0.0	1.0	1.0	1.0	1.0	0.0	10.0	0.0	0.0
3	11.0	1.0	1.0	1.0	28.0	1.0	1.0	1.0	1.0	3.0	0.0	3.0	0.0	1.0

Minimum	1
Maximum	13
Zeros	0
Zeros (%)	0.0%
Negative	0
Negative (%)	0.0%
Memory size	3.0 MiB

Minimum	1
5-th percentile	3
Q1	7
median	9
Q3	11
95-th percentile	13
Maximum	13
Range	12
Interquartile range (IQR)	4

Standard deviation	2.8607167
Coefficient of variation (CV)	0.33235474
Kurtosis	-0.21594326
Mean	8.6074196
Median Absolute Deviation (MAD)	2
Skewness	-0.54511156
Sum	551047
Variance	8.1836998
Monotonicity	Not monotonic

Value	Count	Frequency (%)
10	9589	15.0%
9	9121	14.2%
8	7829	12.2%
11	7307	11.4%
7	6202	9.7%
13	5105	8.0%
12	5035	7.9%
6	4194	6.6%
5	3170	5.0%
4	2489	3.9%
Other values (3)	3979	6.2%

Total characters	192060
Distinct characters	3
Distinct categories	2 ?
Distinct scripts	1 ?
Distinct blocks	1 ?

Distinct	80
Distinct (%)	0.1%
Missing	0
Missing (%)	0.0%
Infinite	0
Infinite (%)	0.0%
Mean	30.178554

Standard deviation	7.2877303
Coefficient of variation (CV)	0.24148706
Kurtosis	6.8303595
Mean	30.178554
Median Absolute Deviation (MAD)	4
Skewness	1.669602
Sum	1932031
Variance	53.111013
Monotonicity	Not monotonic

	Age	BMI	MentHlth	PhysHlth	Sex	HighChol	CholCheck	Smoker	HeartDiseaseorAttack	PhysActivity	Fruits	Veggies	HvyAlcoholConsump	GenHlth	DiffWalk	Stroke	HighBP	Diabetes
Age	1.000	-0.060	-0.183	0.043	0.029	0.256	0.112	0.112	0.228	0.102	0.091	0.027	0.062	0.088	0.204	0.127	0.335	0.287
BMI	-0.060	1.000	0.064	0.140	0.121	0.119	0.053	0.017	0.046	0.148	0.062	0.038	0.064	0.127	0.233	0.007	0.224	0.276
MentHlth	-0.183	0.064	1.000	0.315	0.111	0.077	0.003	0.084	0.063	0.113	0.047	0.037	0.015	0.172	0.236	0.078	0.056	0.075
PhysHlth	0.043	0.140	0.315	1.000	0.077	0.141	0.043	0.110	0.187	0.213	0.026	0.046	0.048	0.321	0.477	0.153	0.171	0.206
Sex	0.029	0.121	0.111	0.077	1.000	0.006	0.007	0.108	0.099	0.055	0.085	0.053	0.016	0.030	0.086	0.002	0.031	0.035
HighChol	0.256	0.119	0.077	0.141	0.006	1.000	0.093	0.075	0.175	0.075	0.034	0.031	0.031	0.216	0.154	0.097	0.290	0.264
CholCheck	0.112	0.053	0.003	0.043	0.007	0.093	1.000	0.000	0.049	0.015	0.012	0.002	0.025	0.075	0.053	0.026	0.113	0.126
Smoker	0.112	0.017	0.084	0.110	0.108	0.075	0.000	1.000	0.115	0.064	0.059	0.016	0.075	0.131	0.108	0.060	0.067	0.062
HeartDiseaseorAttack	0.228	0.046	0.063	0.187	0.099	0.175	0.049	0.115	1.000	0.082	0.002	0.022	0.044	0.273	0.220	0.219	0.203	0.202
PhysActivity	0.102	0.148	0.113	0.213	0.055	0.075	0.015	0.064	0.082	1.000	0.113	0.130	0.029	0.249	0.255	0.068	0.119	0.139
Fruits	0.091	0.062	0.047	0.026	0.085	0.034	0.012	0.059	0.002	0.113	1.000	0.225	0.026	0.066	0.028	0.000	0.021	0.031
Veggies	0.027	0.038	0.037	0.046	0.053	0.031	0.002	0.016	0.022	0.130	0.225	1.000	0.030	0.089	0.063	0.037	0.051	0.062
HvyAlcoholConsump	0.062	0.064	0.015	0.048	0.016	0.031	0.025	0.075	0.044	0.029	0.026	0.030	1.000	0.077	0.060	0.028	0.034	0.107
GenHlth	0.088	0.127	0.172	0.321	0.030	0.216	0.075	0.131	0.273	0.249	0.066	0.089	0.077	1.000	0.487	0.193	0.297	0.387
DiffWalk	0.204	0.233	0.236	0.477	0.086	0.154	0.053	0.108	0.220	0.255	0.028	0.063	0.060	0.487	1.000	0.182	0.227	0.262
Stroke	0.127	0.007	0.078	0.153	0.002	0.097	0.026	0.060	0.219	0.068	0.000	0.037	0.028	0.193	0.182	1.000	0.126	0.120
HighBP	0.335	0.224	0.056	0.171	0.031	0.290	0.113	0.067	0.203	0.119	0.021	0.051	0.034	0.297	0.227	0.126	1.000	0.351
Diabetes	0.287	0.276	0.075	0.206	0.035	0.264	0.126	0.062	0.202	0.139	0.031	0.062	0.107	0.387	0.262	0.120	0.351	1.000

	Age	Sex	HighChol	CholCheck	BMI	Smoker	HeartDiseaseorAttack	PhysActivity	Fruits	Veggies	GenHlth	MentHlth	PhysHlth	DiffWalk	HighBP	Diabetes
70682	9.0	0.0	0.0	1.0	37.0	0.0	0.0	0.0	0.0	0.0	4.0	0.0	30.0	1.0	1.0	1.0
70683	10.0	0.0	0.0	1.0	28.0	0.0	0.0	0.0	0.0	1.0	2.0	0.0	0.0	0.0	1.0	1.0
70684	9.0	1.0	1.0	1.0	27.0	0.0	1.0	1.0	0.0	1.0	4.0	30.0	5.0	0.0	1.0	1.0
70685	7.0	0.0	0.0	1.0	38.0	0.0	0.0	1.0	0.0	1.0	4.0	0.0	0.0	0.0	1.0	1.0
70686	11.0	1.0	1.0	1.0	27.0	0.0	0.0	1.0	1.0	0.0	4.0	0.0	30.0	0.0	0.0	1.0
70687	6.0	0.0	1.0	1.0	37.0	0.0	0.0	0.0	0.0	1.0	4.0	0.0	0.0	0.0	0.0	1.0
70688	10.0	1.0	1.0	1.0	29.0	1.0	1.0	0.0	1.0	1.0	2.0	0.0	0.0	1.0	0.0	1.0
70689	13.0	0.0	1.0	1.0	25.0	0.0	1.0	0.0	1.0	0.0	5.0	15.0	0.0	1.0	1.0	1.0
70690	11.0	0.0	1.0	1.0	18.0	0.0	0.0	0.0	0.0	0.0	4.0	0.0	0.0	1.0	1.0	1.0
70691	9.0	0.0	1.0	1.0	25.0	0.0	1.0	1.0	1.0	0.0	2.0	0.0	0.0	0.0	1.0	1.0

Numeric	4
Categorical	14

0.0	34828
1.0	29192

Max length	3
Median length	3
Mean length	3
Min length	3

Unique	0 ?
Unique (%)	0.0%

	Age	Sex	HighChol	CholCheck	BMI	Smoker	HeartDiseaseorAttack	PhysActivity	Fruits	Veggies	HvyAlcoholConsump	GenHlth	MentHlth	PhysHlth	DiffWalk	Stroke	HighBP
0	-1.026560	0.008977	-0.439114	0.029464	-0.342847	0.008693	-0.061154	0.004153	0.000423	-0.024877	0.029179	0.054244	-0.184836	0.092628	-0.089071	-0.022545	0.428644
1	0.245925	0.064547	0.163573	0.017317	-0.293388	-0.032035	-0.044783	0.073778	0.026808	0.069421	0.041073	0.084423	0.020633	0.005599	-0.064335	0.114233	0.347924
2	0.792307	0.219198	-0.151763	0.030192	-0.291248	-0.007490	-0.037095	-0.032856	-0.026772	-0.021163	0.047696	-0.887070	0.048786	0.100116	-0.087138	-0.019266	-0.563719
3	0.320420	0.077391	0.186606	0.024601	-0.149637	-0.011896	-0.036314	-0.029691	-0.017971	-0.028286	0.039637	0.077160	0.011180	-0.009014	-0.090873	-0.007912	0.307017
4	-0.045402	-0.080013	-0.232086	0.030714	0.104037	-0.003471	-0.048577	-0.004121	0.006378	-0.017323	0.043292	-0.546284	0.084615	0.044429	-0.035780	-0.017352	-0.530409

Laboratorio 10: Interpretabilidad 🤖

Cuerpo Docente:¶

Equipo: SUPER IMPORTANTE - notebooks sin nombre no serán revisados¶

Link de repositorio de GitHub: https://github.com/johnny-godoy/laboratorios-mds/blob/main/2023/lab10/laboratorio_10.ipynb¶

Índice¶

Temas a tratar¶

Reglas:¶

Objetivos principales del laboratorio¶

1. Problemas Clínicos del Dr. Simi¶

Overview

Variables

Common Values

Length

Common Values (Plot)

Most occurring characters

Most occurring categories

Most frequent character per category

Decimal Number

Other Punctuation

Most occurring scripts

Most frequent character per script

Common

Most occurring blocks

Most frequent character per block

ASCII

Common Values

Length

Common Values (Plot)

Most occurring characters

Most occurring categories

Most frequent character per category

Decimal Number

Other Punctuation

Most occurring scripts

Most frequent character per script

Common

Most occurring blocks

Most frequent character per block

ASCII

Common Values

Length

Common Values (Plot)

Most occurring characters

Most occurring categories

Most frequent character per category

Decimal Number

Other Punctuation

Most occurring scripts

Most frequent character per script

Common

Most occurring blocks

Most frequent character per block

ASCII

Common Values

Length

Common Values (Plot)

Most occurring characters

Most occurring categories

Most frequent character per category

Decimal Number

Other Punctuation

Most occurring scripts

Most frequent character per script

Common

Most occurring blocks

Most frequent character per block

ASCII

Common Values

Length

Common Values (Plot)

Most occurring characters

Most occurring categories

Most frequent character per category

Decimal Number

Other Punctuation

Most occurring scripts

Most frequent character per script

Common

Most occurring blocks

Most frequent character per block

Link de repositorio de GitHub: `https://github.com/johnny-godoy/laboratorios-mds/blob/main/2023/lab10/laboratorio_10.ipynb`¶