from __future__ import annotations

from zipfile import ZipFile
import dataclasses
import os

from pandas_profiling import ProfileReport
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.cross_decomposition import PLSRegression
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.experimental import enable_halving_search_cv
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.feature_selection import mutual_info_classif, mutual_info_regression
from sklearn.manifold import TSNE
from sklearn.metrics import classification_report, f1_score, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV, HalvingGridSearchCV
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
import catboost as cb
import lightgbm as lgb
import numpy as np
import pandas as pd
import plotly.express as px
import scipy.stats

# Route pandas' ``.plot()`` / ``.hist()`` calls through plotly.
pd.options.plotting.backend = "plotly"


# Raw train/test splits and the names of the two response columns.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
df_train = pd.read_pickle("data/raw/train.pickle")
df_test = pd.read_pickle("data/raw/test.pickle")
targets = ["estimated_sells", "rating"]


# Separate responses from features, then stack the test and train features
# (tagged by a ``split`` column) so they can be explored together.
y = df_train[targets]
X = df_train.drop(columns=targets)
df_full = pd.concat(
    [
        df_test.assign(split="test"),
        X.assign(split="train"),
    ]
)
df_full


# Exploratory profiling report over the combined train/test features.
profile = ProfileReport(
    df_full,
    title="Reporte: El desafío de Don René",
    explorative=True,
    # NOTE(review): a threshold of 0 presumably stops numeric columns with few
    # distinct values from being treated as categorical; confirm in the
    # pandas-profiling docs.
    vars={"num": {"low_categorical_threshold": 0}},
)

# Uncomment to export the report to disk:
# profile.to_file("reports/report.html")
profile


# Distribution of the raw regression target.
df_train["estimated_sells"].hist()


# Box-Cox transformed regression target. ``boxcox`` returns the transformed
# values first, hence the ``[0]``. The Series gets a fresh default index.
y_tr = pd.Series(scipy.stats.boxcox(df_train["estimated_sells"])[0])
y_tr.hist()


# Distribution of the classification target.
df_train["rating"].hist()


# Numeric feature columns and a parallel mask marking which ones are discrete.
numericas = ["required_age", "english", "achievements", "average_playtime", "price"]
discretas = [True, True, True, False, False]

# Mutual information of every numeric feature with each response: ``rating``
# for classification and the Box-Cox transformed sales (``y_tr``) for
# regression. Rows are features, sorted by the classification score.
informacion = pd.DataFrame(
    {
        "clasificación": mutual_info_classif(
            df_train[numericas],
            df_train["rating"],
            discrete_features=discretas,
        ),
        "regresión": mutual_info_regression(
            df_train[numericas],
            y_tr,
            discrete_features=discretas,
        ),
    },
    index=numericas,
).sort_values(by="clasificación", ascending=False)

fig = px.bar(informacion)
fig.update_layout(title="Información mútua con la variable de respuesta")
fig.show()


# One scatter plot per numeric feature against the regression target.
# NOTE: the y values are the Box-Cox transformed sales (``y_tr``), although the
# axis is labelled "estimated_sells".
for feat in numericas:
    fig = px.scatter(x=df_train[feat], y=y_tr)
    fig.update_layout(xaxis_title=feat, yaxis_title="estimated_sells")
    fig.show()


# Both targets grouped by release date.
por_fecha = df_train[["release_date", "estimated_sells", "rating"]].groupby("release_date")
# Number of non-null ``estimated_sells`` rows per release date.
por_fecha["estimated_sells"].count().plot()


# Mean ``estimated_sells`` per release date.
por_fecha["estimated_sells"].mean().plot()


# Project the continuous features onto their first two principal components
# and colour each point by the Box-Cox transformed sales.
continuas = ["achievements", "average_playtime", "price"]
X_to_project = df_train[continuas]
processor = make_pipeline(StandardScaler(), PCA(n_components=2))
X_tr = processor.fit_transform(X_to_project)
# x/y must be passed by keyword: the first positional parameter of px.scatter
# is ``data_frame``, so positional arrays do not land on the x and y axes.
fig = px.scatter(x=X_tr[:, 0], y=X_tr[:, 1], color=y_tr)
fig.update_layout(xaxis_title="PC1", yaxis_title="PC2")


# Same visualisation with a t-SNE embedding of the standardised features.
processor = make_pipeline(StandardScaler(), TSNE())
X_tr = processor.fit_transform(X_to_project)
# x/y must be passed by keyword: the first positional parameter of px.scatter
# is ``data_frame``, so positional arrays do not land on the x and y axes.
px.scatter(x=X_tr[:, 0], y=X_tr[:, 1], color=y_tr)

C:\Users\David\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\manifold\_t_sne.py:795: FutureWarning:

The default initialization in TSNE will change from 'random' to 'pca' in 1.2.

C:\Users\David\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\manifold\_t_sne.py:805: FutureWarning:

The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.


# Supervised projection: PLS scores of the features fitted against the
# transformed target. With ``y`` given, fit_transform returns a tuple whose
# first element holds the X scores.
processor = PLSRegression()
X_tr = processor.fit_transform(X_to_project, y_tr)[0]
# x/y must be passed by keyword: the first positional parameter of px.scatter
# is ``data_frame``, so positional arrays do not land on the x and y axes.
px.scatter(x=X_tr[:, 0], y=X_tr[:, 1], color=y_tr)


def agregar_fechas(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with calendar features derived from ``release_date``.

    Added columns:
        day, month, year: components of the parsed ``release_date``.
        is_xmas: True when the date falls in December and its day of month is
            fewer than 5 days away from the 25th (days 21 through 29).
    """
    dt = pd.to_datetime(df.release_date).dt
    # Each comparison is parenthesised because ``&`` binds tighter than ``<``;
    # without that, ``a < 5 & b`` is evaluated as ``a < (5 & b)``.
    near_the_25th = (dt.day - 25).abs() < 5
    in_december = dt.month == 12
    return df.assign(day=dt.day, month=dt.month, year=dt.year, is_xmas=near_the_25th & in_december)


# Add the calendar features to both splits, then rebuild X / y so they include
# the new columns.
df_train, df_test = (agregar_fechas(frame) for frame in (df_train, df_test))
y = df_train[targets]
X = df_train.drop(columns=targets)


# (name, transformer, columns) spec that standardises every numeric column,
# including the derived date features.
numericas_fechas = [*numericas, "day", "month", "year", "is_xmas"]
numeric_transformer = ("numeric", StandardScaler(), numericas_fechas)


# Preview: binary bag-of-words of ``platforms`` using the default tokenizer.
bow = CountVectorizer(binary=True)
pd.DataFrame(bow.fit_transform(df_test.platforms).todense(),
             columns=bow.get_feature_names_out())


# Preview: binary bag-of-words of ``categories``, one token per ';'-separated
# entry. ``bow`` is rebound here, so this is the vectorizer that later
# ``clone(bow)`` calls copy.
bow = CountVectorizer(binary=True, tokenizer=lambda x: x.split(';'))
pd.DataFrame(bow.fit_transform(df_test.categories).todense(),
             columns=bow.get_feature_names_out())


# Text columns that each get their own bag-of-words encoding.
mixable = ("tags", "genres", "categories")
# One independent, unfitted copy of ``bow`` per column.
mixable_categorizer = ColumnTransformer([(feat, clone(bow), feat) for feat in mixable])


def get_transformer(has_svd: bool) -> ColumnTransformer:
    """Build the feature preprocessor used by every model.

    The result combines three branches: the bag-of-words encoding of the
    ``mixable`` columns, the standardised numeric columns, and a bag-of-words
    encoding of ``platforms``.

    Args:
        has_svd: When true, the ``mixable`` bag-of-words output is reduced to
            100 components with ``TruncatedSVD``; otherwise it is used as is.

    Returns:
        An unfitted ``ColumnTransformer``.
    """
    # NOTE: ``mixable_categorizer`` is the module-level instance, not a clone,
    # so every transformer built here references the same object.
    text_pipeline = (
        make_pipeline(mixable_categorizer, TruncatedSVD(n_components=100))
        if has_svd
        else mixable_categorizer
    )
    text_branch = ("cat", text_pipeline, mixable)
    platform_branch = ("platform", clone(bow), "platforms")
    return ColumnTransformer([text_branch, numeric_transformer, platform_branch])


# Preprocessor whose text branch is reduced with TruncatedSVD.
with_svd = get_transformer(True)
with_svd

ColumnTransformer(transformers=[('cat',
                                 Pipeline(steps=[('columntransformer',
                                                  ColumnTransformer(transformers=[('tags',
                                                                                   CountVectorizer(binary=True),
                                                                                   'tags'),
                                                                                  ('genres',
                                                                                   CountVectorizer(binary=True),
                                                                                   'genres'),
                                                                                  ('categories',
                                                                                   CountVectorizer(binary=True),
                                                                                   'categories')])),
                                                 ('truncatedsvd',
                                                  TruncatedSVD(n_components=100))]),
                                 ('tags', 'genres', 'categories')),
                                ('numeric', StandardScaler(),
                                 ['required_age', 'english', 'achievements',
                                  'average_playtime', 'price', 'day', 'month',
                                  'year', 'is_xmas']),
                                ('platform', CountVectorizer(binary=True),
                                 'platforms')])

ColumnTransformer(transformers=[('cat',
                                 Pipeline(steps=[('columntransformer',
                                                  ColumnTransformer(transformers=[('tags',
                                                                                   CountVectorizer(binary=True),
                                                                                   'tags'),
                                                                                  ('genres',
                                                                                   CountVectorizer(binary=True),
                                                                                   'genres'),
                                                                                  ('categories',
                                                                                   CountVectorizer(binary=True),
                                                                                   'categories')])),
                                                 ('truncatedsvd',
                                                  TruncatedSVD(n_components=100))]),
                                 ('tags', 'genres', 'categories')),
                                ('numeric', StandardScaler(),
                                 ['required_age', 'english', 'achievements',
                                  'average_playtime', 'price', 'day', 'month',
                                  'year', 'is_xmas']),
                                ('platform', CountVectorizer(binary=True),
                                 'platforms')])

('tags', 'genres', 'categories')

ColumnTransformer(transformers=[('tags', CountVectorizer(binary=True), 'tags'),
                                ('genres', CountVectorizer(binary=True),
                                 'genres'),
                                ('categories', CountVectorizer(binary=True),
                                 'categories')])

tags

CountVectorizer(binary=True)

genres

CountVectorizer(binary=True)

categories

CountVectorizer(binary=True)


# Preprocessor that keeps the full bag-of-words text features (no SVD).
without_svd = get_transformer(False)
without_svd

ColumnTransformer(transformers=[('cat',
                                 ColumnTransformer(transformers=[('tags',
                                                                  CountVectorizer(binary=True),
                                                                  'tags'),
                                                                 ('genres',
                                                                  CountVectorizer(binary=True),
                                                                  'genres'),
                                                                 ('categories',
                                                                  CountVectorizer(binary=True),
                                                                  'categories')]),
                                 ('tags', 'genres', 'categories')),
                                ('numeric', StandardScaler(),
                                 ['required_age', 'english', 'achievements',
                                  'average_playtime', 'price', 'day', 'month',
                                  'year', 'is_xmas']),
                                ('platform', CountVectorizer(binary=True),
                                 'platforms')])

ColumnTransformer(transformers=[('cat',
                                 ColumnTransformer(transformers=[('tags',
                                                                  CountVectorizer(binary=True),
                                                                  'tags'),
                                                                 ('genres',
                                                                  CountVectorizer(binary=True),
                                                                  'genres'),
                                                                 ('categories',
                                                                  CountVectorizer(binary=True),
                                                                  'categories')]),
                                 ('tags', 'genres', 'categories')),
                                ('numeric', StandardScaler(),
                                 ['required_age', 'english', 'achievements',
                                  'average_playtime', 'price', 'day', 'month',
                                  'year', 'is_xmas']),
                                ('platform', CountVectorizer(binary=True),
                                 'platforms')])

('tags', 'genres', 'categories')

tags

CountVectorizer(binary=True)

genres

CountVectorizer(binary=True)

categories

CountVectorizer(binary=True)

['required_age', 'english', 'achievements', 'average_playtime', 'price', 'day', 'month', 'year', 'is_xmas']


# Hold out a third of the labelled data, stratified on ``rating`` so both
# partitions keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=0,
    stratify=y["rating"],
)


# Baseline regressor: depth-3 decision tree on the SVD-reduced features.
# The last expression is the R² on the hold-out split.
reg = make_pipeline(with_svd, DecisionTreeRegressor(random_state=0, max_depth=3))
reg.fit(X_train, y_train.estimated_sells)
r2_score(y_test.estimated_sells, reg.predict(X_test))

0.09236911367165701


# Baseline regressor: depth-3 decision tree on the features without SVD.
# The last expression is the R² on the hold-out split.
reg = make_pipeline(without_svd, DecisionTreeRegressor(random_state=0, max_depth=3))
reg.fit(X_train, y_train.estimated_sells)
r2_score(y_test.estimated_sells, reg.predict(X_test))

0.06259848348501729


# CatBoost regressor with default hyper-parameters on the SVD-reduced
# features. The last expression is the R² on the hold-out split.
reg = make_pipeline(with_svd, cb.CatBoostRegressor(random_state=0, verbose=False))
reg.fit(X_train, y_train.estimated_sells)
r2_score(y_test.estimated_sells, reg.predict(X_test))

0.2637755626634174


# CatBoost regressor with default hyper-parameters on the features without
# SVD. The last expression is the R² on the hold-out split.
reg = make_pipeline(without_svd, cb.CatBoostRegressor(random_state=0, verbose=False))
reg.fit(X_train, y_train.estimated_sells)
r2_score(y_test.estimated_sells, reg.predict(X_test))

0.24614066030291437


# Baseline classifier: depth-3 decision tree on the SVD-reduced features.
# The last expression is the weighted F1 on the hold-out split.
clf = make_pipeline(with_svd, DecisionTreeClassifier(random_state=0, max_depth=3))
clf.fit(X_train, y_train.rating)
f1_score(y_test.rating, clf.predict(X_test), average="weighted")

0.22961502770897055


# Baseline classifier: depth-3 decision tree on the features without SVD.
# The last expression is the weighted F1 on the hold-out split.
clf = make_pipeline(without_svd, DecisionTreeClassifier(random_state=0, max_depth=3))
clf.fit(X_train, y_train.rating)
f1_score(y_test.rating, clf.predict(X_test), average="weighted")

0.2229107368404749


# LightGBM classifier with default hyper-parameters on the SVD-reduced
# features. The last expression is the weighted F1 on the hold-out split.
clf = make_pipeline(with_svd, lgb.LGBMClassifier(random_state=0))
clf.fit(X_train, y_train.rating)
f1_score(y_test.rating, clf.predict(X_test), average="weighted")

0.3275020551556012


# LightGBM classifier with default hyper-parameters on the features without
# SVD. The last expression is the weighted F1 on the hold-out split.
clf = make_pipeline(without_svd, lgb.LGBMClassifier(random_state=0))
clf.fit(X_train, y_train.rating)
f1_score(y_test.rating, clf.predict(X_test), average="weighted")

0.3310894337493198


# Extra grid entries for the preprocessing step, merged into every search.
feature_params = {}


def half_grid_search_eval(model, model_params, is_reg=True, preprocessor=with_svd):
    """Tune *model* with a successive-halving grid search and report hold-out performance.

    The search runs on ``X_train`` with 3-fold cross-validation, scoring ``r2``
    for regressors and ``f1_weighted`` for classifiers. The best estimator is
    then evaluated on ``X_test``: the R² score is printed for regressors and a
    classification report for classifiers.

    Args:
        model: Unfitted estimator placed after the preprocessor.
        model_params: Grid of hyper-parameters for *model*; keys are given
            without the ``model__`` prefix, which is added here.
        is_reg: True to target ``estimated_sells`` (regression), False to
            target ``rating`` (classification).
        preprocessor: Feature transformer used as the first pipeline step.

    Returns:
        Tuple ``(best_estimator, best_params)`` from the search.
    """
    target = "estimated_sells" if is_reg else "rating"
    # Use the preprocessor the caller asked for, not a hard-coded one.
    pipeline = Pipeline(steps=[("pipeline", preprocessor), ("model", model)])
    prefixed_params = {f"model__{key}": val for key, val in model_params.items()}
    grid = HalvingGridSearchCV(pipeline, feature_params | prefixed_params,
                               cv=3, scoring="r2" if is_reg else "f1_weighted", random_state=0,
                               n_jobs=-1, min_resources="smallest", aggressive_elimination=True,
                               verbose=10,
                              )
    best_model = grid.fit(X_train, y_train[target]).best_estimator_
    predictions = best_model.predict(X_test)
    if is_reg:
        print(r2_score(y_test[target], predictions))
    else:
        print(classification_report(y_test[target], predictions))
    return best_model, grid.best_params_


def grid_search_eval(model, model_params, is_reg=True, preprocessor=with_svd):
    """Tune *model* with an exhaustive grid search and report hold-out performance.

    The search runs on ``X_train`` with 10-fold cross-validation, scoring
    ``r2`` for regressors and ``f1_weighted`` for classifiers. The best
    estimator is then evaluated on ``X_test``: the R² score is printed for
    regressors and a classification report for classifiers.

    Args:
        model: Unfitted estimator placed after the preprocessor.
        model_params: Grid of hyper-parameters for *model*; keys are given
            without the ``model__`` prefix, which is added here.
        is_reg: True to target ``estimated_sells`` (regression), False to
            target ``rating`` (classification).
        preprocessor: Feature transformer used as the first pipeline step.
            Defaults to ``with_svd``, the transformer previously hard-coded.

    Returns:
        Tuple ``(best_estimator, best_params)`` from the search.
    """
    target = "estimated_sells" if is_reg else "rating"
    pipeline = Pipeline(steps=[("pipeline", preprocessor), ("model", model)])
    prefixed_params = {f"model__{key}": val for key, val in model_params.items()}
    grid = GridSearchCV(pipeline, feature_params | prefixed_params,
                        cv=10, scoring="r2" if is_reg else "f1_weighted",
                        n_jobs=-1, verbose=10,
                       )
    best_model = grid.fit(X_train, y_train[target]).best_estimator_
    predictions = best_model.predict(X_test)
    if is_reg:
        print(r2_score(y_test[target], predictions))
    else:
        print(classification_report(y_test[target], predictions))
    return best_model, grid.best_params_

# Hyper-parameter grid for the CatBoost regressor, searched with successive
# halving.
cbreg_params = {'iterations': [500, 1000, 2000],
                'depth': np.arange(6, 11),}
model, params = half_grid_search_eval(cb.CatBoostRegressor(random_state=0, verbose=False), cbreg_params)


# Final regressor with fixed hyper-parameters.
# NOTE(review): depth=7 / iterations=2000 are presumably the values the search
# above selected; confirm against its output.
best_reg = make_pipeline(with_svd, cb.CatBoostRegressor(random_state=0, verbose=False, depth=7, iterations=2_000))

# Hyper-parameter grid for the LightGBM classifier.
lgclf_params = {'extra_trees': [True, False],
                'max_bin': [127, 255, 511],
                'num_leaves': [31, 63, 127],
                'boosting_type': ["gbdt", "dart", "goss"],
               }

# NOTE(review): this call passes a fourth positional argument (the
# preprocessor); grid_search_eval must accept it.
model, params = grid_search_eval(lgb.LGBMClassifier(random_state=0), lgclf_params, False, without_svd)


# Final classifier with fixed hyper-parameters.
# NOTE(review): the grid above uses ``max_bin`` while this uses ``max_bins``;
# LightGBM presumably treats it as an alias of ``max_bin``; confirm in the
# LightGBM parameter docs.
best_clf = make_pipeline(without_svd, lgb.LGBMClassifier(extra_trees=True,
                                                         random_state=0,
                                                         max_bins=511,
                                                         num_leaves=31,
                                                         boosting_type='dart',
                                                        )
                        )


# Refit the final classifier on all labelled data (train and hold-out).
best_clf.fit(X, y.rating)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('cat',
                                                  ColumnTransformer(transformers=[('tags',
                                                                                   CountVectorizer(binary=True),
                                                                                   'tags'),
                                                                                  ('genres',
                                                                                   CountVectorizer(binary=True),
                                                                                   'genres'),
                                                                                  ('categories',
                                                                                   CountVectorizer(binary=True),
                                                                                   'categories')]),
                                                  ('tags', 'genres',
                                                   'categories')),
                                                 ('numeric', StandardScaler(),
                                                  ['required_age', 'english',
                                                   'achievements',
                                                   'average_playtime', 'price',
                                                   'day', 'month', 'year',
                                                   'is_xmas']),
                                                 ('platform',
                                                  CountVectorizer(binary=True),
                                                  'platforms')])),
                ('lgbmclassifier',
                 LGBMClassifier(boosting_type='dart', extra_trees=True,
                                max_bins=511, random_state=0))])

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('cat',
                                                  ColumnTransformer(transformers=[('tags',
                                                                                   CountVectorizer(binary=True),
                                                                                   'tags'),
                                                                                  ('genres',
                                                                                   CountVectorizer(binary=True),
                                                                                   'genres'),
                                                                                  ('categories',
                                                                                   CountVectorizer(binary=True),
                                                                                   'categories')]),
                                                  ('tags', 'genres',
                                                   'categories')),
                                                 ('numeric', StandardScaler(),
                                                  ['required_age', 'english',
                                                   'achievements',
                                                   'average_playtime', 'price',
                                                   'day', 'month', 'year',
                                                   'is_xmas']),
                                                 ('platform',
                                                  CountVectorizer(binary=True),
                                                  'platforms')])),
                ('lgbmclassifier',
                 LGBMClassifier(boosting_type='dart', extra_trees=True,
                                max_bins=511, random_state=0))])

ColumnTransformer(transformers=[('cat',
                                 ColumnTransformer(transformers=[('tags',
                                                                  CountVectorizer(binary=True),
                                                                  'tags'),
                                                                 ('genres',
                                                                  CountVectorizer(binary=True),
                                                                  'genres'),
                                                                 ('categories',
                                                                  CountVectorizer(binary=True),
                                                                  'categories')]),
                                 ('tags', 'genres', 'categories')),
                                ('numeric', StandardScaler(),
                                 ['required_age', 'english', 'achievements',
                                  'average_playtime', 'price', 'day', 'month',
                                  'year', 'is_xmas']),
                                ('platform', CountVectorizer(binary=True),
                                 'platforms')])

('tags', 'genres', 'categories')

tags

CountVectorizer(binary=True)

genres

CountVectorizer(binary=True)

categories

CountVectorizer(binary=True)


# Refit the final regressor on all labelled data (train and hold-out).
best_reg.fit(X, y.estimated_sells)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('cat',
                                                  Pipeline(steps=[('columntransformer',
                                                                   ColumnTransformer(transformers=[('tags',
                                                                                                    CountVectorizer(binary=True),
                                                                                                    'tags'),
                                                                                                   ('genres',
                                                                                                    CountVectorizer(binary=True),
                                                                                                    'genres'),
                                                                                                   ('categories',
                                                                                                    CountVectorizer(binary=True),
                                                                                                    'categories')])),
                                                                  ('truncatedsvd',
                                                                   TruncatedSVD(n_components=100))]),
                                                  ('tags', 'genres',
                                                   'categories')),
                                                 ('numeric', StandardScaler(),
                                                  ['required_age', 'english',
                                                   'achievements',
                                                   'average_playtime', 'price',
                                                   'day', 'month', 'year',
                                                   'is_xmas']),
                                                 ('platform',
                                                  CountVectorizer(binary=True),
                                                  'platforms')])),
                ('catboostregressor',
                 <catboost.core.CatBoostRegressor object at 0x00000224ABEA42B0>)])

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('cat',
                                                  Pipeline(steps=[('columntransformer',
                                                                   ColumnTransformer(transformers=[('tags',
                                                                                                    CountVectorizer(binary=True),
                                                                                                    'tags'),
                                                                                                   ('genres',
                                                                                                    CountVectorizer(binary=True),
                                                                                                    'genres'),
                                                                                                   ('categories',
                                                                                                    CountVectorizer(binary=True),
                                                                                                    'categories')])),
                                                                  ('truncatedsvd',
                                                                   TruncatedSVD(n_components=100))]),
                                                  ('tags', 'genres',
                                                   'categories')),
                                                 ('numeric', StandardScaler(),
                                                  ['required_age', 'english',
                                                   'achievements',
                                                   'average_playtime', 'price',
                                                   'day', 'month', 'year',
                                                   'is_xmas']),
                                                 ('platform',
                                                  CountVectorizer(binary=True),
                                                  'platforms')])),
                ('catboostregressor',
                 <catboost.core.CatBoostRegressor object at 0x00000224ABEA42B0>)])

ColumnTransformer(transformers=[('cat',
                                 Pipeline(steps=[('columntransformer',
                                                  ColumnTransformer(transformers=[('tags',
                                                                                   CountVectorizer(binary=True),
                                                                                   'tags'),
                                                                                  ('genres',
                                                                                   CountVectorizer(binary=True),
                                                                                   'genres'),
                                                                                  ('categories',
                                                                                   CountVectorizer(binary=True),
                                                                                   'categories')])),
                                                 ('truncatedsvd',
                                                  TruncatedSVD(n_components=100))]),
                                 ('tags', 'genres', 'categories')),
                                ('numeric', StandardScaler(),
                                 ['required_age', 'english', 'achievements',
                                  'average_playtime', 'price', 'day', 'month',
                                  'year', 'is_xmas']),
                                ('platform', CountVectorizer(binary=True),
                                 'platforms')])

('tags', 'genres', 'categories')

ColumnTransformer(transformers=[('tags', CountVectorizer(binary=True), 'tags'),
                                ('genres', CountVectorizer(binary=True),
                                 'genres'),
                                ('categories', CountVectorizer(binary=True),
                                 'categories')])

tags

CountVectorizer(binary=True)

genres

CountVectorizer(binary=True)

categories


def generateFiles(predict_data, clf_pipe, rgr_pipe):
    """Generate the prediction archive to upload to CodaLab.

    Runs both pipelines on ``predict_data`` and writes
    ``predictions_final.zip`` in the current working directory. The archive
    contains ``predictions_rgr.txt`` and ``predictions_clf.txt``, each holding
    one prediction per line.

    The archive members are written straight from memory, so no scratch
    ``.txt`` files are created (or left behind on failure) in the working
    directory, and pre-existing files with those names are not touched.

    Args:
        predict_data: Input data to predict on; passed unchanged to the
            ``predict`` method of both pipelines.
        clf_pipe: Classifier pipeline exposing ``predict``.
        rgr_pipe: Regressor pipeline exposing ``predict``.

    Returns:
        None. The only output is ``predictions_final.zip``.
    """

    def _as_lines(predictions):
        # Explicit str conversion: unlike ``"%s\n" % item`` this also works
        # when an item happens to be a tuple.
        return "".join(f"{item!s}\n" for item in predictions)

    # Render both payloads before opening the archive, so a failure while
    # predicting or formatting never leaves a half-written zip behind.
    clf_lines = _as_lines(clf_pipe.predict(predict_data))
    rgr_lines = _as_lines(rgr_pipe.predict(predict_data))

    with ZipFile('predictions_final.zip', 'w') as archive:
        archive.writestr('predictions_rgr.txt', rgr_lines)
        archive.writestr('predictions_clf.txt', clf_lines)


# Build the submission archive from the test split.
# NOTE(review): clf_fitted / reg_fitted are defined outside this chunk —
# presumably the fitted classifier and regressor pipelines; confirm.
generateFiles(df_test, clf_fitted, reg_fitted)

	name	release_date	english	developer	publisher	platforms	required_age	categories	genres	tags	achievements	average_playtime	price	short_description	split
0	Frog Climbers	2016-10-06	1	TeamCrew	Dear Villagers	windows	0	Single-player;Local Multi-Player;Shared/Split ...	Indie	Indie;Local Multiplayer;Funny	0	239	4.99	Climb mountains. Beat your friends. Be a douch...	test
1	Aztaka	2009-11-05	1	Citeremis Inc.	Citeremis Inc.	windows;mac	0	Single-player	Action;RPG;Indie	RPG;Action;Indie	0	0	3.99	Inspired by Aztec legend, this action/rpg side...	test
2	Assault Spy / アサルトスパイ	2018-10-02	1	Wazen	NIS America, Inc.	windows	0	Single-player;Full controller support	Action;Indie	Action;Indie;Character Action Game	28	0	24.99	Dash, evade, and smash your way to the truth a...	test
3	Down To One	2016-01-07	1	Gadget Games	Gadget Games	windows	0	Multi-player;Steam Trading Cards;Stats	Action	Action;Survival;Open World	0	206	0.79	42 Players Start. Only 1 will survive.Down to ...	test
4	Stranded Deep	2015-01-23	1	Beam Team Games	Beam Team Pty Ltd	windows;mac;linux	0	Single-player;Full controller support;Includes...	Adventure;Indie;Early Access	Early Access;Survival;Open World	8	272	10.99	Take the role of a plane crash survivor strand...	test
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
7876	KOEWOTAYORINI / 声之寄托	2018-03-26	0	あみそ組	Pujia8 Studio;Gamera Game	windows	0	Single-player;Steam Achievements;Steam Cloud	Adventure;Free to Play;Indie;RPG	Free to Play;Adventure;Anime	20	65	0.00	“喂喂，求求你啦！救救我！” ——你接到这样一个电话发出这样的求救，给你打电话的人，是一名...	train
7877	Montaro	2016-07-25	1	JCKSLAP	MBDL	windows	0	Single-player;Steam Achievements;Steam Trading...	Casual;Indie	Memes;Cute;Casual	15	174	0.79	Montaro is a DOGE.	train
7878	Moe Jigsaw	2018-03-23	1	ARES Inc.	ARES Inc.	windows	0	Single-player;Steam Achievements;Steam Trading...	Casual;Indie	Casual;Nudity;Indie	72	0	2.89	"Moe Jigsaw" is the definitive versi...	train
7879	Drunkn Bar Fight	2016-11-28	1	The Munky	The Munky	windows	0	Single-player;Multi-player;Online Multi-Player...	Action;Indie;Early Access	Early Access;Action;Indie	0	0	10.99	VR PARTY GAMEDrunkn Bar Fight is a simple, imm...	train
7880	Intake	2013-11-06	1	Cipher Prime Studios	Cipher Prime Studios	windows;mac	0	Single-player;Steam Achievements;Steam Cloud;S...	Action;Indie	Indie;Action;Great Soundtrack	77	75	6.99	Intake is the new retro-futuristic drugstep ar...	train

	linux	mac	windows
0	0	0	1
1	0	1	1
2	0	0	1
3	0	0	1
4	1	1	1
...	...	...	...
871	0	0	1
872	1	1	1
873	0	0	1
874	0	0	1
875	0	0	1

	captions available	co-op	commentary available	cross-platform multiplayer	full controller support	in-app purchases	includes level editor	includes source sdk	local co-op	local multi-player	...	stats	steam achievements	steam cloud	steam leaderboards	steam trading cards	steam turn notifications	steam workshop	steamvr collectibles	valve anti-cheat enabled	vr support
0	0	0	0	0	1	0	0	0	0	1	...	0	0	0	1	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	1	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	0	0	0	0	0	0	...	1	0	0	0	1	0	0	0	0	0
4	0	0	0	0	1	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
871	0	0	0	0	0	1	0	0	0	0	...	1	1	1	0	0	0	0	0	0	0
872	0	0	0	0	0	0	0	0	0	0	...	0	1	1	0	1	0	0	0	0	0
873	0	0	0	0	0	0	0	0	0	0	...	0	1	0	0	0	0	0	0	0	0
874	0	1	0	0	0	0	0	0	0	0	...	0	1	1	0	1	0	0	0	0	0
875	0	1	0	0	0	0	0	0	0	0	...	0	1	1	0	1	0	0	0	0	0

Proyecto: El Desafío de Don René¶

Cuerpo Docente:¶

Reglas¶

El desafío de Don René¶

Definición Formal del Problema¶

Proyecto¶

Equipo:¶

1. Introducción¶

2. Análisis Exploratorio de Datos¶

2.1 Univariado¶

2.2 Bivariado¶

2.3 Reducción de dimensionalidad¶

3. Preparación de Datos¶

3.1 Fechas y numéricas¶

3.2 Características textuales¶

3.3 Creando el ColumnTransformer¶

4. Baseline¶

4.1 Regresión¶

Bajo calibre¶

Alto calibre¶

4.2 Clasificación¶

Bajo calibre¶

Alto calibre¶

5. Optimización del Modelo¶

5.1 Regresión¶

5.2 Clasificación¶

6. Conclusiones¶

Anexo: Generación de Archivo Submit de la Competencia¶

	linux	mac	windows
0	0	0	1
1	0	1	1
2	0	0	1
3	0	0	1
4	1	1	1
...	...	...	...
871	0	0	1
872	1	1	1
873	0	0	1
874	0	0	1
875	0	0	1

	captions available	co-op	commentary available	cross-platform multiplayer	full controller support	in-app purchases	includes level editor	includes source sdk	local co-op	local multi-player	...	stats	steam achievements	steam cloud	steam leaderboards	steam trading cards	steam turn notifications	steam workshop	steamvr collectibles	valve anti-cheat enabled	vr support
0	0	0	0	0	1	0	0	0	0	1	...	0	0	0	1	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	1	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	0	0	0	0	0	0	...	1	0	0	0	1	0	0	0	0	0
4	0	0	0	0	1	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
871	0	0	0	0	0	1	0	0	0	0	...	1	1	1	0	0	0	0	0	0	0
872	0	0	0	0	0	0	0	0	0	0	...	0	1	1	0	1	0	0	0	0	0
873	0	0	0	0	0	0	0	0	0	0	...	0	1	0	0	0	0	0	0	0	0
874	0	1	0	0	0	0	0	0	0	0	...	0	1	1	0	1	0	0	0	0	0
875	0	1	0	0	0	0	0	0	0	0	...	0	1	1	0	1	0	0	0	0	0

	linux	mac	windows
0	0	0	1
1	0	1	1
2	0	0	1
3	0	0	1
4	1	1	1
...	...	...	...
871	0	0	1
872	1	1	1
873	0	0	1
874	0	0	1
875	0	0	1

	captions available	co-op	commentary available	cross-platform multiplayer	full controller support	in-app purchases	includes level editor	includes source sdk	local co-op	local multi-player	...	stats	steam achievements	steam cloud	steam leaderboards	steam trading cards	steam turn notifications	steam workshop	steamvr collectibles	valve anti-cheat enabled	vr support
0	0	0	0	0	1	0	0	0	0	1	...	0	0	0	1	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	1	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	0	0	0	0	0	0	...	1	0	0	0	1	0	0	0	0	0
4	0	0	0	0	1	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
871	0	0	0	0	0	1	0	0	0	0	...	1	1	1	0	0	0	0	0	0	0
872	0	0	0	0	0	0	0	0	0	0	...	0	1	1	0	1	0	0	0	0	0
873	0	0	0	0	0	0	0	0	0	0	...	0	1	0	0	0	0	0	0	0	0
874	0	1	0	0	0	0	0	0	0	0	...	0	1	1	0	1	0	0	0	0	0
875	0	1	0	0	0	0	0	0	0	0	...	0	1	1	0	1	0	0	0	0	0

	linux	mac	windows
0	0	0	1
1	0	1	1
2	0	0	1
3	0	0	1
4	1	1	1
...	...	...	...
871	0	0	1
872	1	1	1
873	0	0	1
874	0	0	1
875	0	0	1

	captions available	co-op	commentary available	cross-platform multiplayer	full controller support	in-app purchases	includes level editor	includes source sdk	local co-op	local multi-player	...	stats	steam achievements	steam cloud	steam leaderboards	steam trading cards	steam turn notifications	steam workshop	steamvr collectibles	valve anti-cheat enabled	vr support
0	0	0	0	0	1	0	0	0	0	1	...	0	0	0	1	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	1	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	0	0	0	0	0	0	...	1	0	0	0	1	0	0	0	0	0
4	0	0	0	0	1	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
871	0	0	0	0	0	1	0	0	0	0	...	1	1	1	0	0	0	0	0	0	0
872	0	0	0	0	0	0	0	0	0	0	...	0	1	1	0	1	0	0	0	0	0
873	0	0	0	0	0	0	0	0	0	0	...	0	1	0	0	0	0	0	0	0	0
874	0	1	0	0	0	0	0	0	0	0	...	0	1	1	0	1	0	0	0	0	0
875	0	1	0	0	0	0	0	0	0	0	...	0	1	1	0	1	0	0	0	0	0