Are you sure you want to delete this access key?
💈 Développez un moteur de recommandation de films
;; Preamble — Emacs Setup (noexport)
;; Editor configuration for working on this notebook: org source-block
;; fontification plus lsp-mode / lsp-ui feature toggles.
(setq org-src-fontify-natively t)

(setq lsp-semantic-tokens-enable t)
(setq lsp-enable-symbol-highlighting t)

(setq lsp-enable-file-watchers nil
      read-process-output-max (* 1024 1024)
      gc-cons-threshold 100000000
      lsp-idle-delay 0.5
      ;; NOTE(review): assumed the next line was a commented-out setting — confirm
      ;; lsp-eldoc-hook nil
      lsp-eldoc-enable-hover nil
      ;; no headerline breadcrumb
      lsp-headerline-breadcrumb-enable nil
      ;; no imenu (see menu-list)
      lsp-enable-imenu nil
      ;; code lenses
      lsp-lens-enable t
      lsp-semantic-highlighting t
      lsp-modeline-code-actions-enable t)

(setq lsp-completion-provider :company
      lsp-completion-show-detail t
      lsp-completion-show-kind t)

(setq lsp-ui-doc-enable t
      lsp-ui-doc-show-with-mouse nil
      lsp-ui-doc-show-with-cursor t
      lsp-ui-doc-use-childframe t
      lsp-ui-sideline-diagnostic-max-line-length 80
      ;; lsp-ui-imenu
      lsp-ui-imenu-enable nil
      ;; lsp-ui-peek
      lsp-ui-peek-enable t
      ;; lsp-ui-sideline
      lsp-ui-sideline-enable t
      lsp-ui-sideline-ignore-duplicate t
      lsp-ui-sideline-show-symbol t
      lsp-ui-sideline-show-hover t
      lsp-ui-sideline-show-diagnostics t
      lsp-ui-sideline-show-code-actions t)

(setq lsp-diagnostics-provider :none
      lsp-modeline-diagnostics-enable nil
      lsp-signature-auto-activate nil ;; you could manually request them via `lsp-signature-activate`
      lsp-signature-render-documentation nil)
Imports%matplotlib inline %load_ext autoreload %autoreload 2
import sys import os import warnings warnings.filterwarnings("ignore") import pickle
import numpy as np import pandas as pd import matplotlib.pyplot as plt import seaborn as sns
from keras.preprocessing.image import load_img from keras.preprocessing.image import img_to_array from keras.applications.vgg16 import preprocess_input
from keras.applications.vgg16 import VGG16 from keras.models import Model
from sklearn.cluster import KMeans from sklearn.decomposition import PCA
from random import randint import pickle
:results:
:end:
# Functions
def display_all(df):
    """Display *df* with the pandas row and column display limits raised to 100.

    The limits are only changed for the duration of the call; ``display`` is
    the notebook-provided display function.
    """
    limits = ("display.max_rows", 100, "display.max_columns", 100)
    with pd.option_context(*limits):
        display(df)
:results:
:end:
Org noexportimport IPython import tabulate
class OrgFormatter(IPython.core.formatters.BaseFormatter):
    """IPython display formatter registered for the ``text/org`` MIME type.

    ``print_method`` names ``_repr_org_`` as the per-object hook for this
    formatter.
    """

    format_type = IPython.core.formatters.Unicode("text/org")
    print_method = IPython.core.formatters.ObjectName("_repr_org_")
def pd_dataframe_to_org(df):
    """Render *df* as an org-mode table string using ``tabulate``.

    Column names are used as headers and the index is always included.
    """
    org_table = tabulate.tabulate(
        df,
        headers="keys",
        tablefmt="orgtbl",
        showindex="always",
    )
    return org_table
# Install an org-mode formatter in the running IPython shell and teach it to
# render pandas DataFrames through pd_dataframe_to_org.
ip = get_ipython()
f = OrgFormatter()
f.for_type_by_name("pandas.core.frame", "DataFrame", pd_dataframe_to_org)
ip.display_formatter.formatters["text/org"] = f
:results:
:end:
# The Data — Load Data
# Read the prepared movie metadata CSV into a DataFrame.
df = pd.read_csv(
    "/Users/lss/Sites/simplon.ai/briefs/recsys/data/movie_metadata_prepared.csv"
)
:results:
0 - 06c3c7bc
-1cc7-4984-a890-a11f3ea59b
:end:
# Summary statistics for every column (include='all' covers non-numeric
# columns too), transposed so that each row describes one column of df.
display_all(df.describe(include='all').T)
:results:
count | unique | top | freq | mean | std | min | 25% | 50% | 75% | max | |
---|---|---|---|---|---|---|---|---|---|---|---|
color | 5024 | 2 | Color | 4815 | nan | nan | nan | nan | nan | nan | nan |
director_name | 4939 | 2398 | Steven Spielberg | 26 | nan | nan | nan | nan | nan | nan | nan |
num_critic_for_reviews | 4993 | nan | nan | nan | 140.194 | 121.602 | 1 | 50 | 110 | 195 | 813 |
duration | 5028 | nan | nan | nan | 107.201 | 25.1974 | 7 | 93 | 103 | 118 | 511 |
director_facebook_likes | 4939 | nan | nan | nan | 686.509 | 2813.33 | 0 | 7 | 49 | 194.5 | 23000 |
actor_3_facebook_likes | 5020 | nan | nan | nan | 645.01 | 1665.04 | 0 | 133 | 371.5 | 636 | 23000 |
actor_2_name | 5030 | 3032 | Morgan Freeman | 20 | nan | nan | nan | nan | nan | nan | nan |
actor_1_facebook_likes | 5036 | nan | nan | nan | 6560.05 | 15020.8 | 0 | 614 | 988 | 11000 | 640000 |
gross | 4159 | nan | nan | nan | 4.84684e+07 | 6.8453e+07 | 162 | 5.34099e+06 | 2.55175e+07 | 6.23094e+07 | 7.60506e+08 |
genres | 5043 | 914 | Drama | 236 | nan | nan | nan | nan | nan | nan | nan |
actor_1_name | 5036 | 2097 | Robert De Niro | 49 | nan | nan | nan | nan | nan | nan | nan |
movie_title | 5043 | 4917 | Ben-Hur | 3 | nan | nan | nan | nan | nan | nan | nan |
num_voted_users | 5043 | nan | nan | nan | 83668.2 | 138485 | 5 | 8593.5 | 34359 | 96309 | 1.68976e+06 |
cast_total_facebook_likes | 5043 | nan | nan | nan | 9699.06 | 18163.8 | 0 | 1411 | 3090 | 13756.5 | 656730 |
actor_3_name | 5020 | 3521 | John Heard | 8 | nan | nan | nan | nan | nan | nan | nan |
facenumber_in_poster | 5030 | nan | nan | nan | 1.37117 | 2.01358 | 0 | 0 | 1 | 2 | 43 |
plot_keywords | 4890 | 4760 | based on novel | 4 | nan | nan | nan | nan | nan | nan | nan |
movie_imdb_link | 5043 | 4919 | http://www.imdb.com/title/tt0232500/?ref_=fn_tt_tt_1 | 3 | nan | nan | nan | nan | nan | nan | nan |
num_user_for_reviews | 5022 | nan | nan | nan | 272.771 | 377.983 | 1 | 65 | 156 | 326 | 5060 |
language | 5031 | 47 | English | 4704 | nan | nan | nan | nan | nan | nan | nan |
country | 5038 | 65 | USA | 3807 | nan | nan | nan | nan | nan | nan | nan |
content_rating | 4740 | 18 | R | 2118 | nan | nan | nan | nan | nan | nan | nan |
budget | 4551 | nan | nan | nan | 3.97526e+07 | 2.06115e+08 | 218 | 6e+06 | 2e+07 | 4.5e+07 | 1.22155e+10 |
title_year | 4935 | nan | nan | nan | 2002.47 | 12.4746 | 1916 | 1999 | 2005 | 2011 | 2016 |
actor_2_facebook_likes | 5030 | nan | nan | nan | 1651.75 | 4042.44 | 0 | 281 | 595 | 918 | 137000 |
imdb_score | 5043 | nan | nan | nan | 6.44214 | 1.12512 | 1.6 | 5.8 | 6.6 | 7.2 | 9.5 |
aspect_ratio | 4714 | nan | nan | nan | 2.2204 | 1.38511 | 1.18 | 1.85 | 2.35 | 2.35 | 16 |
movie_facebook_likes | 5043 | nan | nan | nan | 7525.96 | 19320.4 | 0 | 0 | 166 | 3000 | 349000 |
end |
# Split the column names of the loaded frame by dtype. The frame is loaded
# and described as ``df`` earlier in this notebook, so use that name here
# (``df_raw`` is never defined).
numerical = df.select_dtypes(include="number").columns
categorical = df.select_dtypes(exclude="number").columns

print(f"categorical columns are : {', '.join(str(x) for x in categorical)}")
print(f"numerical columns are : {', '.join(str(x) for x in numerical)}")
categorical columns are : color, director_name, actor_2_name, genres, actor_1_name, movie_title, actor_3_name, plot_keywords, movie_imdb_link, language, country, content_rating
numerical columns are : num_critic_for_reviews, duration, director_facebook_likes, actor_3_facebook_likes, actor_1_facebook_likes, gross, num_voted_users, cast_total_facebook_likes, facenumber_in_poster, num_user_for_reviews, budget, title_year, actor_2_facebook_likes, imdb_score, aspect_ratio, movie_facebook_likes
path = "/Users/lss//Sites/simplon.ai/briefs/recsys/data/posters"
# The poster images are later opened by bare file name, so make the poster
# directory the working directory.
os.chdir(path)

# Collect the file names of the .jpg images in the poster directory.
with os.scandir(path) as files:
    posters = [entry.name for entry in files if entry.name.endswith(".jpg")]
:results:
:end:
# Load the first poster, resized to 224x224.
img = load_img(posters[0], target_size=(224,224))
# Convert the loaded image to a NumPy array and show its shape.
img = np.array(img)
print(img.shape)
:results:
:end:
# Add a leading axis of size 1 so the single image becomes a batch of one.
reshaped_img = img.reshape((1, 224, 224, 3))
print(reshaped_img.shape)
:results:
:end:
# Preprocessing
# Apply the VGG16 input preprocessing to the batched image.
x = preprocess_input(reshaped_img)
:results:
:end:
The Model Architecture

Nous pouvons maintenant charger le modèle VGG et supprimer manuellement la couche de sortie. La nouvelle couche finale est alors une couche entièrement connectée comportant 4 096 nœuds de sortie. Ce vecteur de 4 096 nombres est le vecteur de caractéristiques que nous utiliserons pour regrouper les images.
# Instantiate VGG16 with its default arguments.
model = VGG16()
# Rebuild the model so that it stops at the second-to-last layer, dropping the
# output layer; per the accompanying text this yields a 4096-value feature
# vector per image.
model = Model(inputs=model.inputs, outputs=model.layers[-2].output)
:results:
:end:
# Run the truncated network on the preprocessed batch to get its feature
# vector. ``use_multiprocessing`` is dropped: it only applies to generator /
# Sequence inputs, has no effect on an in-memory array, and is not accepted by
# newer Keras versions of ``Model.predict``.
features = model.predict(x)
:results:
:end:
# Print the feature array's shape, then leave the array as the cell's last
# expression so the notebook displays it.
print(features.shape)
features
:results:
array([[0.4690976, 0. , 0. , ..., 0. , 0. ,
3.0501497]], dtype=float32)
:end:
Bibliography Referencesbibliographystyle:unsrt bibliography:recsys.bib
Local Variables noexportPress p or to see the previous file or, n or to see the next file
Are you sure you want to delete this access key?
Are you sure you want to delete this access key?
Are you sure you want to delete this access key?
Are you sure you want to delete this access key?