Search
Classification

Notes du cours

Dans le dépôt https://github.com/nmeraihi/ACT6100 du cours, vous trouverez les notes de Mathieu Pigeon présentées en classe.

Exemple en classe

# Load the tab-separated Ronfle data set from the course repository and
# preview its first rows.
df <- read.csv(
  file = "https://raw.githubusercontent.com/nmeraihi/ACT6100/master/data/Ronfle.txt",
  sep = "\t"
)
head(df)
AgePoidsTailleIMCAlccolSexeRonfleTabacAlcoolDiscAgeDiscIMCDisc
33 64 160 25.000000 1 0 1 0 1 2
38 42 161 16.203084 0 0 1 1 1 1
57 49 164 18.218320 1 0 1 0 3 1
64 49 164 18.218324 0 1 1 1 4 1
56 58 164 21.564547 0 1 0 2 3 2
51 91 195 23.931622 0 1 1 1 3 2

Commençons par la variable réponse :

# Recode the response (Ronfle) and the categorical columns as factors so that
# glm() handles them as class labels instead of numeric scores.
df[["Ronfle"]] <- as.factor(df[["Ronfle"]])
df[["Sexe"]] <- as.factor(df[["Sexe"]])
df[["Tabac"]] <- as.factor(df[["Tabac"]])
df[["IMCDisc"]] <- as.factor(df[["IMCDisc"]])
df[["AlcoolDisc"]] <- as.factor(df[["AlcoolDisc"]])

# Full logistic regression of Ronfle on Age, Tabac, IMCDisc and AlcoolDisc.
modele_binom <- glm(
  Ronfle ~ Age + Tabac + IMCDisc + AlcoolDisc,
  family = binomial(link = "logit"),
  data = df
)
summary(modele_binom)
Call:
glm(formula = Ronfle ~ Age + Tabac + IMCDisc + AlcoolDisc, family = binomial(link = "logit"), 
    data = df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.5166  -0.8823  -0.5250   1.0467   2.2500  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -3.62024    1.91803  -1.887 0.059096 .  
Age          0.06194    0.02352   2.634 0.008450 ** 
Tabac1      -1.22609    0.57153  -2.145 0.031931 *  
IMCDisc2    -0.76402    1.49032  -0.513 0.608192    
IMCDisc3    -0.57495    1.38250  -0.416 0.677502    
IMCDisc4    -1.11395    1.56191  -0.713 0.475723    
AlcoolDisc1  1.18624    0.60447   1.962 0.049712 *  
AlcoolDisc2  2.45785    0.72666   3.382 0.000719 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 129.49  on 99  degrees of freedom
Residual deviance: 108.77  on 92  degrees of freedom
AIC: 124.77

Number of Fisher Scoring iterations: 4
# Reduced logistic regression: Ronfle explained by Age, Tabac and AlcoolDisc
# only (IMCDisc is left out of this specification).
modele_binom_2 <- glm(
  Ronfle ~ Age + Tabac + AlcoolDisc,
  family = binomial(link = "logit"),
  data = df
)
summary(modele_binom_2)
Call:
glm(formula = Ronfle ~ Age + Tabac + AlcoolDisc, family = binomial(link = "logit"), 
    data = df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.4863  -0.9201  -0.5223   1.0958   2.2622  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -4.24010    1.39577  -3.038 0.002383 ** 
Age          0.06111    0.02334   2.618 0.008839 ** 
Tabac1      -1.17120    0.55651  -2.105 0.035332 *  
AlcoolDisc1  1.21450    0.59950   2.026 0.042779 *  
AlcoolDisc2  2.37953    0.71553   3.326 0.000882 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 129.49  on 99  degrees of freedom
Residual deviance: 109.50  on 95  degrees of freedom
AIC: 119.5

Number of Fisher Scoring iterations: 4
$$\text{pred}=-4.24010+0.06111\,\text{Age}-1.17120\,\text{Tabac1}+1.21450\,\text{AlcoolDisc1}+2.37953\,\text{AlcoolDisc2}$$
# Preview the first six rows of the data frame.
head(df, n = 6L)
AgePoidsTailleIMCAlccolSexeRonfleTabacAlcoolDiscAgeDiscIMCDisc
33 64 160 25.000000 1 0 1 0 1 2
38 42 161 16.203084 0 0 1 1 1 1
57 49 164 18.218320 1 0 1 0 3 1
64 49 164 18.218324 0 1 1 1 4 1
56 58 164 21.564547 0 1 0 2 3 2
51 91 195 23.931622 0 1 1 1 3 2
# Fitted values of the reduced model on the response (probability) scale,
# computed for every row of df.
prediction <- predict(
  object = modele_binom_2,
  newdata = df,
  type = "response"
)
head(prediction)
1
0.0324598653554836
2
0.132996954487015
3
0.126953143173517
4
0.429003055544818
5
0.826561336980563
6
0.253445835532904
# Classification cut-off: an observation is predicted as 1 when its fitted
# probability is strictly greater than tau, and as 0 otherwise.
tau <- 0.5
prediction_0_1 <- as.numeric(prediction > tau)

# Observed class next to predicted class, one row per observation.
df_confus <- data.frame(
  obser = df$Ronfle,
  pred_0_1 = prediction_0_1
)
head(df_confus)
obserpred_0_1
00
00
00
10
11
10

$\text{sensib}=\frac{\text{0 correct}}{\text{total 0}}$

# Confusion matrix: observed classes in rows, predicted classes in columns.
table(obser = df_confus$obser, pred_0_1 = df_confus$pred_0_1)
     pred_0_1
obser  0  1
    0 55 10
    1 21 14
# Rates read off the confusion matrix printed by table(df_confus).
# sens: share of observed 0 that are predicted 0 (55 out of 55 + 10).
# sepec: share of observed 1 that are predicted 1 (14 out of 21 + 14).
# NOTE(review): if class 1 is taken as the positive class, 55/65 is usually
# called the specificity and 14/35 the sensitivity -- confirm the convention.
sens <- 55 / 65
sepec <- 14 / (21 + 14)
# Linear predictor (log-odds scale, not yet a probability) built from the
# rounded intercept and Age coefficient of modele_binom_2, for Age = 50 with
# the Tabac and AlcoolDisc terms set to zero.
prob <- 0.06111 * 50 - 4.24010
prob
-1.1846
# Inverse logit of the linear predictor, i.e. exp(prob) / (1 + exp(prob)).
# plogis() evaluates the same quantity but stays finite when exp(prob) would
# overflow (the hand-written ratio then gives Inf / Inf = NaN).
plogis(prob)
0.23422611224904