# Tyler Moore # R code for analyzing Bitcoin exchange failure # Paper http://lyle.smu.edu/~tylerm/fc13.pdf # Last modified 2013-05-05 options(scipen=999) #read in survival table survex3<-read.table("http://lyle.smu.edu/~tylerm/data/bitcoin/sdatamodCC.csv",sep=",",header=T) survex3$dailyvol<-survex3$totalvol/survex3$lifetime #anti-money laundering indicators aml<-read.table("http://lyle.smu.edu/~tylerm/data/bitcoin/compliance-aml-cft-whole.csv",sep=",",header=T) amls<-merge(survex3,aml,by="Country") amls$PrevN<-amls$Preventive/amls$PreventativeMax amls$InstN<-amls$Institutional/amls$InstitutionalMax amls$LegalN<-amls$Legal/amls$LegalMax amls$AllN<-amls$All/amls$AllMax amlsv<-amls[!is.na(amls$dailyvol),] row.names(amlsv)<-amlsv$exchange library(survival) # Cox model cox.vh<-coxph(Surv(time=amlsv$lifetime,event=amlsv$censored,type='right')~log2(amlsv$dailyvol)+amlsv$Hacked+amlsv$All,method="breslow") summary(cox.vh) #gives relative risk compared to average amlsv$risk<-predict(cox.vh,amlsv,type="risk") plot(survfit(cox.vh)) meancox<-survfit(cox.vh) meanhaz<-exp(coef(cox.vh)[1]*mean(log2(amlsv$dailyvol),na.rm=T)+coef(cox.vh)[2]*mean(as.numeric(amlsv$Hacked),na.rm=T)) coxplots<-survfit(cox.vh,newdata=amlsv) amlsv$exchange[!is.na(amlsv$Hacked)&!is.na(amlsv$dailyvol)] #Code for Figure 1 #pdf("../fig/survcox.pdf",width=6,height=5) par(mar=c(4.1,4.1,0.5,0.5)) plot(coxplots[15],col="green",lty="dashed",lwd=2, xlab="Days", ylab="Survival probability", cex.lab=1.3, cex.axis=1.3 ) #Mt Gox lines(coxplots[2],col="red",lty="dotted",lwd=2) #Vircurex (counterexample: still open, lo-vol) lines(coxplots[28],col="blue",lty="dotdash",lwd=2) #Intersango (still open, higher volume) lines(coxplots[30],col="brown",lty="longdash",lwd=2) #Bitfloor (security breach, hard to see what point is). lines(survfit(cox.vh),lwd=2) #Mean legend("topright",legend=c("Intersango","Mt. Gox","Bitfloor","Vircurex","Average"),col=c("blue","green","brown","red","black"),lwd=2,lty=c("dotdash","dashed","longdash","dotted","solid")) dev.off() #get coxplots index from the following line (level order) amlsv$filename amlsv$lifemo<-amlsv$lifetime/30.#recreate table 1 #Now do the logistic regression based on breach probability hlogit <- glm(Hacked ~ log2(dailyvol) + lifemo, data = amlsv, family = binomial(link="logit")) summary(hlogit) #get the odds ratio with 95% CI exp(cbind(OR = coef(hlogit), confint(hlogit))) amlsv$Closed<-amlsv$censor==1 #get predictions for hacking probability for hypothetical daily transaction volumes newdata1 <- with(amlsv, data.frame(lifetime = mean(lifetime), dailyvol=c(10,100,1000,10000))) newdata1$rankP <- predict(hlogit, newdata = newdata1, type = "response") #make a graph of predicted hack based on daily volume newdata2 <- with(amlsv, data.frame(lifetime = mean(lifetime), dailyvol=seq(from = 2, to = 50000, length.out = 10000))) newdata2 <- cbind(newdata2, predict(hlogit, newdata = newdata2, type = "response", se = TRUE)) newdata2$fitmin<-with(newdata2,fit - (1.645 * se.fit)) newdata2$fitmin[newdata2$fitmin<0]<-0.0 newdata2$fitmax<-with(newdata2,fit + (1.645 * se.fit)) newdata2$fitmax[newdata2$fitmax>1]<-1.0 #Figure 2 #pdf("../fig/volumebreach.pdf",width=6,height=4) par(mar=c(4.1,4.1,0.5,0.7)) plot(y=newdata2$fit,x=newdata2$dailyvol,type='l',log='x',ylim=c(0,1),lwd=2, xlab="Daily transaction volume at exchange", ylab="Probability exchange has breach", cex.lab=1.3, cex.axis=1.3 ) lines(y=newdata2$fitmin,x=newdata2$dailyvol,type='l',lty='dashed',lwd=2) lines(y=newdata2$fitmax,x=newdata2$dailyvol,type='l',lty='dashed',lwd=2) legend("topleft",legend=c("Predicted probability","90% C.I."),col="black",lwd=2,lty=c("solid","dashed")) dev.off() #stats for model fit #this gives chi-squared with(hlogit, null.deviance - deviance) #this gives associated p value with(hlogit, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))