• R/O
  • SSH

Commit

Tags
No Tags

Frequently used words (click to add to your profile)

java · c++ · android · linux · c# · windows · objective-c · cocoa · 誰得 · qt · python · php · ruby · game · gui · bathyscaphe · c · 計画中(planning stage) · 翻訳 · omegat · framework · twitter · dom · test · vb.net · directx · ゲームエンジン · btron · arduino · previewer

Commit MetaInfo

Revisión: 3387593ddd7bde7d7c045fd0eab12f8a90329889 (tree)
Tiempo: 2024-10-09 00:29:55
Autor: Lorenzo Isella <lorenzo.isella@gmai...>
Committer: Lorenzo Isella

Log Message

A code to generate some synthetic data.

Cambiar Resumen

Diferencia incremental

diff -r 154d30e4eb43 -r 3387593ddd7b R-codes/synthesis.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/R-codes/synthesis.R Tue Oct 08 17:29:55 2024 +0200
@@ -0,0 +1,55 @@
1+rm(list=ls()) ## Synthetic-data generator. Starts by removing every object from the workspace (NOTE(review): discouraged in shared scripts).
2+library(tidyverse)
3+library(janitor)
4+library(charlatan)
5+## Helper library loaded from an absolute path; list_to_df() used below is presumably defined there -- TODO confirm.
6+source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R")
7+
8+set.seed(1234) ## seed R's RNG so the sample()/runif()/rbinom() draws below are repeatable
9+## nn: 30 generated names (charlatan::ch_name), sorted.
10+nn <- ch_name(30) |>
11+ sort()
12+## ll: 100e3 names drawn from nn with replacement, with weight i/sum(1:30) for the i-th sorted name,
13+## then split into a list holding one tibble per distinct name. NOTE(review): `T` is reassignable; `TRUE` is safer.
14+ll <- tibble(x= sample(nn, 100e3, replace=T, prob=(1:30)/sum(1:30))) |>
15+ group_by(x) |>
16+ group_split()
17+
18+p <- runif(30,0,1) ## 30 uniform draws in [0, 1]; passed to rbinom() as the probability of a 1 and stored as real_success_rate
19+## NOTE(review): df_p pairs p[i] with nn[i] while map2() pairs p[i] with ll[[i]]; these agree only if group_split() returns groups in sort(nn) order and all 30 names are distinct and sampled -- confirm.
20+df_p <- tibble(real_success_rate=p, group=nn)
21+## res: for each group tibble z and its probability y, draw nrow(z) 0/1 outcomes (rbinom with size 1).
22+res <- map2(ll,p, \(z,y) tibble(x=rbinom(nrow(z),1, y)))
23+## Flatten both lists with list_to_df() (not defined in this file). Assumes it row-binds in list order and adds a `source` column, since `source` is grouped on below -- TODO confirm.
24+df_ll <- ll |>
25+ list_to_df() |>
26+ rename("name"="x")
27+
28+df_res <- res |>
29+ list_to_df() |>
30+ rename("success"="x") ## |>
31+ ## mutate(failure=round(failure, 0))
32+## test: per-`source` row count and observed mean of `success`, sorted ascending by mean_success.
33+## It is a sanity-check table only; nothing below references it.
34+test <- df_res |>
35+ group_by(source) |>
36+ summarise(n=n(), mean_success=mean(success)) |>
37+ ungroup() |>
38+ arrange(mean_success)
39+## df_out: names and outcomes combined column-wise (relies on df_ll and df_res having their rows in the same order),
40+## then real_success_rate is attached by joining `name` to df_p$group.
41+df_out <- tibble(name=df_ll$name, success=df_res$success) |>
42+ left_join(y=df_p, by=c("name"="group"))
43+## test2: per-name row count, observed mean success and the first real_success_rate of the group, sorted ascending by mean_success.
44+test2 <- df_out |>
45+ group_by(name) |>
46+ summarise(n=n(), mean_success=mean(success),
47+ real_rate=real_success_rate[1]) |>
48+ ungroup() |>
49+ arrange(mean_success)
50+## Save the result relative to the current working directory;
51+## readr compresses the output automatically because of the .gz extension.
52+write_csv(df_out, "artificial_data.csv.gz")
53+
54+## Status message printed once the script has run to the end.
55+print("So far so good")