Revisión | 9451dab8a6c050e76bbb740c416f16935a801ff8 (tree) |
---|---|
Tiempo | 2023-01-19 18:34:03 |
Autor | Lorenzo Isella <lorenzo.isella@gmai...> |
Committer | Lorenzo Isella |
Almost final form of the script to process the Romanian tam data.
@@ -33,22 +33,22 @@ | ||
33 | 33 | clean_names() |> |
34 | 34 | select(-c(text_integral_masura, executanti)) |
35 | 35 | |
36 | -df_name <- read_csv("correspondence.csv") |> | |
36 | +df_name <- read_csv("correspondence_modified.csv") |> | |
37 | 37 | clean_names() |> |
38 | 38 | pattern_to_na("...") |> |
39 | 39 | complete_data() |
40 | 40 | |
41 | -tam_names <- c("id", "case_reference", "aid_award_created_date", "aid_award_granted_date", | |
42 | -"aid_award_published_date", "aid_award_reference", "case_title_original", | |
43 | -"case_title_english", "main_procedure_type_code", "is_co_finance", | |
44 | -"aid_award_objective", "aid_award_objective_other_english", "aid_award_instrument", | |
45 | -"aid_award_instrument_other_english", "beneficiary_name", "beneficiary_name_english", | |
46 | -"national_identification", "national_identification_type", "beneficiary_type", | |
47 | -"beneficiary_country", "beneficiary_region", "beneficiary_sector", | |
48 | -"granted_aid_absolute_eur", "nominal_aid_absolute_eur", "granted_range_eur", | |
49 | -"aid_award_ga_original", "aid_award_ga_english", "aid_award_nuts_code", | |
50 | -"creator_country", "year", "granted_value_extended_eur", "nominal_value_extended_eur", | |
51 | -"is_covid_case") | |
41 | +## tam_names <- c("id", "case_reference", "aid_award_created_date", "aid_award_granted_date", | |
42 | +## "aid_award_published_date", "aid_award_reference", "case_title_original", | |
43 | +## "case_title_english", "main_procedure_type_code", "is_co_finance", | |
44 | +## "aid_award_objective", "aid_award_objective_other_english", "aid_award_instrument", | |
45 | +## "aid_award_instrument_other_english", "beneficiary_name", "beneficiary_name_english", | |
46 | +## "national_identification", "national_identification_type", "beneficiary_type", | |
47 | +## "beneficiary_country", "beneficiary_region", "beneficiary_sector", | |
48 | +## "granted_aid_absolute_eur", "nominal_aid_absolute_eur", "granted_range_eur", | |
49 | +## "aid_award_ga_original", "aid_award_ga_english", "aid_award_nuts_code", | |
50 | +## "creator_country", "year", "granted_value_extended_eur", "nominal_value_extended_eur", | |
51 | +## "is_covid_case") | |
52 | 52 | ## names(tam) |
53 | 53 | |
54 | 54 |
@@ -74,7 +74,8 @@ | ||
74 | 74 | ) |> |
75 | 75 | mutate(case_reference=select_left_pattern(case_reference, "/")) |> |
76 | 76 | mutate( is_covid_case=if_else(case_reference %in% covid$case_reference, |
77 | - "Yes", "No") ) | |
77 | + "Yes", "No") ) |> | |
78 | + mutate(national_identification=as.character(national_identification)) | |
78 | 79 | |
79 | 80 | |
80 | 81 |
@@ -91,7 +92,7 @@ | ||
91 | 92 | |
92 | 93 | |
93 | 94 | |
94 | -aid <- read_csv("aid_type.csv") |> | |
95 | +aid <- read_csv("aid_type_modified.csv") |> | |
95 | 96 | complete_data() |> |
96 | 97 | mutate(aid_instrument_rom=tolower(aid_instrument_rom)) |
97 | 98 |
@@ -99,11 +100,13 @@ | ||
99 | 100 | "întreprindere mică", |
100 | 101 | "întreprindere mijlocie", |
101 | 102 | "întreprindere mare", |
102 | - "altă categorie de întreprindere"), | |
103 | + "altă categorie de întreprindere", | |
104 | + "imm"), | |
103 | 105 | new=c("Small and medium-sized enterprises", |
104 | 106 | "Small and medium-sized enterprises", |
105 | 107 | "Small and medium-sized enterprises", |
106 | 108 | "Only large enterprises", |
109 | + "", | |
107 | 110 | "" |
108 | 111 | )) |
109 | 112 |
@@ -119,7 +122,7 @@ | ||
119 | 122 | |
120 | 123 | select(-c(amount_of_aid_awarded_per_sub_category_in_romanian_lei,obs_value)) |> |
121 | 124 | mutate(beneficiary_sector=as.character(beneficiary_sector)) |> |
122 | - rename("case_title"="case_title_original", | |
125 | + rename(## "case_title"="case_title_original", | |
123 | 126 | "aid_award_instrument_other_english"="aid_award_instrument") |> |
124 | 127 | mutate(aid_award_instrument_other_english=tolower(aid_award_instrument_other_english)) |> |
125 | 128 | mutate(aid_award_instrument_other_english=recode_many(aid_award_instrument_other_english, aid$aid_instrument_rom ,aid$aid_instrument_eng)) |> |
@@ -146,7 +149,9 @@ | ||
146 | 149 | "nominal_aid_absolute_eur", "granted_aid_absolute_eur", |
147 | 150 | "beneficiary_region" , "beneficiary_sector" , |
148 | 151 | "case_reference" , "beneficiary_country" , |
149 | - "year") | |
152 | + "national_identification", | |
153 | + "year" , "case_title_original" | |
154 | + ) | |
150 | 155 | |
151 | 156 | ## microîntreprindere Small and medium-sized enterprises |
152 | 157 | ## întreprindere mică Small and medium-sized enterprises |
@@ -166,7 +171,7 @@ | ||
166 | 171 | rename("aid_award_instrument"="aid_award_instrument_other_english") |> |
167 | 172 | select(any_of(nn)) |> |
168 | 173 | mutate(beneficiary_country="Romania") |> |
169 | - mutate(nominal_value_extended_eur=nominal_aid_absolute_eur, | |
174 | + mutate(nominal_value_extended_eur=granted_aid_absolute_eur, | |
170 | 175 | granted_value_extended_eur=granted_aid_absolute_eur, |
171 | 176 | is_covid_case=if_else(case_reference %in% covid$case_reference, "Yes", "No")) |> |
172 | 177 | mutate(granted_value_extended_eur=if_else(is_covid_case=="No", granted_value_extended_eur, NA_real_)) |> |