---
title: "csdata"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{csdata}
  %\VignetteEncoding{UTF-8}
  %\VignetteEngine{knitr::rmarkdown}
editor_options: 
  chunk_output_type: console
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(csdata)
library(data.table)
library(magrittr)
```

`csdata` provides structural data for Norway.

Please see https://www.csids.no/csdata/reference/index.html for a quick overview of all available datasets and functions.

## Location

Valid locations (and location types) are available in `csdata::nor_locations_names()`.  Uncommon/internal use is demarcated by a line through the text.

```{r echo=FALSE, results='asis'}
d <- csdata::nor_locations_names()[, .(
  n = .N,
  location_code = location_code[1],
  location_name = location_name[1],
  location_name_description_nb = location_name_description_nb[1],
  location_name_file_nb_utf = location_name_file_nb_utf[1],
  location_name_file_nb_ascii = location_name_file_nb_ascii[1]
),
by = .(granularity_geo)
]

gt::gt(d) %>%
  gt::tab_options(
    table.width = "1500px"
  ) %>%
  gt::tab_header(title = "Valid locations and location types in the csverse format") %>%
  gt::cols_label(
    granularity_geo = "Geo (Granularity)",
    n = "N"
  ) %>%
  # gt::cols_width(
  #   granularity_time ~ "20%",
  #   class ~ "15%",
  #   fn ~ "20%",
  #   example ~ "55%"
  # ) %>%
  gt::tab_spanner(
    label = "Examples",
    columns = c(location_code, location_name, location_name_description_nb, location_name_file_nb_utf, location_name_file_nb_ascii)
  ) %>%
  gt::tab_footnote(
    footnote = gt::md("**location_code**: Used a) **inside datasets** and b) in data **file names** for transfer of data/results between analytic systems. All values are unique."),
    locations = gt::cells_column_labels(
      columns = location_code
    )
  ) %>%
  gt::tab_footnote(
    footnote = gt::md("**location_name**: Used (rarely) **inside results** (figures, tables, documents). Can be confusing as some names are duplicated. Its rare usage is demarcated by a line through the text."),
    locations = gt::cells_column_labels(
      columns = location_name
    )
  ) %>%
  gt::tab_style(
    style = list(
      gt::cell_text(decorate = "line-through")
    ),
    locations = gt::cells_body(
      columns = location_name,
      rows = gt::everything()
    )
  ) %>%
  gt::tab_footnote(
    footnote = gt::md("**location_name_description_nb**: Used (frequently) **inside results** (figures, tables, documents). All values are unique."),
    locations = gt::cells_column_labels(
      columns = location_name_description_nb
    )
  ) %>%
  gt::tab_footnote(
    footnote = gt::md("**location_name_file_nb_utf**: Used (frequently) in the **file names** for results (figures, tables, documents). All values are unique."),
    locations = gt::cells_column_labels(
      columns = location_name_file_nb_utf
    )
  ) %>%
  gt::tab_footnote(
    footnote = gt::md("**location_name_file_nb_ascii**: Used (rarely) in the **file names** for results (figures, tables, documents). Used if file systems have problems with the Norwegian letters æøå. All values are unique."),
    locations = gt::cells_column_labels(
      columns = location_name_file_nb_ascii
    )
  ) %>%
  gt::tab_footnote(
    footnote = "Bo- og arbeidsmarkedsregioner. Housing and labor market regions.",
    locations = gt::cells_body(
      columns = granularity_geo,
      rows = granularity_geo == "baregion"
    )
  ) %>%
  gt::tab_footnote(
    footnote = "Landsdeler/riskdeler. Geographical regions.",
    locations = gt::cells_body(
      columns = granularity_geo,
      rows = granularity_geo == "georegion"
    )
  ) %>%
  gt::tab_footnote(
    footnote = "Mattilsynet-regioner. Food authority regions.",
    locations = gt::cells_body(
      columns = granularity_geo,
      rows = granularity_geo == "mtregion"
    )
  )
```

## Ages

Ages should be coded as characters and should always contain 3 digits. If it is an age range, the two ages are joined by an underscore (e.g. `005_010`). 

Use `085p` instead of `>=085` or `85+`, as this will allow for an easy conversion from long to wide formatted data.

```{r echo=FALSE, results='asis'}
d <- rbind(
  data.frame(
    value = "\"000\"",
    class = "character",
    definition = "One year age group (0 year olds)"
  ),
  data.frame(
    value = "\"079\"",
    class = "character",
    definition = "One year age group(79 year olds)"
  ),
  data.frame(
    value = "\"000_004\"",
    class = "character",
    definition = "Age span of 0-4 year olds"
  ),
  data.frame(
    value = "\"065p\"",
    class = "character",
    definition = "Age span of >=65 year olds"
  ),
  data.frame(
    value = "\"missing\"",
    class = "character",
    definition = "Missing/unknown"
  ),
  data.frame(
    value = "\"total\"",
    class = "character",
    definition = "Everyone"
  )
)

gt::gt(d) %>%
  gt::tab_header(title = "Valid ages in the csverse format") %>%
  gt::cols_label(
    value = "Value",
    definition = "Definition"
  )
```

This format will help your data be easily sorted, kept in the right order, and generate valid variable names if converted to wide-format.

Missing ages should be coded as "missing".

## Sex

Sex should be coded as characters.

```{r echo=FALSE, results='asis'}
d <- rbind(
  data.frame(
    value = "\"male\"",
    class = "character",
    definition = "Male"
  ),
  data.frame(
    value = "\"female\"",
    class = "character",
    definition = "Female"
  ),
  data.frame(
    value = "\"missing\"",
    class = "character",
    definition = "Missing/unknown"
  ),
  data.frame(
    value = "\"total\"",
    class = "character",
    definition = "Everyone"
  )
)

gt::gt(d) %>%
  gt::tab_header(title = "Valid sexes in the csverse format") %>%
  gt::cols_label(
    value = "Value",
    definition = "Definition"
  )
```

Missing sexes should be coded as "missing".