Helper functions for leo.gwas_qc
gwas_qc_helpers.Rd
Internal utilities used in the GWAS QC pipeline.
Usage
is_complementary(a1st, a2nd)
fetch_indel(df, type = "both")
fetch_non_indel(df)
duplicated_SNP_lines(
df,
type = "rm",
dup_columns = c("SNP"),
group_columns = dup_columns
)
slice1_SNP_lines(df, dup_columns = c("SNP"), group_columns = dup_columns)
fetch_same_direcrtion(df_x2, df_lg)
any_na(df)
get_biallelic_snp(df, A1_col = "A1", A2_col = "A2")
Functions
is_complementary(a1st, a2nd): Check if two alleles form an A/T or C/G pair.
fetch_indel(df, type): Filter indels by allele-string length.
fetch_non_indel(df): Keep SNPs with single-base alleles.
get_biallelic_snp(df): Keep SNPs with single-base alleles in both A1 and A2.
duplicated_SNP_lines(df, type, dup_columns, group_columns): Get/remove duplicated SNP rows.
slice1_SNP_lines(df, dup_columns, group_columns): Within duplicated groups, keep the first row.
fetch_same_direcrtion(df_x2, df_lg): Keep same-direction effects between datasets.
any_na(df): Count NAs per column.
Value
is_complementary: logical vector.
fetch_indel, fetch_non_indel, get_biallelic_snp, slice1_SNP_lines, duplicated_SNP_lines("rm"): data frame.
duplicated_SNP_lines("get"): data frame with a `count` column.
fetch_same_direcrtion: filtered data frame (a subset of `df_x2`).
any_na: data frame with the columns `column`, `n_na`, and `prop_na`.
Examples
if (FALSE) { # \dontrun{
# Small demo dataset
df <- data.frame(
SNP = c("rs1","rs2","rs3","rs2"),
A1 = c("A","AT","C","C"),
A2 = c("T","A","G","G"),
OR = c(1.2, 0.9, 1.1, 1.1)
)
# Complementary alleles
is_complementary("A","T") # TRUE
is_complementary("A","G") # FALSE
# Indels and non-indels
fetch_indel(df, "both")
fetch_non_indel(df)
# Duplicates by SNP
duplicated_SNP_lines(df, "get", dup_columns = "SNP")
slice1_SNP_lines(df, dup_columns = "SNP")
# Same-direction effects between two datasets
df2 <- transform(df, OR = c(1.1, 1.3, 0.8, 1.1))
fetch_same_direcrtion(df, df2)
# Count NA by column
any_na(df)
# Keep only biallelic SNP rows
get_biallelic_snp(df)
} # }