Jovialis
Advisor
- Messages
- 9,888
- Reaction score
- 6,794
- Points
- 113
- Ethnic group
- Italian
- Y-DNA haplogroup
- R1b-PF7566>Y227216
- mtDNA haplogroup
- H6a1b7
I used ChatGPT’s Deep Research (o3 mini-high) and Grok 3 Think to analyze Harney et al. (2019)'s PDF supplement as well as the GitHub ADMIXTOOLS2 tutorial to determine best practices for analyzing my modern WGS30x sample alongside AADR 62.0 1240K aDNA data. Over the past few days, I’ve been running tests to develop an optimal, comprehensive script that yields the most accurate results by employing F2 pre-computation, qpWave, and qpAdm, with each function’s parameters set appropriately.
Code:
# Step 1: Define file paths
prefix <- "D:\\Bioinformatics\\01_Admixtools_Dataset\\v62.0_1240K_HO_Jovialis_Plink\\v62.0_1240K_HO_Jovialis"
my_f2_dir <- "D:\\Bioinformatics\\01_Admixtools_Dataset\\my_f2_dir_v62.0_1240K_HO_Jovialis"
# Step 2: Load required libraries
library(admixtools)
library(tidyverse)
# Step 3: Define target and population lists
target <- c('Jovialis')
left <- c('Moldova_MBA_Catacomb.AG', 'Italy_Ordona_Daunian.SG', 'Greece_Crete_HgCharalambos_EMBA.AG')
right <- c("Ethiopia_4500BP.AG", "Russia_UstIshim_IUP.DG", "Russia_Sidelkino_HG.SG", "Russia_Kostenki14_UP.AG.BY.AA", "Russia_AfontovaGora3_UP.AG", "Russia_MA1_UP.SG", "Luxembourg_Mesolithic.AG", "Czechia_Gravettian.AG.BY.AA", "Belgium_GoyetQ116_1_UP.AG", "Spain_Magdalenian_contam.AG", "Georgia_Kotias_Mesolithic.SG", "Iran_GanjDareh_N.AG", "Turkey_Marmara_Barcin_N.AG", "Jordan_PPNB.AG", "Israel_Natufian.AG")
# Step 4: Combine all populations for f2 statistics
mypops <- c(target, left, right)
# Step 5: Generate f2 statistics for qpWave
cat("\nExtracting f2 statistics...\n")
extract_f2(prefix, my_f2_dir, pops = mypops, overwrite = TRUE, maxmiss = 1, blgsize = 0.05)
f2_blocks <- f2_from_precomp(my_f2_dir, pops = mypops, afprod = TRUE)
# Step 6: Run qpWave to validate admixture model complexity
cat("\nRunning qpWave to check rank...\n")
qpwave_results <- qpwave(f2_blocks, left = left, right = right, verbose = TRUE)
# Save qpWave results to file
write.csv(qpwave_results$rankdrop, file = "qpwave_rankdrop_results.csv", row.names = FALSE)
cat("\nqpWave Rank Drop Results:\n")
print(qpwave_results$rankdrop)
# Step 7: Determine whether to proceed with qpAdm
min_p <- min(qpwave_results$rankdrop$p, na.rm = TRUE)
if (!is.na(min_p) && min_p > 0.05) {
cat("\nqpWave indicates that the source populations form a clade (p > 0.05). This suggests that the sources do not exhibit distinct ancestries.\n")
cat("\nqpAdm may not provide meaningful results in this case, but we will proceed for comparison.\n")
# Run qpAdm with allsnps = TRUE using genotype files
results <- qpadm(prefix, left, right, target, allsnps = TRUE)
cat("\nqpAdm Admixture Weights (allsnps = TRUE, may not be meaningful due to clade formation):\n")
print(results$weights)
cat("\nqpAdm Population Drop Results (allsnps = TRUE):\n")
print(results$popdrop)
# Save results to file
write.csv(results$weights, file = "qpadm_weights_clade.csv", row.names = FALSE)
write.csv(results$popdrop, file = "qpadm_popdrop_clade.csv", row.names = FALSE)
} else {
cat("\nqpWave indicates that the source populations do not form a clade (p ≤ 0.05). Proceeding with qpAdm to estimate admixture proportions.\n")
# Run qpAdm with allsnps = TRUE using genotype files
results_true <- qpadm(prefix, left, right, target, allsnps = TRUE)
cat("\nqpAdm Admixture Weights (allsnps = TRUE):\n")
print(results_true$weights)
cat("\nqpAdm Population Drop Results (allsnps = TRUE):\n")
print(results_true$popdrop)
# Save results to file
write.csv(results_true$weights, file = "qpadm_weights_allsnps_TRUE.csv", row.names = FALSE)
write.csv(results_true$popdrop, file = "qpadm_popdrop_allsnps_TRUE.csv", row.names = FALSE)
# Run qpAdm with allsnps = FALSE for comparison
results_false <- qpadm(f2_blocks, left, right, target, allsnps = FALSE)
cat("\nqpAdm Admixture Weights (allsnps = FALSE):\n")
print(results_false$weights)
cat("\nqpAdm Population Drop Results (allsnps = FALSE):\n")
print(results_false$popdrop)
# Save results to file
write.csv(results_false$weights, file = "qpadm_weights_allsnps_FALSE.csv", row.names = FALSE)
write.csv(results_false$popdrop, file = "qpadm_popdrop_allsnps_FALSE.csv", row.names = FALSE)
}
cat("\nAnalysis complete. Check the output CSV files for detailed results.\n")
Last edited: