Refactor: Improve code clarity, modularity, and style

This commit is contained in:
2025-10-20 12:42:14 +08:00
parent 48aa3d475d
commit 374c63b3d2
17 changed files with 1573 additions and 0 deletions

40
CMakeLists.txt Normal file
View File

@@ -0,0 +1,40 @@
cmake_minimum_required(VERSION 3.15)
project(snplib LANGUAGES CXX)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
option(USE_MKL "Use Intel MKL" OFF)
option(USE_OPENBLAS "Use OpenBLAS" OFF)
if(USE_MKL)
find_package(MKL REQUIRED)
if(MKL_FOUND)
include_directories(${MKL_INCLUDE_DIRS})
link_libraries(${MKL_LIBRARIES})
endif()
elseif(USE_OPENBLAS)
find_package(OpenBLAS REQUIRED)
if(OPENBLAS_FOUND)
include_directories(${OPENBLAS_INCLUDE_DIRS})
link_libraries(${OPENBLAS_LIBRARIES})
endif()
endif()
include_directories(src)
add_library(snplib
src/data_manage.cc
src/grm.cc
src/ibs.cc
src/king.cc
src/snp.cc
src/statistics.cc
src/ugrm.cc
)
target_include_directories(snplib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src)
# Example executable
add_executable(example main.cc)
target_link_libraries(example snplib)

42
GEMINI.md Normal file
View File

@@ -0,0 +1,42 @@
# Project: SNPLIB
## 1. Overview
This project (SNPLIB) is a high-performance computing library for Quantitative Genetics developed in C++. The core objective of the project is to leverage modern C++ features and achieve extreme numerical computation performance through optimized math libraries (such as Intel MKL or OpenBLAS).
This project uses CMake for cross-platform build management and strictly adheres to the Google C++ Coding Style.
## 2. Core Tech Stack & Standards
### 2.1 Programming Language
* **C++**: The project will use C++ (C++17 or higher recommended) as the primary development language.
### 2.2 Build System
* **CMake**: This project uses CMake (version 3.15 or higher recommended) as the sole cross-platform build system.
* `CMakeLists.txt` files are responsible for managing all compilation targets, dependency linking, and platform-specific configurations.
### 2.3 Performance & Dependencies
* **BLAS/LAPACK Acceleration**: To achieve high-performance matrix and vector operations, this project relies on the BLAS and LAPACK interfaces.
* **Supported Libraries**:
* **Intel MKL (Math Kernel Library)**: The preferred high-performance library, especially on Intel architectures.
* **OpenBLAS**: A high-performance open-source alternative for BLAS/LAPACK with excellent cross-platform compatibility.
* The CMake configuration will provide options (e.g., `USE_MKL=ON` or `USE_OPENBLAS=ON`) to allow users to select the math library to link against at compile time.
### 2.4 Coding Style
* **Google C++ Coding Style**: This project strictly adheres to the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
* **Formatting**: All C++ code submitted must be formatted using `clang-format` (based on the Google style).
* **Linting**: Using `clang-tidy` is recommended to ensure code quality and style consistency.
## 3. Environment Setup
Before starting the build, you must ensure the following dependencies are installed on your system:
1. **C++ Compiler** (e.g., GCC, Clang, MSVC)
2. **CMake** (version >= 3.15)
3. **Intel MKL** or **OpenBLAS**
* For Intel MKL, ensure the `MKLROOT` environment variable is set, or point to its installation path via the CMake `CMAKE_PREFIX_PATH`.
* For OpenBLAS, ensure CMake can find it via `find_package(OpenBLAS)`.

6
main.cc Normal file
View File

@@ -0,0 +1,6 @@
#include <iostream>
int main() {
std::cout << "Hello, SNPLIB!" << std::endl;
return 0;
}

98
src/data_manage.cc Normal file
View File

@@ -0,0 +1,98 @@
// data_manage.cc
#include "data_manage.h"
#include <cmath>
#include <vector>
#include <array>
#include "snp.h"
namespace snplib {
// Unpacks genotype data from a packed format to a double-precision floating-point
// format.
void UnpackGeno(const uint8_t *geno, size_t num_samples, size_t num_snps,
double *geno_d) {
const std::array<double, 4> geno_table{0.0, 0.0, 1.0, 2.0};
SNP snp(geno, num_samples);
for (size_t i = 0; i < num_snps; ++i) {
auto *snp_geno_d = geno_d + i * num_samples;
snp.UnpackGeno(geno_table, snp_geno_d);
snp += 1;
}
}
// Unpacks genotype data for GRM calculation, standardizing the genotypes.
void UnpackGRMGeno(const uint8_t *geno, const double *af, size_t num_samples,
size_t num_snps, double *geno_d) {
std::array<double, 4> geno_table{0.0, 0.0, 0.0, 0.0};
SNP snp(geno, num_samples);
for (size_t i = 0; i < num_snps; ++i) {
auto mu = 2.0 * af[i];
auto std = std::sqrt(2.0 * af[i] * (1.0 - af[i]));
geno_table[0] = -mu / std;
geno_table[2] = (1.0 - mu) / std;
geno_table[3] = (2.0 - mu) / std;
auto *snp_geno_d = geno_d + i * num_samples;
snp.UnpackGeno(geno_table, snp_geno_d);
snp += 1;
}
}
// Unpacks genotype data into a -1, 0, 1 format.
void UnpackUGeno(const uint8_t *geno, size_t num_samples, size_t num_snps,
double *geno_d) {
const std::array<double, 4> geno_table{-1.0, 0.0, 0.0, 1.0};
SNP snp(geno, num_samples);
for (size_t i = 0; i < num_snps; ++i) {
auto *snp_geno_d = geno_d + i * num_samples;
snp.UnpackGeno(geno_table, snp_geno_d);
snp += 1;
}
}
// Flips the genotypes of specified SNPs.
void FlipGeno(uint8_t *geno, size_t num_samples, size_t num_snps,
const int32_t *index) {
auto num_full_bytes = num_samples / 4;
auto num_samples_left = num_samples % 4;
auto num_bytes = num_full_bytes + (num_samples_left > 0 ? 1 : 0);
for (size_t i = 0; i < num_snps; ++i) {
auto *tmp_g = geno + index[i] * num_bytes;
for (size_t j = 0; j < num_bytes; ++j) {
uint8_t g = ~tmp_g[j];
uint8_t g1 = (g >> 1) & 0x55;
uint8_t g2 = (g << 1) & 0xAA;
tmp_g[j] = g1 + g2;
}
}
}
// Creates a new genotype dataset by selecting a subset of samples.
void Keep(const uint8_t *src_geno, uint8_t *dest_geno, size_t num_src_samples,
size_t num_dest_samples, size_t num_snps, const int32_t *index) {
SNP src_snp(src_geno, num_src_samples);
auto num_full_bytes = num_dest_samples / 4;
auto num_samples_left = num_dest_samples % 4;
auto num_bytes = num_full_bytes + (num_samples_left > 0 ? 1 : 0);
for (size_t l = 0; l < num_snps; ++l) {
auto *tmp_g = dest_geno + l * num_bytes;
for (size_t i = 0; i < num_full_bytes; ++i) {
uint8_t t = src_snp(index[4 * i]);
t += src_snp(index[4 * i + 1]) << 2;
t += src_snp(index[4 * i + 2]) << 4;
t += src_snp(index[4 * i + 3]) << 6;
tmp_g[i] = t;
}
if (num_samples_left > 0) {
uint8_t t = 0;
for (size_t i = 0; i < num_samples_left; ++i) {
t += src_snp(index[4 * num_full_bytes + i]) << (2 * i);
}
tmp_g[num_full_bytes] = t;
}
src_snp += 1;
}
}
} // namespace snplib

33
src/data_manage.h Normal file
View File

@@ -0,0 +1,33 @@
// data_manage.h
#ifndef SNPLIB_SRC_DATA_MANAGE_H_
#define SNPLIB_SRC_DATA_MANAGE_H_
#include <cstdint>
#include <cstddef>
namespace snplib {
// Unpacks genotype data from a packed format to a double-precision floating-point
// format.
void UnpackGeno(const uint8_t *geno, size_t num_samples, size_t num_snps,
double *geno_d);
// Unpacks genotype data for GRM calculation, standardizing the genotypes.
void UnpackGRMGeno(const uint8_t *geno, const double *af, size_t num_samples,
size_t num_snps, double *geno_d);
// Unpacks genotype data into a -1, 0, 1 format.
void UnpackUGeno(const uint8_t *geno, size_t num_samples, size_t num_snps,
double *geno_d);
// Flips the genotypes of specified SNPs.
void FlipGeno(uint8_t *geno, size_t num_samples, size_t num_snps,
const int32_t *index);
// Creates a new genotype dataset by selecting a subset of samples.
void Keep(const uint8_t *src_geno, uint8_t *dest_geno, size_t num_src_samples,
size_t num_dest_samples, size_t num_snps, const int32_t *index);
} // namespace snplib
#endif // SNPLIB_SRC_DATA_MANAGE_H_

244
src/grm.cc Normal file
View File

@@ -0,0 +1,244 @@
// grm.cc
#include "grm.h"
#include <algorithm>
#include <cmath>
#include <thread>
#include <vector>
#include "snp.h"
namespace snplib {
namespace {
const uint64_t kSnpMask = 0x1249124912491249ull; // 0x1249=b0001001001001001
// Calculates the SNP correlation table for a given allele frequency.
void CalcSnpCorrelation(double af, std::array<double, 8> &table) {
table[0] = 2.0 * af / (1.0 - af); // 00 00
table[1] = 0.0; // 00 01
table[2] = 1.0 / (1.0 - af) - 2; // 00 10
table[3] = -2.0; // 00 11
table[4] = 0.5 / af / (1.0 - af) - 2.0; // 10 10
table[5] = 1.0 / af - 2.0; // 10 11
table[6] = 2.0 / af - 2.0; // 11 11
table[7] = 0.0; // 01 XX
}
// Updates the lookup table for a block of 20 SNPs.
void UpdateLookupTable(const double *af, double *lookup_table) {
std::array<std::array<double, 8>, 5> tables;
for (size_t i = 0; i < 4; ++i) {
auto *lkt = lookup_table + i * 32768;
for (size_t j = 0; j < 5; j++) {
CalcSnpCorrelation(af[5 * i + j], tables[j]);
}
for (size_t j = 0; j < 32768; ++j) {
lkt[j] = tables[0][j & 7u] + tables[1][j >> 3 & 7u] +
tables[2][j >> 6 & 7u] + tables[3][j >> 9 & 7u] +
tables[4][j >> 12 & 7u];
}
}
}
// Updates the lookup table for the remaining SNPs.
void UpdateLookupTable(const double *af, size_t num_snps_left,
double *lookup_table) {
std::array<std::array<double, 8>, 5> tables;
size_t shorts_left = num_snps_left / 5;
size_t snps_remain = num_snps_left % 5;
for (size_t i = 0; i < shorts_left; ++i) {
auto *lkt = lookup_table + i * 32768;
for (size_t j = 0; j < 5; j++) {
CalcSnpCorrelation(af[5 * i + j], tables[j]);
}
for (size_t j = 0; j < 32768; ++j) {
lkt[j] = tables[0][j & 7u] + tables[1][j >> 3 & 7u] +
tables[2][j >> 6 & 7u] + tables[3][j >> 9 & 7u] +
tables[4][j >> 12 & 7u];
}
}
if (snps_remain != 0u) {
auto *lkt = lookup_table + shorts_left * 32768;
for (size_t j = 0; j < snps_remain; j++) {
CalcSnpCorrelation(af[5 * shorts_left + j], tables[j]);
}
for (size_t j = snps_remain; j < 5; ++j) {
std::fill(tables[j].begin(), tables[j].end(), 0.0);
}
for (size_t j = 0; j < 32768; ++j) {
lkt[j] = tables[0][j & 7u] + tables[1][j >> 3 & 7u] +
tables[2][j >> 6 & 7u] + tables[3][j >> 9 & 7u] +
tables[4][j >> 12 & 7u];
}
}
}
// Creates a mask for the genotype data.
void MaskGeno(const uint64_t *geno64, size_t num_samples, uint64_t *mask64) {
for (size_t i = 0; i < num_samples; ++i) {
uint64_t tmp_geno = ~(geno64[i] ^ kSnpMask);
tmp_geno = (tmp_geno & (tmp_geno >> 1)) & kSnpMask;
mask64[i] = tmp_geno * 7ull;
}
}
// Updates the GRM with the contribution from a block of SNPs.
void UpdateGRM(const uint64_t *geno64, const uint64_t *mask64,
const double *lookup_table, size_t num_samples,
double *matrix) {
for (size_t i = 0; i < num_samples; i++) {
if (mask64[i] == 0u) {
for (size_t j = i; j < num_samples; ++j) {
uint64_t g = (geno64[i] + geno64[j]) | mask64[j];
matrix[i * num_samples + j] +=
lookup_table[g & 0x7fffu] +
lookup_table[32768 + (g >> 16 & 0x7fffu)] +
lookup_table[65536 + (g >> 32 & 0x7fffu)] +
lookup_table[98304 + (g >> 48 & 0x7fffu)];
}
} else {
for (size_t j = i; j < num_samples; ++j) {
uint64_t g = (geno64[i] + geno64[j]) | (mask64[i] | mask64[j]);
matrix[i * num_samples + j] +=
lookup_table[g & 0x7fffu] +
lookup_table[32768 + (g >> 16 & 0x7fffu)] +
lookup_table[65536 + (g >> 32 & 0x7fffu)] +
lookup_table[98304 + (g >> 48 & 0x7fffu)];
}
}
}
}
// Thread function for calculating the GRM.
void CalcGRMMatrixThread(const uint8_t *geno, const double *af,
size_t num_samples, size_t num_snps, double *matrix) {
SNP snp(geno, num_samples);
auto num_blocks = num_snps / 20u;
auto num_snps_left = num_snps % 20u;
std::vector<uint64_t> geno64(num_samples);
std::vector<uint64_t> mask64(num_samples);
std::vector<double> lookup_table(4 * 32768);
std::fill(matrix, matrix + num_samples * num_samples, 0.0);
for (size_t i = 0; i < num_blocks; ++i) {
snp.TransposeGeno(20, geno64.data());
UpdateLookupTable(af, lookup_table.data());
MaskGeno(geno64.data(), num_samples, mask64.data());
UpdateGRM(geno64.data(), mask64.data(), lookup_table.data(), num_samples, matrix);
snp += 20;
af += 20;
}
if (num_snps_left > 0) {
snp.TransposeGeno(num_snps_left, geno64.data());
UpdateLookupTable(af, num_snps_left, lookup_table.data());
MaskGeno(geno64.data(), num_samples, mask64.data());
UpdateGRM(geno64.data(), mask64.data(), lookup_table.data(), num_samples, matrix);
}
}
// Thread function for calculating the diagonal of the GCTA GRM.
void CalcGCTADiagonalThread(const uint8_t *geno, const double *af, size_t num_samples,
size_t num_snps, double *diagonal) {
SNP snp(geno, num_samples);
std::vector<double> geno_d(num_samples, 0.0);
std::fill(diagonal, diagonal + num_samples, 0.0);
std::array<double, 4> geno_table{0.0, 0.0, 0.0, 0.0};
for (size_t i = 0; i < num_snps; ++i) {
auto var = 2.0 * af[i] * (1.0 - af[i]);
geno_table[0] = 2.0 * af[i] / var;
geno_table[3] = (2.0 - 2.0 * af[i]) / var;
snp.UnpackGeno(geno_table, geno_d.data());
for (size_t j = 0; j < num_samples; ++j) {
diagonal[j] += geno_d[j];
}
snp += 1;
}
}
} // namespace
// Calculates the Genomic Relationship Matrix (GRM) from genotype data.
void CalcGRMMatrix(const uint8_t *geno, const double *af, size_t num_samples,
size_t num_snps, double *grm, size_t num_threads) {
std::vector<std::thread> workers;
std::vector<double> matrices(num_samples * num_samples * num_threads);
auto num_snps_job = num_snps / num_threads + 1;
auto num_snps_left = num_snps % num_threads;
auto num_full_bytes = num_samples / 4;
auto num_samples_left = num_samples % 4;
auto num_bytes = num_full_bytes + (num_samples_left > 0 ? 1 : 0);
for (size_t i = 0; i < num_snps_left; ++i) {
workers.emplace_back(CalcGRMMatrixThread, geno, af, num_samples,
num_snps_job,
matrices.data() + i * num_samples * num_samples);
geno += num_snps_job * num_bytes;
af += num_snps_job;
}
--num_snps_job;
for (size_t i = num_snps_left; i < num_threads; ++i) {
workers.emplace_back(CalcGRMMatrixThread, geno, af, num_samples,
num_snps_job,
matrices.data() + i * num_samples * num_samples);
geno += num_snps_job * num_bytes;
af += num_snps_job;
}
for (auto &&iter : workers) {
iter.join();
}
std::fill(grm, grm + num_samples * num_samples, 0.0);
for (size_t k = 0; k < num_threads; ++k) {
auto *tmp_m = matrices.data() + k * num_samples * num_samples;
for (size_t i = 0; i < num_samples; ++i) {
for (size_t j = i; j < num_samples; ++j) {
grm[i * num_samples + j] += tmp_m[i * num_samples + j];
}
}
}
for (size_t i = 0; i < num_samples; ++i) {
for (size_t j = i; j < num_samples; ++j) {
grm[i * num_samples + j] /= num_snps;
grm[j * num_samples + i] = grm[i * num_samples + j];
}
}
}
// Calculates the diagonal of the GCTA GRM.
void CalcGCTADiagonal(const uint8_t *geno, const double *af, size_t num_samples,
size_t num_snps, double *diagonal, size_t num_threads) {
std::vector<std::thread> workers;
std::vector<double> diagonals(num_samples * num_threads);
auto num_snps_job = num_snps / num_threads + 1;
auto num_snps_left = num_snps % num_threads;
auto num_full_bytes = num_samples / 4;
auto num_samples_left = num_samples % 4;
auto num_bytes = num_full_bytes + (num_samples_left > 0 ? 1 : 0);
for (size_t i = 0; i < num_snps_left; ++i) {
workers.emplace_back(CalcGCTADiagonalThread, geno, af, num_samples, num_snps_job,
diagonals.data() + i * num_samples);
geno += num_snps_job * num_bytes;
af += num_snps_job;
}
--num_snps_job;
for (size_t i = num_snps_left; i < num_threads; ++i) {
workers.emplace_back(CalcGCTADiagonalThread, geno, af, num_samples, num_snps_job,
diagonals.data() + i * num_samples);
geno += num_snps_job * num_bytes;
af += num_snps_job;
}
for (auto &&iter : workers) {
iter.join();
}
std::fill(diagonal, diagonal + num_samples, 0.0);
for (size_t k = 0; k < num_threads; ++k) {
auto *tmp_d = diagonals.data() + k * num_samples;
for (size_t i = 0; i < num_samples; ++i) {
diagonal[i] += tmp_d[i];
}
}
for (size_t i = 0; i < num_samples; ++i) {
diagonal[i] /= num_snps;
}
}
} // namespace snplib

20
src/grm.h Normal file
View File

@@ -0,0 +1,20 @@
// grm.h
#ifndef SNPLIB_SRC_GRM_H_
#define SNPLIB_SRC_GRM_H_
#include <cstdint>
#include <cstddef>
namespace snplib {
// Calculates the Genomic Relationship Matrix (GRM) from genotype data.
void CalcGRMMatrix(const uint8_t *geno, const double *af, size_t num_samples,
size_t num_snps, double *grm, size_t num_threads);
// Calculates the diagonal of the GCTA GRM.
void CalcGCTADiagonal(const uint8_t *geno, const double *af, size_t num_samples,
size_t num_snps, double *diagonal, size_t num_threads);
} // namespace snplib
#endif // SNPLIB_SRC_GRM_H_

245
src/ibs.cc Normal file
View File

@@ -0,0 +1,245 @@
// ibs.cc
#include "ibs.h"
#include <algorithm>
#include <thread>
#include <vector>
#include <immintrin.h>
#include "snp.h"
namespace snplib {
namespace {
const uint64_t kIbsMask = 0x5555555555555555ull; // 0x5555=b0101010101010101
// Calculates the number of IBS for a block of 1024 SNPs between two samples.
uint64_t CalcIBSBlock(const __m128i *geno1, const __m128i *mask1,
const __m128i *geno2, const __m128i *mask2) {
uint64_t count = 0;
for (size_t i = 0; i < 16; ++i) {
__m128i g = _mm_xor_si128(_mm_loadu_si128(geno1 + i), _mm_loadu_si128(geno2 + i));
__m128i m = _mm_and_si128(_mm_loadu_si128(mask1 + i), _mm_loadu_si128(mask2 + i));
g = _mm_andnot_si128(g, m);
count += _mm_popcnt_u64(_mm_cvtsi128_si64(g));
g = _mm_srli_si128(g, 8);
count += _mm_popcnt_u64(_mm_cvtsi128_si64(g));
}
return count;
}
// Creates a mask for the genotype data.
void MaskGeno(const uint64_t *geno64, size_t num_samples, uint64_t *mask64) {
for (size_t i = 0; i < 32 * num_samples; ++i) {
uint64_t tmp_geno = geno64[i] ^ kIbsMask;
tmp_geno = (tmp_geno | (tmp_geno >> 1)) & kIbsMask;
mask64[i] = tmp_geno * 3ull;
}
}
// Updates the IBS matrix with the contribution from a block of SNPs.
void UpdateIBSMatrix(const uint64_t *geno64, const uint64_t *mask64,
size_t num_samples, uint64_t *matrix) {
for (size_t i = 0; i < num_samples; ++i) {
auto *g1 = reinterpret_cast<const __m128i *>(geno64 + 32 * i);
auto *m1 = reinterpret_cast<const __m128i *>(mask64 + 32 * i);
for (size_t j = i; j < num_samples; ++j) {
auto *g2 = reinterpret_cast<const __m128i *>(geno64 + 32 * j);
auto *m2 = reinterpret_cast<const __m128i *>(mask64 + 32 * j);
matrix[i * num_samples + j] += CalcIBSBlock(g1, m1, g2, m2);
}
}
}
// Updates the IBS connection with the contribution from a block of SNPs.
void UpdateIBSConnection(const uint64_t *src_geno64, const uint64_t *src_mask64,
const uint64_t *dest_geno64, const uint64_t *dest_mask64,
size_t num_src_samples, size_t num_dest_samples,
uint64_t *connect) {
for (size_t i = 0; i < num_dest_samples; ++i) {
auto *g1 = reinterpret_cast<const __m128i *>(dest_geno64 + 32 * i);
auto *m1 = reinterpret_cast<const __m128i *>(dest_mask64 + 32 * i);
uint64_t result = 0;
for (size_t j = 0; j < num_src_samples; ++j) {
auto *g2 = reinterpret_cast<const __m128i *>(src_geno64 + 32 * j);
auto *m2 = reinterpret_cast<const __m128i *>(src_mask64 + 32 * j);
result += CalcIBSBlock(g1, m1, g2, m2);
}
connect[i] += result;
}
}
// Thread function for calculating the IBS matrix.
void CalcIBSMatrixThread(const uint8_t *geno, size_t num_samples,
size_t num_snps, uint64_t *matrix) {
SNP snp(geno, num_samples);
auto num_blocks = num_snps / 1024u;
auto num_snps_left = num_snps % 1024u;
std::vector<uint64_t> geno64(32 * num_samples);
std::vector<uint64_t> mask64(32 * num_samples);
std::fill(matrix, matrix + num_samples * num_samples, 0ull);
for (size_t i = 0; i < num_blocks; ++i) {
for (size_t j = 0; j < 32; ++j) {
snp.TransposeGeno(32, j, geno64.data());
snp += 32;
}
MaskGeno(geno64.data(), num_samples, mask64.data());
UpdateIBSMatrix(geno64.data(), mask64.data(), num_samples, matrix);
}
if (num_snps_left > 0) {
num_blocks = num_snps_left / 32;
std::fill(geno64.begin(), geno64.end(), kIbsMask);
for (size_t j = 0; j < num_blocks; ++j) {
snp.TransposeGeno(32, j, geno64.data());
snp += 32;
}
num_snps_left %= 32;
if (num_snps_left > 0u) {
snp.TransposeGeno(num_snps_left, num_blocks, geno64.data());
}
MaskGeno(geno64.data(), num_samples, mask64.data());
UpdateIBSMatrix(geno64.data(), mask64.data(), num_samples, matrix);
}
}
// Thread function for calculating the IBS connection.
void CalcIBSConnectionThread(const uint8_t *src_geno, size_t num_src_samples,
const uint8_t *dest_geno, size_t num_dest_samples,
size_t num_snps, uint64_t *connection) {
SNP src_snp(src_geno, num_src_samples);
SNP dest_snp(dest_geno, num_dest_samples);
auto num_blocks = num_snps / 1024u;
auto num_snps_left = num_snps % 1024u;
std::vector<uint64_t> src_geno64(32 * num_src_samples);
std::vector<uint64_t> src_mask64(32 * num_src_samples);
std::vector<uint64_t> dest_geno64(32 * num_dest_samples);
std::vector<uint64_t> dest_mask64(32 * num_dest_samples);
std::fill(connection, connection + num_dest_samples, 0ull);
for (size_t i = 0; i < num_blocks; ++i) {
for (size_t j = 0; j < 32; ++j) {
src_snp.TransposeGeno(32, j, src_geno64.data());
dest_snp.TransposeGeno(32, j, dest_geno64.data());
src_snp += 32;
dest_snp += 32;
}
MaskGeno(src_geno64.data(), num_src_samples, src_mask64.data());
MaskGeno(dest_geno64.data(), num_dest_samples, dest_mask64.data());
UpdateIBSConnection(src_geno64.data(), src_mask64.data(), dest_geno64.data(),
dest_mask64.data(), num_src_samples, num_dest_samples,
connection);
}
if (num_snps_left > 0u) {
std::fill(src_geno64.begin(), src_geno64.end(), kIbsMask);
std::fill(dest_geno64.begin(), dest_geno64.end(), kIbsMask);
num_blocks = num_snps_left / 32;
for (size_t j = 0; j < num_blocks; ++j) {
src_snp.TransposeGeno(32, j, src_geno64.data());
dest_snp.TransposeGeno(32, j, dest_geno64.data());
src_snp += 32;
dest_snp += 32;
}
num_snps_left %= 32;
if (num_snps_left > 0u) {
src_snp.TransposeGeno(num_snps_left, num_blocks, src_geno64.data());
dest_snp.TransposeGeno(num_snps_left, num_blocks, dest_geno64.data());
}
MaskGeno(src_geno64.data(), num_src_samples, src_mask64.data());
MaskGeno(dest_geno64.data(), num_dest_samples, dest_mask64.data());
UpdateIBSConnection(src_geno64.data(), src_mask64.data(), dest_geno64.data(),
dest_mask64.data(), num_src_samples, num_dest_samples,
connection);
}
}
} // namespace
// Calculates the Identity by State (IBS) matrix from genotype data.
void CalcIBSMatrix(const uint8_t *geno, size_t num_samples, size_t num_snps,
double *matrix, size_t num_threads) {
std::vector<std::thread> workers;
std::vector<uint64_t> matrices(num_samples * num_samples * num_threads);
auto num_snps_job = num_snps / num_threads + 1;
auto num_snps_left = num_snps % num_threads;
auto num_full_bytes = num_samples / 4;
auto num_samples_left = num_samples % 4;
auto num_bytes = num_full_bytes + (num_samples_left > 0 ? 1 : 0);
for (size_t i = 0; i < num_snps_left; ++i) {
workers.emplace_back(CalcIBSMatrixThread, geno, num_samples, num_snps_job,
matrices.data() + i * num_samples * num_samples);
geno += num_snps_job * num_bytes;
}
--num_snps_job;
for (size_t i = num_snps_left; i < num_threads; ++i) {
workers.emplace_back(CalcIBSMatrixThread, geno, num_samples, num_snps_job,
matrices.data() + i * num_samples * num_samples);
geno += num_snps_job * num_bytes;
}
for (auto &&iter : workers) {
iter.join();
}
auto *matrix_u = matrices.data();
for (size_t k = 1; k < num_threads; ++k) {
auto *tmp_m = matrices.data() + k * num_samples * num_samples;
for (size_t i = 0; i < num_samples; ++i) {
for (size_t j = i; j < num_samples; ++j) {
matrix_u[i * num_samples + j] += tmp_m[i * num_samples + j];
}
}
}
for (size_t i = 0; i < num_samples; ++i) {
for (size_t j = i; j < num_samples; ++j) {
matrix[i * num_samples + j] =
static_cast<double>(matrix_u[i * num_samples + j]) / num_snps / 2.0;
matrix[j * num_samples + i] = matrix[i * num_samples + j];
}
}
}
// Calculates the IBS connection between two sets of samples.
void CalcIBSConnection(const uint8_t *src_geno, size_t num_src_samples,
const uint8_t *dest_geno, size_t num_dest_samples,
size_t num_snps, double *connection,
size_t num_threads) {
std::vector<std::thread> workers(num_threads);
std::vector<uint64_t> connects(num_dest_samples * num_threads);
auto num_src_full_bytes = num_src_samples / 4;
auto num_src_samples_left = num_src_samples % 4;
auto num_src_bytes = num_src_full_bytes + (num_src_samples_left > 0 ? 1 : 0);
auto num_dest_full_bytes = num_dest_samples / 4;
auto num_dest_samples_left = num_dest_samples % 4;
auto num_dest_bytes =
num_dest_full_bytes + (num_dest_samples_left > 0 ? 1 : 0);
auto num_snps_job = num_snps / num_threads + 1;
auto num_snps_left = num_snps % num_threads;
for (size_t i = 0; i < num_snps_left; ++i) {
workers[i] = std::thread(CalcIBSConnectionThread, src_geno, num_src_samples,
dest_geno, num_dest_samples, num_snps_job,
connects.data() + i * num_dest_samples);
src_geno += num_snps_job * num_src_bytes;
dest_geno += num_snps_job * num_dest_bytes;
}
--num_snps_job;
for (size_t i = num_snps_left; i < num_threads; ++i) {
workers[i] = std::thread(CalcIBSConnectionThread, src_geno, num_src_samples,
dest_geno, num_dest_samples, num_snps_job,
connects.data() + i * num_dest_samples);
src_geno += num_snps_job * num_src_bytes;
dest_geno += num_snps_job * num_dest_bytes;
}
for (auto &&iter : workers) {
iter.join();
}
auto *connect_u = connects.data();
for (size_t i = 1; i < num_threads; ++i) {
auto *tmp_c = connects.data() + i * num_dest_samples;
for (size_t j = 0; j < num_dest_samples; ++j) {
connect_u[j] += tmp_c[j];
}
}
for (size_t i = 0; i < num_dest_samples; ++i) {
connection[i] = static_cast<double>(connect_u[i]) / num_snps / 2.0;
}
}
} // namespace snplib

21
src/ibs.h Normal file
View File

@@ -0,0 +1,21 @@
// ibs.h
#ifndef SNPLIB_SRC_IBS_H_
#define SNPLIB_SRC_IBS_H_
#include <cstdint>
#include <cstddef>
namespace snplib {
// Calculates the Identity by State (IBS) matrix from genotype data.
void CalcIBSMatrix(const uint8_t *geno, size_t num_samples, size_t num_snps,
double *matrix, size_t num_threads);
// Calculates the IBS connection between two sets of samples.
void CalcIBSConnection(const uint8_t *src_geno, size_t num_src_samples,
const uint8_t *dest_geno, size_t num_dest_samples,
size_t num_snps, double *connection, size_t num_threads);
} // namespace snplib
#endif // SNPLIB_SRC_IBS_H_

205
src/king.cc Normal file
View File

@@ -0,0 +1,205 @@
// king.cc
#include "king.h"
#include <algorithm>
#include <list>
#include <numeric>
#include <thread>
#include <vector>
#include <immintrin.h>
#include "snp.h"
namespace snplib {
namespace {
const uint64_t kHomozygousMask = 0x5555555555555555ull; // 0x5555=b0101010101010101
const uint64_t kHeterozygousMask = 0xAAAAAAAAAAAAAAAAull; // 0xAAAA=b1010101010101010
// Calculates the KING-robust kinship coefficient for a block of 1024 SNPs.
int64_t CalcKINGBlock(const uint64_t *geno1, const uint64_t *geno2) {
int64_t num_hetero = 0;
int64_t num_homo = 0;
for (size_t i = 0; i < 32; ++i) {
auto mask_1 = geno1[i] ^ kHomozygousMask;
mask_1 = (mask_1 | (mask_1 >> 1)) & kHomozygousMask;
mask_1 *= 3ull;
auto mask_2 = geno2[i] ^ kHomozygousMask;
mask_2 = (mask_2 | (mask_2 >> 1)) & kHomozygousMask;
mask_2 *= 3ull;
auto hetero_1 = geno1[i] ^ kHeterozygousMask;
hetero_1 = (hetero_1 | (hetero_1 << 1)) >> 1;
hetero_1 = (hetero_1 & kHomozygousMask) * 3ull;
auto hetero_2 = geno2[i] ^ kHeterozygousMask;
hetero_2 = (hetero_2 | (hetero_2 << 1)) >> 1;
hetero_2 = (hetero_2 & kHomozygousMask) * 3ull;
num_hetero += _mm_popcnt_u64(~(hetero_1 | hetero_2));
auto geno = geno1[i] ^ geno2[i];
geno &= mask_1 & mask_2 & hetero_1 & hetero_2;
num_homo += _mm_popcnt_u64(geno);
}
return num_hetero - 2 * num_homo;
}
// Calculates the number of heterozygous SNPs for a sample.
uint64_t CalcHeteroCount(const uint64_t *geno) {
uint64_t num_hetero = 0;
for (size_t i = 0; i < 32; ++i) {
auto mask = geno[i] ^ kHomozygousMask;
mask = (mask | (mask >> 1)) & kHomozygousMask;
mask *= 3ull;
auto hetero = geno[i] ^ kHeterozygousMask;
hetero = (hetero | (hetero << 1)) >> 1;
hetero = (hetero & mask) * 3ull;
num_hetero += _mm_popcnt_u64(~hetero);
}
return num_hetero;
}
// Updates the KING matrix with the contribution from a block of SNPs.
void UpdateKINGMatrix(const uint64_t *geno64, size_t num_samples, int64_t *matrix) {
for (size_t i = 0; i < num_samples; ++i) {
auto *g1 = geno64 + 32 * i;
for (size_t j = i + 1; j < num_samples; ++j) {
auto *g2 = geno64 + 32 * j;
matrix[i * num_samples + j] += CalcKINGBlock(g1, g2);
}
}
}
// Updates the heterozygous count vector with the contribution from a block of
// SNPs.
void UpdateHeteroCount(const uint64_t *geno64, size_t num_samples,
uint64_t *vector) {
for (size_t i = 0; i < num_samples; ++i) {
auto *g1 = geno64 + 32 * i;
vector[i] += CalcHeteroCount(g1);
}
}
// Thread function for calculating the KING matrix.
void CalcKINGMatrixThread(const uint8_t *geno, size_t num_samples, size_t num_snps,
int64_t *matrix, uint64_t *vector) {
SNP snp(geno, num_samples);
auto num_blocks = num_snps / 1024u;
auto num_snps_left = num_snps % 1024u;
std::vector<uint64_t> geno64(32 * num_samples);
std::fill(matrix, matrix + num_samples * num_samples, 0ull);
std::fill(vector, vector + num_samples, 0ull);
for (size_t i = 0; i < num_blocks; ++i) {
for (size_t j = 0; j < 32; ++j) {
snp.TransposeGeno(32, j, geno64.data());
snp += 32;
}
UpdateHeteroCount(geno64.data(), num_samples, vector);
UpdateKINGMatrix(geno64.data(), num_samples, matrix);
}
if (num_snps_left > 0) {
num_blocks = num_snps_left / 32;
std::fill(geno64.begin(), geno64.end(), kHomozygousMask);
for (size_t j = 0; j < num_blocks; ++j) {
snp.TransposeGeno(32, j, geno64.data());
snp += 32;
}
num_snps_left %= 32;
if (num_snps_left > 0u) {
snp.TransposeGeno(num_snps_left, num_blocks, geno64.data());
}
UpdateHeteroCount(geno64.data(), num_samples, vector);
UpdateKINGMatrix(geno64.data(), num_samples, matrix);
}
}
} // namespace
// Calculates the KING-robust kinship coefficient matrix.
void CalcKINGMatrix(const uint8_t *geno, size_t num_samples, size_t num_snps,
double *matrix, size_t num_threads) {
std::vector<std::thread> workers;
std::vector<int64_t> matrices(num_samples * num_samples * num_threads);
std::vector<uint64_t> vectors(num_samples * num_threads);
auto num_snps_job = num_snps / num_threads + 1;
auto num_snps_left = num_snps % num_threads;
auto num_full_bytes = num_samples / 4;
auto num_samples_left = num_samples % 4;
auto num_bytes = num_full_bytes + (num_samples_left > 0 ? 1 : 0);
for (size_t i = 0; i < num_snps_left; ++i) {
workers.emplace_back(CalcKINGMatrixThread, geno, num_samples, num_snps_job,
matrices.data() + i * num_samples * num_samples,
vectors.data() + i * num_samples);
geno += num_snps_job * num_bytes;
}
--num_snps_job;
for (size_t i = num_snps_left; i < num_threads; ++i) {
workers.emplace_back(CalcKINGMatrixThread, geno, num_samples, num_snps_job,
matrices.data() + i * num_samples * num_samples,
vectors.data() + i * num_samples);
geno += num_snps_job * num_bytes;
}
for (auto &&iter : workers) {
iter.join();
}
auto *matrix_u = matrices.data();
for (size_t k = 1; k < num_threads; ++k) {
auto *tmp_m = matrices.data() + k * num_samples * num_samples;
for (size_t i = 0; i < num_samples; ++i) {
for (size_t j = i; j < num_samples; ++j) {
matrix_u[i * num_samples + j] += tmp_m[i * num_samples + j];
}
}
}
auto *vector_u = vectors.data();
for (size_t k = 1; k < num_threads; ++k) {
auto *tmp_v = vectors.data() + k * num_samples;
for (size_t i = 0; i < num_samples; ++i) {
vector_u[i] += tmp_v[i];
}
}
for (size_t i = 0; i < num_samples; ++i) {
for (size_t j = i + 1; j < num_samples; ++j) {
matrix[i * num_samples + j] =
static_cast<double>(matrix_u[i * num_samples + j]) /
(vector_u[i] + vector_u[j]);
matrix[j * num_samples + i] = matrix[i * num_samples + j];
}
}
}
// Finds a group of unrelated individuals from a kinship matrix.
std::vector<int32_t> FindUnrelatedGroup(const double *matrix,
size_t num_samples, double threshold) {
std::vector<int> R(num_samples * num_samples, 0);
for (size_t i = 0; i < num_samples; ++i) {
for (size_t j = i + 1; j < num_samples; ++j) {
R[i * num_samples + j] = matrix[i * num_samples + j] > threshold ? 1 : 0;
R[j * num_samples + i] = R[i * num_samples + j];
}
}
std::vector<int> r(num_samples);
for (size_t i = 0; i < num_samples; ++i) {
r[i] = std::accumulate(R.data() + i * num_samples, R.data() + (i + 1) * num_samples, 0);
}
std::vector<int32_t> I;
while (std::any_of(r.begin(), r.end(), [](int a) { return a > 0; })) {
auto idx = std::max_element(r.begin(), r.end()) - r.begin();
for (size_t i = 0; i < num_samples; ++i) {
R[idx * num_samples + i] = 0;
R[i * num_samples + idx] = 0;
}
for (size_t i = 0; i < num_samples; ++i) {
r[i] = std::accumulate(R.data() + i * num_samples, R.data() + (i + 1) * num_samples, 0);
}
I.emplace_back(idx);
}
std::vector<int32_t> result;
for (int i = 0; i < num_samples; ++i) {
auto iter = std::find(I.begin(), I.end(), i);
if (iter == I.end()) {
result.emplace_back(i);
}
}
return result;
}
} // namespace snplib

21
src/king.h Normal file
View File

@@ -0,0 +1,21 @@
// king.h
#ifndef SNPLIB_SRC_KING_H_
#define SNPLIB_SRC_KING_H_
#include <cstdint>
#include <cstddef>
#include <vector>
namespace snplib {
// Calculates the KING-robust kinship coefficient matrix.
void CalcKINGMatrix(const uint8_t *geno, size_t num_samples, size_t num_snps,
double *matrix, size_t num_threads);
// Finds a group of unrelated individuals from a kinship matrix.
std::vector<int32_t> FindUnrelatedGroup(const double *matrix,
size_t num_samples, double threshold);
} // namespace snplib
#endif // SNPLIB_SRC_KING_H_

134
src/snp.cc Normal file
View File

@@ -0,0 +1,134 @@
// snp.cc
#include "snp.h"
#include <array>
#include <cstdint>
namespace snplib {
namespace {
// Constants for genotype data conversion.
const uint64_t kUgMask = 0x0303030303030303ull; // 0303 = 0000001100000011
const uint64_t kSgMask = 0x0003000300030003ull; // 0003 = 0000000000000011
// Converts a 4x64-bit genotype block to a single 64-bit value.
uint64_t ConvertGenoTo64(const std::array<uint64_t, 4> &g64) {
uint64_t geno64;
geno64 = g64[3] & kUgMask;
geno64 <<= 2;
geno64 |= g64[2] & kUgMask;
geno64 <<= 2;
geno64 |= g64[1] & kUgMask;
geno64 <<= 2;
geno64 |= g64[0] & kUgMask;
return geno64;
}
// Converts a 5x64-bit genotype block to a single 64-bit value.
uint64_t ConvertGenoTo64(const std::array<uint64_t, 5> &g64) {
uint64_t geno64;
geno64 = g64[4] & kSgMask;
geno64 <<= 3;
geno64 |= g64[3] & kSgMask;
geno64 <<= 3;
geno64 |= g64[2] & kSgMask;
geno64 <<= 3;
geno64 |= g64[1] & kSgMask;
geno64 <<= 3;
geno64 |= g64[0] & kSgMask;
return geno64;
}
} // namespace
SNP::SNP(const uint8_t *geno, size_t num_samples)
: geno_(geno),
num_samples_(num_samples),
num_full_bytes_(num_samples / 4),
num_samples_left_(num_samples_ % 4),
num_bytes_(num_full_bytes_ + (num_samples_left_ != 0 ? 1 : 0)) {}
void SNP::ConvertGeno(size_t num_snps, size_t idx,
std::array<uint64_t, 4> &g64) {
uint64_t g8[32] = {0x55ull, 0x55ull, 0x55ull, 0x55ull, 0x55ull, 0x55ull,
0x55ull, 0x55ull, 0x55ull, 0x55ull, 0x55ull, 0x55ull,
0x55ull, 0x55ull, 0x55ull, 0x55ull, 0x55ull, 0x55ull,
0x55ull, 0x55ull, 0x55ull, 0x55ull, 0x55ull, 0x55ull,
0x55ull, 0x55ull, 0x55ull, 0x55ull, 0x55ull, 0x55ull,
0x55ull, 0x55ull};
for (size_t i = 0; i < num_snps; ++i) {
const auto *tmp_g = geno_ + i * num_bytes_;
g8[i] = static_cast<uint64_t>(tmp_g[idx]);
}
for (size_t i = 0; i < 4; ++i) {
g64[i] = g8[i] + (g8[4 + i] << 8) + (g8[8 + i] << 16) + (g8[12 + i] << 24) +
(g8[16 + i] << 32) + (g8[20 + i] << 40) + (g8[24 + i] << 48) +
(g8[28 + i] << 56);
}
}
void SNP::ConvertGeno(size_t num_snps, size_t idx,
std::array<uint64_t, 5> &g64) {
uint64_t g8[20] = {0x55ull, 0x55ull, 0x55ull, 0x55ull, 0x55ull,
0x55ull, 0x55ull, 0x55ull, 0x55ull, 0x55ull,
0x55ull, 0x55ull, 0x55ull, 0x55ull, 0x55ull,
0x55ull, 0x55ull, 0x55ull, 0x55ull, 0x55ull};
for (size_t i = 0; i < num_snps; ++i) {
const auto *tmp_g = geno_ + i * num_bytes_;
g8[i] = static_cast<uint64_t>(tmp_g[idx]);
}
for (size_t i = 0; i < 5; ++i) {
g64[i] =
g8[i] + (g8[5 + i] << 16) + (g8[10 + i] << 32) + (g8[15 + i] << 48);
}
}
void SNP::TransposeGeno(size_t num_snps, size_t idx, uint64_t *geno64) {
std::array<uint64_t, 4> g64;
for (size_t i = 0; i < num_full_bytes_; ++i) {
ConvertGeno(num_snps, i, g64);
geno64[32 * (4 * i) + idx] = ConvertGenoTo64(g64);
for (size_t j = 1; j < 4; ++j) {
for (auto &iter : g64) {
iter >>= 2;
}
geno64[32 * (4 * i + j) + idx] = ConvertGenoTo64(g64);
}
}
if (num_samples_left_ > 0) {
ConvertGeno(num_snps, num_full_bytes_, g64);
geno64[32 * (4 * num_full_bytes_) + idx] = ConvertGenoTo64(g64);
for (size_t j = 1; j < num_samples_left_; ++j) {
for (auto &iter : g64) {
iter >>= 2;
}
geno64[32 * (4 * num_full_bytes_ + j) + idx] = ConvertGenoTo64(g64);
}
}
}
void SNP::TransposeGeno(size_t num_snps, uint64_t *geno64) {
std::array<uint64_t, 5> g64;
for (size_t i = 0; i < num_full_bytes_; ++i) {
ConvertGeno(num_snps, i, g64);
geno64[4 * i] = ConvertGenoTo64(g64);
for (size_t j = 1; j < 4; ++j) {
for (auto &iter : g64) {
iter >>= 2;
}
geno64[4 * i + j] = ConvertGenoTo64(g64);
}
}
if (num_samples_left_ > 0) {
ConvertGeno(num_snps, num_full_bytes_, g64);
geno64[4 * num_full_bytes_] = ConvertGenoTo64(g64);
for (size_t j = 1; j < num_samples_left_; ++j) {
for (auto &iter : g64) {
iter >>= 2;
}
geno64[4 * num_full_bytes_ + j] = ConvertGenoTo64(g64);
}
}
}
} // namespace snplib

93
src/snp.h Normal file
View File

@@ -0,0 +1,93 @@
// snp.h
#ifndef SNPLIB_SRC_SNP_H_
#define SNPLIB_SRC_SNP_H_
#include <array>
#include <cstdint>
#include <cstring>
namespace snplib {
// The SNP class provides a view of a single SNP's genotype data.
// It is designed to work with packed genotype data where each sample's genotype
// is stored in 2 bits. The class provides methods for unpacking the genotype
// data, transposing it, and performing other operations.
class SNP {
public:
// Constructor for the SNP class.
//
// @param geno A pointer to the packed genotype data.
// @param num_samples The number of samples.
SNP(const uint8_t *geno, size_t num_samples);
SNP(const SNP &) = default;
SNP(SNP &&) = default;
SNP &operator=(const SNP &) = delete;
SNP &operator=(SNP &&) = delete;
~SNP() = default;
// Returns the genotype value for a given sample index.
//
// @param idx The index of the sample.
// @return The genotype value (0, 1, 2, or 3).
uint8_t operator()(size_t idx) const;
// Advances the SNP pointer to the next SNP.
//
// @param idx The number of SNPs to advance.
// @return A reference to the current SNP object.
SNP &operator+=(size_t idx);
// Copies the SNP's genotype data to a destination buffer.
//
// @param dest A pointer to the destination buffer.
template <class T>
void Copy(T *dest) const {
memcpy(static_cast<void *>(dest), static_cast<const void *>(geno_),
sizeof(uint8_t) * num_bytes_);
}
// Unpacks the genotype data into a destination buffer.
//
// @param geno_table A lookup table for converting genotype values.
// @param geno A pointer to the destination buffer.
template <class T>
void UnpackGeno(const std::array<T, 4> &geno_table, T *geno);
// Unpacks the genotype data into two destination buffers (genotype and mask).
//
// @param geno_table A lookup table for converting genotype values.
// @param mask_table A lookup table for creating the mask.
// @param geno A pointer to the genotype destination buffer.
// @param mask A pointer to the mask destination buffer.
template <class T>
void UnpackGeno(const std::array<T, 4> &geno_table,
const std::array<T, 4> &mask_table, T *geno, T *mask);
// Transposes the genotype data for a block of SNPs.
//
// @param num_snps The number of SNPs to transpose.
// @param idx The index of the current SNP within the block.
// @param geno64 A pointer to the destination buffer.
void TransposeGeno(size_t num_snps, size_t idx, uint64_t *geno64);
// Transposes the genotype data for a block of SNPs.
//
// @param num_snps The number of SNPs to transpose.
// @param geno64 A pointer to the destination buffer.
void TransposeGeno(size_t num_snps, uint64_t *geno64);
private:
const uint8_t *geno_ = nullptr;
size_t num_samples_ = 0;
size_t num_full_bytes_ = 0;
size_t num_samples_left_ = 0;
size_t num_bytes_ = 0;
void ConvertGeno(size_t num_snps, size_t idx, std::array<uint64_t, 4> &g64);
void ConvertGeno(size_t num_snps, size_t idx, std::array<uint64_t, 5> &g64);
};
} // namespace snplib
#endif // SNPLIB_SRC_SNP_H_

188
src/statistics.cc Normal file
View File

@@ -0,0 +1,188 @@
// statistics.cc
#include "statistics.h"
#include <atomic>
#include <cmath>
#include <thread>
#include <vector>
#include <immintrin.h>
#include "snp.h"
namespace snplib {
namespace {
std::atomic_size_t global_snpc_index;
const uint64_t kHomozygousMask = 0x5555555555555555ull; // 0x5555=b0101010101010101
const uint64_t kHeterozygousMask = 0xAAAAAAAAAAAAAAAAull; // 0xAAAA=b1010101010101010
// Calculates the LD R^2 between two SNPs.
double CalcLDR2(const uint64_t *geno_1, const uint64_t *geno_2, double af1,
double af2, size_t num_words) {
uint64_t num_samples = 0;
uint64_t n11 = 0;
uint64_t n12 = 0;
uint64_t n21 = 0;
uint64_t n22 = 0;
uint64_t n_homo_1 = 0;
uint64_t n_homo_2 = 0;
uint64_t n_hetero_1 = 0;
uint64_t n_hetero_2 = 0;
for (size_t i = 0; i < num_words; ++i) {
uint64_t mask_1 = geno_1[i] ^ kHomozygousMask;
mask_1 = (mask_1 | (mask_1 >> 1)) & kHomozygousMask;
uint64_t mask_2 = geno_2[i] ^ kHomozygousMask;
mask_2 = (mask_2 | (mask_2 >> 1)) & kHomozygousMask;
auto mask = mask_1 & mask_2;
mask *= 3ull;
uint64_t homo_1 = ((geno_1[i] ^ kHeterozygousMask) & kHomozygousMask) & mask;
uint64_t homo_2 = ((geno_2[i] ^ kHeterozygousMask) & kHomozygousMask) & mask;
uint64_t hetero_1 = geno_1[i] ^ kHomozygousMask;
hetero_1 = ((hetero_1 & (hetero_1 >> 1)) & kHomozygousMask) & mask;
uint64_t hetero_2 = geno_2[i] ^ kHomozygousMask;
hetero_2 = ((hetero_2 & (hetero_2 >> 1)) & kHomozygousMask) & mask;
num_samples += _mm_popcnt_u64(mask);
mask = homo_1 & homo_2;
n11 += _mm_popcnt_u64(mask);
mask = homo_1 & hetero_2;
n12 += _mm_popcnt_u64(mask);
mask = hetero_1 & homo_2;
n21 += _mm_popcnt_u64(mask);
mask = hetero_1 & hetero_2;
n22 += _mm_popcnt_u64(mask);
n_homo_1 += _mm_popcnt_u64(homo_1);
n_homo_2 += _mm_popcnt_u64(homo_2);
n_hetero_1 += _mm_popcnt_u64(hetero_1);
n_hetero_2 += _mm_popcnt_u64(hetero_2);
}
num_samples /= 2;
auto n13 = n_homo_1 - n11 - n12;
auto n23 = n_hetero_1 - n21 - n22;
auto n31 = n_homo_2 - n11 - n21;
auto n32 = n_hetero_2 - n12 - n22;
auto n33 = num_samples - n11 - n12 - n13 - n21 - n22 - n23 - n31 - n32;
auto r2 = static_cast<double>(n11) * (2.0 - 2.0 * af1) * (2.0 - 2.0 * af2);
r2 += static_cast<double>(n21) * (1.0 - 2.0 * af1) * (2.0 - 2.0 * af2);
r2 -= static_cast<double>(n31) * 2.0 * af1 * (2.0 - 2.0 * af2);
r2 += static_cast<double>(n12) * (2.0 - 2.0 * af1) * (1.0 - 2.0 * af2);
r2 += static_cast<double>(n22) * (1.0 - 2.0 * af1) * (1.0 - 2.0 * af2);
r2 -= static_cast<double>(n32) * 2.0 * af1 * (1.0 - 2.0 * af2);
r2 -= static_cast<double>(n13) * (2.0 - 2.0 * af1) * 2.0 * af2;
r2 -= static_cast<double>(n23) * (1.0 - 2.0 * af1) * 2.0 * af2;
r2 += static_cast<double>(n33) * 2.0 * af1 * 2.0 * af2;
r2 /= 2.0 * std::sqrt(af1 * af2 * (1.0 - af1) * (1.0 - af2));
r2 /= (num_samples - 1);
return r2 * r2;
}
// Thread function for calculating LD scores.
void CalcLDScoresThread(const uint8_t *geno, const int32_t *bp,
const double *af, size_t num_snps, size_t num_samples,
size_t window_size, double r2_threshold, double *ldcv) {
auto local_ind = global_snpc_index++;
auto ws = static_cast<int32_t>(window_size / 2);
auto num_words = num_samples / 32 + (num_samples % 32 > 0 ? 1 : 0);
std::vector<uint64_t> geno_1(num_words, kHomozygousMask);
std::vector<uint64_t> geno_2(num_words, kHomozygousMask);
while (local_ind < num_snps) {
SNP snp1(geno, num_samples);
snp1 += local_ind;
snp1.Copy(geno_1.data());
auto lower = bp[local_ind] - ws;
lower = lower > 0 ? lower : 0;
auto upper = bp[local_ind] + ws;
size_t first_ind = 0;
while (bp[first_ind] < lower) {
first_ind++;
}
size_t last_ind = first_ind;
while ((bp[last_ind] <= upper) && (last_ind < num_snps)) {
last_ind++;
}
double cv = 0.0;
for (size_t i = first_ind; i < last_ind; ++i) {
SNP snp2(geno, num_samples);
snp2 += i;
snp2.Copy(geno_2.data());
auto r2 = CalcLDR2(geno_1.data(), geno_2.data(), af[local_ind], af[i], num_words);
cv += r2 > r2_threshold ? r2 : 0;
}
ldcv[local_ind] = cv;
local_ind = global_snpc_index++;
}
}
} // namespace
// Calculates the allele frequencies for a set of SNPs.
void CalcAlleleFrequencies(const uint8_t *geno, size_t num_samples,
size_t num_snps, double *af) {
auto num_words_snp = num_samples / 32 + ((num_samples % 32) != 0u ? 1 : 0);
auto num_bytes = num_samples / 4 + ((num_samples % 4) != 0 ? 1 : 0);
auto correct = 4 * num_bytes - num_samples;
correct *= 2;
std::vector<uint64_t> geno64(num_words_snp, kHomozygousMask);
for (size_t i = 0; i < num_snps; ++i) {
memcpy(static_cast<void *>(geno64.data()),
static_cast<const void *>(geno + i * num_bytes),
sizeof(uint8_t) * num_bytes);
uint64_t alleles = 0;
uint64_t nonmissings = 0;
for (size_t j = 0; j < num_words_snp; ++j) {
uint64_t tmp_g = geno64[j] ^ kHomozygousMask;
uint64_t tmp_m = (tmp_g | (tmp_g >> 1)) & kHomozygousMask;
tmp_m *= 3;
nonmissings += _mm_popcnt_u64(tmp_m);
tmp_g = tmp_m & geno64[j];
alleles += _mm_popcnt_u64(tmp_g);
}
nonmissings -= correct;
af[i] = static_cast<double>(alleles) / nonmissings;
}
}
// Calculates the missing rate for a set of SNPs.
void CalcMissingRates(const uint8_t *geno, size_t num_samples, size_t num_snps,
double *missing) {
auto num_words_snp = num_samples / 32 + ((num_samples % 32) != 0u ? 1 : 0);
auto num_bytes = num_samples / 4 + ((num_samples % 4) != 0 ? 1 : 0);
auto correct = 4 * num_bytes - num_samples;
correct *= 2;
std::vector<uint64_t> geno64(num_words_snp, kHomozygousMask);
for (size_t i = 0; i < num_snps; ++i) {
memcpy(static_cast<void *>(geno64.data()),
static_cast<const void *>(geno + i * num_bytes),
sizeof(uint8_t) * num_bytes);
uint64_t alleles = 0;
uint64_t nonmissings = 0;
for (size_t j = 0; j < num_words_snp; ++j) {
uint64_t tmp_g = geno64[j] ^ kHomozygousMask;
uint64_t tmp_m = (tmp_g | (tmp_g >> 1)) & kHomozygousMask;
tmp_m *= 3;
nonmissings += _mm_popcnt_u64(tmp_m);
tmp_g = tmp_m & geno64[j];
alleles += _mm_popcnt_u64(tmp_g);
}
nonmissings -= correct;
missing[i] = 1.0 - static_cast<double>(nonmissings) / num_samples;
}
}
// Calculates the LD scores for a set of SNPs.
void CalcLDScores(const uint8_t *geno, const int32_t *bp, const double *af,
size_t num_snps, size_t num_samples, size_t window_size,
double r2_threshold, double *ldcv, size_t num_threads) {
std::vector<std::thread> workers;
global_snpc_index = 0;
for (size_t i = 0; i < num_threads; ++i) {
workers.emplace_back(CalcLDScoresThread, geno, bp, af, num_snps,
num_samples, window_size, r2_threshold, ldcv);
}
for (auto &&iter : workers) {
iter.join();
}
}
} // namespace snplib

25
src/statistics.h Normal file
View File

@@ -0,0 +1,25 @@
// statistics.h
#ifndef SNPLIB_SRC_STATISTICS_H_
#define SNPLIB_SRC_STATISTICS_H_
#include <cstdint>
#include <cstddef>
namespace snplib {
// Calculates the allele frequencies for a set of SNPs.
void CalcAlleleFrequencies(const uint8_t *geno, size_t num_samples,
size_t num_snps, double *af);
// Calculates the missing rate for a set of SNPs.
void CalcMissingRates(const uint8_t *geno, size_t num_samples, size_t num_snps,
double *missing);
// Calculates the LD scores for a set of SNPs.
void CalcLDScores(const uint8_t *geno, const int32_t *bp, const double *af,
size_t num_snps, size_t num_samples, size_t window_size,
double r2_threshold, double *ldcv, size_t num_threads);
} // namespace snplib
#endif // SNPLIB_SRC_STATISTICS_H_

142
src/ugrm.cc Normal file
View File

@@ -0,0 +1,142 @@
// ugrm.cc
#include "ugrm.h"
#include <algorithm>
#include <thread>
#include <vector>
#include <immintrin.h>
#include "snp.h"
namespace snplib {
namespace {
const uint64_t kHomozygousMask = 0x5555555555555555ull; // 0x5555=b0101010101010101
const uint64_t kHeterozygousMask = 0xAAAAAAAAAAAAAAAAull; // 0xAAAA=b1010101010101010
// Calculates the U-GRM for a block of 1024 SNPs between two samples.
int64_t CalcUGRMBlock(const __m128i *geno1, const __m128i *mask1,
const __m128i *geno2, const __m128i *mask2) noexcept {
int64_t count_same = 0;
int64_t count_diff = 0;
for (size_t i = 0; i < 16; ++i) {
__m128i g = _mm_xor_si128(_mm_loadu_si128(geno1 + i), _mm_loadu_si128(geno2 + i));
__m128i m = _mm_and_si128(_mm_loadu_si128(mask1 + i), _mm_loadu_si128(mask2 + i));
__m128i g_same = _mm_andnot_si128(g, m);
__m128i g_diff = _mm_and_si128(g, m);
count_same += _mm_popcnt_u64(_mm_cvtsi128_si64(g_same));
count_diff += _mm_popcnt_u64(_mm_cvtsi128_si64(g_diff));
g_same = _mm_srli_si128(g_same, 8);
g_diff = _mm_srli_si128(g_diff, 8);
count_same += _mm_popcnt_u64(_mm_cvtsi128_si64(g_same));
count_diff += _mm_popcnt_u64(_mm_cvtsi128_si64(g_diff));
}
return count_same - count_diff;
}
// Creates a mask for the genotype data.
void MaskGeno(const uint64_t *geno64, size_t num_samples, uint64_t *mask64) {
for (size_t i = 0; i < 32 * num_samples; ++i) {
uint64_t tmp_geno = geno64[i] ^ kHomozygousMask;
tmp_geno = (tmp_geno | (tmp_geno >> 1)) & kHomozygousMask;
auto mask_1 = tmp_geno * 3ull;
tmp_geno = geno64[i] ^ kHeterozygousMask;
tmp_geno = (tmp_geno | (tmp_geno << 1)) >> 1;
auto mask_2 = (tmp_geno & kHomozygousMask) * 3ull;
mask64[i] = mask_1 & mask_2;
}
}
// Updates the U-GRM matrix with the contribution from a block of SNPs.
void UpdateUGRMMatrix(const uint64_t *geno64, const uint64_t *mask64,
size_t num_samples, int64_t *matrix) {
for (size_t i = 0; i < num_samples; ++i) {
auto *g1 = reinterpret_cast<const __m128i *>(geno64 + 32 * i);
auto *m1 = reinterpret_cast<const __m128i *>(mask64 + 32 * i);
for (size_t j = i; j < num_samples; ++j) {
auto *g2 = reinterpret_cast<const __m128i *>(geno64 + 32 * j);
auto *m2 = reinterpret_cast<const __m128i *>(mask64 + 32 * j);
matrix[i * num_samples + j] += CalcUGRMBlock(g1, m1, g2, m2);
}
}
}
// Thread function for calculating the U-GRM matrix.
void CalcUGRMMatrixThread(const uint8_t *geno, size_t num_samples,
size_t num_snps, int64_t *matrix) {
SNP snp(geno, num_samples);
auto num_blocks = num_snps / 1024u;
auto num_snps_left = num_snps % 1024u;
std::vector<uint64_t> geno64(32 * num_samples);
std::vector<uint64_t> mask64(32 * num_samples);
std::fill(matrix, matrix + num_samples * num_samples, 0ull);
for (size_t i = 0; i < num_blocks; ++i) {
for (size_t j = 0; j < 32; ++j) {
snp.TransposeGeno(32, j, geno64.data());
snp += 32;
}
MaskGeno(geno64.data(), num_samples, mask64.data());
UpdateUGRMMatrix(geno64.data(), mask64.data(), num_samples, matrix);
}
if (num_snps_left > 0) {
num_blocks = num_snps_left / 32;
std::fill(geno64.begin(), geno64.end(), kHomozygousMask);
for (size_t j = 0; j < num_blocks; ++j) {
snp.TransposeGeno(32, j, geno64.data());
snp += 32;
}
num_snps_left %= 32;
if (num_snps_left > 0u) {
snp.TransposeGeno(num_snps_left, num_blocks, geno64.data());
}
MaskGeno(geno64.data(), num_samples, mask64.data());
UpdateUGRMMatrix(geno64.data(), mask64.data(), num_samples, matrix);
}
}
} // namespace
// Calculates the U-GRM (Genomic Relationship Matrix) from genotype data.
void CalcUGRMMatrix(const uint8_t *geno, size_t num_samples, size_t num_snps,
double *matrix, size_t num_threads) {
std::vector<std::thread> workers;
std::vector<int64_t> matrices(num_samples * num_samples * num_threads);
auto num_snps_job = num_snps / num_threads + 1;
auto num_snps_left = num_snps % num_threads;
auto num_full_bytes = num_samples / 4;
auto num_samples_left = num_samples % 4;
auto num_bytes = num_full_bytes + (num_samples_left > 0 ? 1 : 0);
for (size_t i = 0; i < num_snps_left; ++i) {
workers.emplace_back(CalcUGRMMatrixThread, geno, num_samples, num_snps_job,
matrices.data() + i * num_samples * num_samples);
geno += num_snps_job * num_bytes;
}
--num_snps_job;
for (size_t i = num_snps_left; i < num_threads; ++i) {
workers.emplace_back(CalcUGRMMatrixThread, geno, num_samples, num_snps_job,
matrices.data() + i * num_samples * num_samples);
geno += num_snps_job * num_bytes;
}
for (auto &&iter : workers) {
iter.join();
}
auto *matrix_u = matrices.data();
for (size_t k = 1; k < num_threads; ++k) {
auto *tmp_m = matrices.data() + k * num_samples * num_samples;
for (size_t i = 0; i < num_samples; ++i) {
for (size_t j = i; j < num_samples; ++j) {
matrix_u[i * num_samples + j] += tmp_m[i * num_samples + j];
}
}
}
for (size_t i = 0; i < num_samples; ++i) {
for (size_t j = i; j < num_samples; ++j) {
matrix[i * num_samples + j] =
static_cast<double>(matrix_u[i * num_samples + j]) / num_snps / 2.0;
matrix[j * num_samples + i] = matrix[i * num_samples + j];
}
}
}
} // namespace snplib

16
src/ugrm.h Normal file
View File

@@ -0,0 +1,16 @@
// ugrm.h
#ifndef SNPLIB_SRC_UGRM_H_
#define SNPLIB_SRC_UGRM_H_
#include <cstdint>
#include <cstddef>
namespace snplib {
// Calculates the U-GRM (Genomic Relationship Matrix) from genotype data.
void CalcUGRMMatrix(const uint8_t *geno, size_t num_samples, size_t num_snps,
double *matrix, size_t num_threads);
} // namespace snplib
#endif // SNPLIB_SRC_UGRM_H_