# Compilation

```bash
cmake -DCMAKE_BUILD_TYPE=Debug -B build
cmake --build build -j 8
```

If the build fails with [CMake is not able to find BOOST libraries](https://stackoverflow.com/questions/24173330/cmake-is-not-able-to-find-boost-libraries), install the missing packages: `sudo apt-get install cmake libblkid-dev e2fslibs-dev libboost-all-dev libaudit-dev`.

# Memory-based approach

Parameters for building the index:

1. data_type
2. dist_fn
3. data_file
4. index_path_prefix
5. max_degree
6. size_of_build_search_list
7. search_DRAM_budget
8. build_DRAM_budget
9. num_threads
10. PQ_disk_bytes
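As a hypothetical example following the positional order above, an invocation might look like `build_disk_index float l2 sift_base.bin sift_index 64 75 0.03 1 16 0`, i.e. an R=64, L=75 graph over `sift_base.bin` with a 0.03 GB search-DRAM budget, a 1 GB build-DRAM budget, 16 threads, and PQ_disk_bytes left at 0. The exact binary name, units and defaults may differ; this is only to illustrate the parameter order.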

# Index constructor

```cpp
template<typename T, typename TagT>
  Index<T, TagT>::Index(Metric m, const size_t dim, const size_t max_points,
                        const bool        dynamic_index,
                        const Parameters &indexParams,
                        const Parameters &searchParams, const bool enable_tags,
                        const bool support_eager_delete)
      : Index(m, dim, max_points, dynamic_index, enable_tags, support_eager_delete) {  // Thank you C++ 11!
    _indexingQueueSize = indexParams.Get<uint32_t>("L");
    _indexingRange = indexParams.Get<uint32_t>("R");
    _indexingMaxC = indexParams.Get<uint32_t>("C");
    _indexingAlpha = indexParams.Get<float>("alpha");
    uint32_t num_threads_srch = searchParams.Get<uint32_t>("num_threads");
    uint32_t num_threads_indx = indexParams.Get<uint32_t>("num_threads");
    uint32_t num_threads = diskann_max(num_threads_srch, num_threads_indx);
    uint32_t search_l = searchParams.Get<uint32_t>("L");
    initialize_query_scratch(num_threads, search_l, _indexingQueueSize,
                             _indexingRange, dim);
  }
  template<typename T, typename TagT>
  Index<T, TagT>::Index(Metric m, const size_t dim, const size_t max_points,
                        const bool dynamic_index,
                        const bool enable_tags, const bool support_eager_delete)
      : _dist_metric(m), _dim(dim), _max_points(max_points),
        _dynamic_index(dynamic_index), _enable_tags(enable_tags),
        _support_eager_delete(support_eager_delete) {
    if (dynamic_index && !enable_tags) {
      throw diskann::ANNException(
          "ERROR: Dynamic Indexing must have tags enabled.", -1,
          __FUNCSIG__, __FILE__, __LINE__);
    }
    if (support_eager_delete && !dynamic_index) {
      diskann::cerr << "ERROR: Eager Deletes must have Dynamic Indexing "
                       "enabled. Exitting."
                    << std::endl;
      throw diskann::ANNException(
          "ERROR: Eager deletes are possible only if dynamic indexing is "
          "enabled. Exiting.",
          -1, __FUNCSIG__, __FILE__, __LINE__);
    }
    // data is stored to _nd * aligned_dim matrix with necessary
    // zero-padding
    _aligned_dim = ROUND_UP(_dim, 8);
    if (dynamic_index) {
      _num_frozen_pts = 1;
    }
    // Sanity check. While logically it is correct, max_points ==0 causes
    // downstream problems.
    if (_max_points == 0) {
      _max_points = 1;
    }
    alloc_aligned(((void **) &_data),
                  (_max_points + _num_frozen_pts) * _aligned_dim * sizeof(T),
                  8 * sizeof(T));
    std::memset(_data, 0,
                (_max_points + _num_frozen_pts) * _aligned_dim * sizeof(T));
    _ep = (unsigned) _max_points;
    //_final_graph.reserve(_max_points + _num_frozen_pts);
    _final_graph.resize(_max_points + _num_frozen_pts);
    if (_support_eager_delete) {
      _in_graph.reserve(_max_points + _num_frozen_pts);
      _in_graph.resize(_max_points + _num_frozen_pts);
    }
    if (m == diskann::Metric::COSINE && std::is_floating_point<T>::value) {
      // This is safe because T is float inside the if block.
      this->_distance = (Distance<T> *) new AVXNormalizedCosineDistanceFloat();
      this->_normalize_vecs = true;
      std::cout << "Normalizing vectors and using L2 for cosine "
                   "AVXNormalizedCosineDistanceFloat()."
                << std::endl;
      // std::cout << "Need to add functionality for COSINE metric" << std::endl;
    } else {
      this->_distance = get_distance_function<T>(m);
    }
    _locks = std::vector<std::mutex>(_max_points + _num_frozen_pts);
    if (_support_eager_delete)
      _locks_in = std::vector<std::mutex>(_max_points + _num_frozen_pts);
    _width = 0;
  }
  template<typename T, typename TagT>
  Index<T, TagT>::~Index() {
    // Ensure that no other activity is happening before dtor()
    std::unique_lock<std::shared_timed_mutex> ul(_update_lock);
    std::unique_lock<std::shared_timed_mutex> tul(_tag_lock);
    std::unique_lock<std::shared_timed_mutex> tdl(_delete_lock);
    for (auto &lock : _locks) {
      LockGuard lg(lock);
    }
    for (auto &lock : _locks_in) {
      LockGuard lg(lock);
    }
    if (this->_distance != nullptr) {
      delete this->_distance;
      this->_distance = nullptr;
    }
    if (this->_data != nullptr) {
      aligned_free(this->_data);
      this->_data = nullptr;
    }
    while (!_query_scratch.empty()) {
      auto val = _query_scratch.pop();
      while (val.indices == nullptr) {
        _query_scratch.wait_for_push_notify();
        val = _query_scratch.pop();
      }
      val.destroy();
    }
  }
```

What exactly is `search_invocation`? And what is `_dynamic_index`?

Edge selection (pruning) strategy:

```cpp
template<typename T, typename TagT>
  void Index<T, TagT>::occlude_list(std::vector<Neighbor> &pool,
                                    const float alpha, const unsigned degree,
                                    const unsigned         maxc,
                                    std::vector<Neighbor> &result,
                                    std::vector<float> &   occlude_factor) {
    if (pool.empty())
      return;
    assert(std::is_sorted(pool.begin(), pool.end()));
    assert(!pool.empty());
    float cur_alpha = 1;
    while (cur_alpha <= alpha && result.size() < degree) {
      unsigned start = 0;
      float eps = cur_alpha + 0.01f;  // used for MIPS, where we store a value
                                      // of eps in cur_alpha to
      // denote pruned out entries which we can skip in later rounds.
      while (result.size() < degree && (start) < pool.size() && start < maxc) {
        auto &p = pool[start];
        if (occlude_factor[start] > cur_alpha) {
          start++;
          continue;
        }
        occlude_factor[start] = std::numeric_limits<float>::max();
        result.push_back(p);
        for (unsigned t = start + 1; t < pool.size() && t < maxc; t++) {
          if (occlude_factor[t] > alpha)
            continue;
          float djk = _distance->compare(
              _data + _aligned_dim * (size_t) pool[t].id,
              _data + _aligned_dim * (size_t) p.id, (unsigned) _aligned_dim);
          if (_dist_metric == diskann::Metric::L2 ||
              _dist_metric == diskann::Metric::COSINE) {
            occlude_factor[t] =
                //(std::max)(occlude_factor[t], pool[t].distance / djk);
                diskann_max(occlude_factor[t], pool[t].distance / djk);
          } else if (_dist_metric ==
                     diskann::Metric::INNER_PRODUCT) {  // stylized rules for
                                                        // inner product since
                                                        // we want max instead
                                                        // of min distance
            float x = -pool[t].distance;
            float y = -djk;
            if (y > cur_alpha * x) {
              occlude_factor[t] =
                  /* (std::max)*/ diskann_max(occlude_factor[t], eps);
            }
          }
        }
        start++;
      }
      cur_alpha *= 1.2;
    }
  }
```

An interesting point here: `cur_alpha` starts at 1 and is increased iteratively (multiplied by 1.2 per round) up to `alpha`, instead of pruning with the final `alpha` in a single pass.
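In effect this implements the alpha-pruning rule from the paper: a candidate t stays eligible only while d(q, t) ≤ cur_alpha · d(p, t) holds against every already-selected neighbor p, where q is the node whose neighbor list is being built; the ratio d(q, t) / d(p, t) is exactly what `occlude_factor[t]` accumulates. Starting the outer loop at cur_alpha = 1.0 means the strictest, most diversity-enforcing rule is tried first, and the threshold is only relaxed toward `alpha` if the result list still has fewer than `degree` entries.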

# build_disk

The entry point for building the index:

```cpp
int build_disk_index(const char *dataFilePath, const char *indexFilePath,
                     const char *    indexBuildParameters,
                     diskann::Metric compareMetric)
```

`dataFilePath`: the base `.bin` file; `indexFilePath`: the path prefix under which the index is saved; `indexBuildParameters`: the parameters for building the index; `compareMetric`: usually L2.

There are a number of derived paths here; I'll write them down first:

```cpp
std::string base_file(dataFilePath);           // base data file
std::string data_file_to_use = base_file;      // base data file
std::string index_prefix_path(indexFilePath);  // index prefix
std::string pq_pivots_path = index_prefix_path + "_pq_pivots.bin";  // PQ pivots
std::string pq_compressed_vectors_path =
    index_prefix_path + "_pq_compressed.bin";  // PQ-compressed vectors
std::string mem_index_path = index_prefix_path + "_mem.index";  // in-memory index
std::string disk_index_path = index_prefix_path + "_disk.index";
std::string medoids_path = disk_index_path + "_medoids.bin";
std::string centroids_path = disk_index_path + "_centroids.bin";
std::string sample_base_prefix = index_prefix_path + "_sample";
```

Determining the number of PQ chunks:

```cpp
double final_index_ram_limit = get_memory_budget(param_list[2]);
size_t num_pq_chunks =
    (size_t)(std::floor)(_u64(final_index_ram_limit / points_num));
```
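A rough worked example (ignoring the exact bookkeeping inside `get_memory_budget`): with a 0.03 GB search-DRAM budget and 10^6 points, the per-point budget is about 0.03 × 10^9 / 10^6 ≈ 30 bytes, so each vector is compressed to roughly 30 one-byte PQ chunks.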

Randomly sample the training data (uniform (0,1) sampling):

```cpp
double p_val = ((double) MAX_PQ_TRAINING_SET_SIZE / (double) points_num);
// MAX_PQ_TRAINING_SET_SIZE is 256000L, i.e. at most this many points are used for training
gen_random_slice<T>(data_file_to_use.c_str(), p_val, train_data, train_size,
                    train_dim);
// sample with probability p_val; since only 10000 points are used here,
// p_val > 1 and the whole dataset is taken for training
```

PQ training:

```cpp
generate_pq_pivots(train_data, train_size, (uint32_t) dim, 256,
                   (uint32_t) num_pq_chunks, NUM_KMEANS_REPS,
                   pq_pivots_path, make_zero_mean);  // for L2, make_zero_mean is true

int generate_pq_pivots(const float *passed_train_data, size_t num_train,
                       unsigned dim, unsigned num_centers,
                       unsigned num_pq_chunks, unsigned max_k_means_reps,
                       std::string pq_pivots_path, bool make_zero_mean);
// num_centers is fixed at 256, max_k_means_reps at 12
```

If the file at pq_pivots_path already exists it is loaded directly; otherwise the pivots have to be trained.

For L2 there is a preprocessing step (make_zero_mean) that centers the training data around the origin. Since SIFT vectors have 128 dimensions and 32 pq_chunks are used, the dimensions divide evenly: each chunk covers 4 dimensions. And because there are 256 PQ centers per chunk, 8 bits (1 byte) suffice to encode a chunk, so every 4 float dimensions are reduced to a single uint8 code.
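Concretely for SIFT: a raw vector is 128 × 4 bytes = 512 bytes of floats, while its PQ code is 32 chunks × 1 byte = 32 bytes, a 16× compression.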

```cpp
size_t cur_chunk_size = chunk_offsets[i + 1] - chunk_offsets[i];  // 4 here
std::unique_ptr<float[]> cur_pivot_data =
    std::make_unique<float[]>(num_centers * cur_chunk_size);
std::unique_ptr<float[]> cur_data =
    std::make_unique<float[]>(num_train * cur_chunk_size);
std::unique_ptr<uint32_t[]> closest_center =
    std::make_unique<uint32_t[]>(num_train);
// copy the corresponding dimensions (4 of them) from the training data into cur_data
```

k-means++ selection of the pivots:

```cpp
kmeans::kmeanspp_selecting_pivots(cur_data.get(), num_train, cur_chunk_size,
                                  cur_pivot_data.get(), num_centers);

void kmeanspp_selecting_pivots(float* data, size_t num_points, size_t dim,
                               float* pivot_data, size_t num_centers);
```

First a random initial point is selected;

```cpp
// roll a random init_id
picked.push_back(init_id);
std::memcpy(pivot_data, data + init_id * dim, dim * sizeof(float));
// copy it into cur_pivot_data; dim here is 4
```

Then the distance from every point to the chosen point is computed, and the remaining centers (256 in total) are picked through a randomized, iterative procedure. Honestly, I didn't understand at first why it is done this way...
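This looks like standard k-means++ seeding: after the first random center, each subsequent center is drawn with probability proportional to a point's squared distance to its nearest already-chosen center, which spreads the seeds out and makes the later Lloyd's iterations converge much better than uniform random seeding. A minimal sketch of that selection step, under this assumption (not DiskANN's actual implementation):

```cpp
#include <cstddef>
#include <random>
#include <vector>

// k-means++ seeding sketch: min_dist_sq[i] holds point i's squared distance
// to the closest center picked so far; the next center is sampled with
// probability proportional to that value ("D^2 weighting").
size_t pick_next_center(const std::vector<float> &min_dist_sq,
                        std::mt19937 &rng) {
  std::discrete_distribution<size_t> dist(min_dist_sq.begin(),
                                          min_dist_sq.end());
  return dist(rng);
}
```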

Once the 256 centers have been selected and stored in cur_pivot_data, Lloyd's algorithm is run to refine them:

```cpp
kmeans::run_lloyds(cur_data.get(), num_train, cur_chunk_size,
                   cur_pivot_data.get(), num_centers, max_k_means_reps,
                   NULL, closest_center.get());

float run_lloyds(float* data, size_t num_points, size_t dim, float* centers,
                 const size_t num_centers, const size_t max_reps,
                 std::vector<size_t>* closest_docs,
                 uint32_t*            closest_center);
// closest_docs lists which points belong to each center, like an inverted index

float* docs_l2sq = new float[num_points];
math_utils::compute_vecs_l2sq(docs_l2sq, data, num_points, dim);
// iterate
lloyds_iter(data, num_points, dim, centers, num_centers,
            docs_l2sq, closest_docs, closest_center);

float lloyds_iter(float* data, size_t num_points, size_t dim, float* centers,
                  size_t num_centers, float* docs_l2sq,
                  std::vector<size_t>* closest_docs,
                  uint32_t*&           closest_center);
```

Next the closest centers are computed:

```cpp
math_utils::compute_closest_centers(data, num_points, dim, centers,
                                    num_centers, 1, closest_center,
                                    closest_docs, docs_l2sq);

void compute_closest_centers(float* data, size_t num_points, size_t dim,
                             float* pivot_data, size_t num_centers, size_t k,
                             uint32_t*            closest_centers_ivf,
                             std::vector<size_t>* inverted_index,
                             float*               pts_norms_squared);

uint32_t* closest_centers = new uint32_t[PAR_BLOCK_SIZE * k];
float*    distance_matrix = new float[num_centers * PAR_BLOCK_SIZE];
math_utils::compute_closest_centers_in_block(
    data, num_points, dim, pivot_data, num_centers,
    pts_norms_squared, pivs_norms_squared, closest_centers, distance_matrix,
    k);

void compute_closest_centers_in_block(
    const float* const data, const size_t num_points, const size_t dim,
    const float* const centers, const size_t num_centers,
    const float* const docs_l2sq, const float* const centers_l2sq,
    uint32_t* center_index, float* const dist_matrix, size_t k);
```

MKL GEMM routines are used here for the matrix computation. To be honest I don't fully understand this part yet; noting it down to revisit later: https://murphypei.github.io/blog/2019/09/cblas-gemm-gemv
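The underlying trick is the standard expansion ‖x − c‖² = ‖x‖² + ‖c‖² − 2·x·c: the point norms (`docs_l2sq`) and center norms (`centers_l2sq`) are precomputed once, so the only expensive term left is −2·X·Cᵀ, a dense matrix product that the GEMM call computes for a whole block of points at once; the closest center is then just an argmin over each row of `dist_matrix`.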

Once all pivots and the closest_center assignments have been computed, everything is saved:

```cpp
for (uint64_t j = 0; j < num_centers; j++) {
  std::memcpy(full_pivot_data.get() + j * dim + chunk_offsets[i],
              cur_pivot_data.get() + j * cur_chunk_size,
              cur_chunk_size * sizeof(float));  // save the full-precision codebook
}
diskann::save_bin<float>(pq_pivots_path.c_str(), full_pivot_data.get(),
                         (size_t) num_centers, dim);

std::string centroids_path = pq_pivots_path + "_centroid.bin";
diskann::save_bin<float>(centroids_path.c_str(), centroid.get(), (size_t) dim,
                         1);  // the centroid vector of the whole data set

std::string rearrangement_path = pq_pivots_path + "_rearrangement_perm.bin";
diskann::save_bin<uint32_t>(rearrangement_path.c_str(), rearrangement.data(),
                            rearrangement.size(), 1);  // the order in which the dimensions were rearranged

std::string chunk_offsets_path = pq_pivots_path + "_chunk_offsets.bin";
diskann::save_bin<uint32_t>(chunk_offsets_path.c_str(), chunk_offsets.data(),
                            chunk_offsets.size(), 1);  // how many original dimensions each PQ chunk covers
```

Back in the index-building function:

```cpp
generate_pq_pivots(train_data, train_size, (uint32_t) dim, 256,
                   (uint32_t) num_pq_chunks, NUM_KMEANS_REPS,
                   pq_pivots_path, make_zero_mean);
generate_pq_data_from_pivots<T>(data_file_to_use.c_str(), 256,
                                (uint32_t) num_pq_chunks, pq_pivots_path,
                                pq_compressed_vectors_path);
```

generate_pq_data_from_pivots performs the PQ compression of every vector using those pivots. Note that the pivots above were trained only on the sampled data.
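A minimal sketch of what this encoding amounts to (the names and data layout here are illustrative, not DiskANN's actual API): for every vector and every chunk, find the nearest of the 256 chunk pivots and store its index as one byte.

```cpp
#include <cstddef>
#include <cstdint>
#include <limits>
#include <vector>

// Illustrative PQ encoding of one vector: chunk c covers dimensions
// [offsets[c], offsets[c+1]); pivots[c] stores 256 sub-vectors of that width,
// laid out flat one after another.
std::vector<uint8_t> pq_encode(const std::vector<float> &vec,
                               const std::vector<size_t> &offsets,
                               const std::vector<std::vector<float>> &pivots) {
  const size_t num_chunks = offsets.size() - 1;
  std::vector<uint8_t> code(num_chunks);
  for (size_t c = 0; c < num_chunks; c++) {
    const size_t width = offsets[c + 1] - offsets[c];
    float  best = std::numeric_limits<float>::max();
    size_t best_id = 0;
    for (size_t p = 0; p < 256; p++) {  // 256 centers -> 1 byte per chunk
      float d = 0;
      for (size_t k = 0; k < width; k++) {
        float diff = vec[offsets[c] + k] - pivots[c][p * width + k];
        d += diff * diff;
      }
      if (d < best) { best = d; best_id = p; }
    }
    code[c] = (uint8_t) best_id;
  }
  return code;
}
```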

Building the (in-memory Vamana) index:

```cpp
diskann::build_merged_vamana_index<T>(
    data_file_to_use.c_str(), diskann::Metric::L2, L, R, p_val,
    indexing_ram_budget, mem_index_path, medoids_path, centroids_path);

int build_merged_vamana_index(std::string     base_file,
                              diskann::Metric compareMetric, unsigned L,
                              unsigned R, double sampling_rate,
                              double ram_budget, std::string mem_index_path,
                              std::string medoids_file,
                              std::string centroids_file);

paras.Set<unsigned>("L", (unsigned) L);
paras.Set<unsigned>("R", (unsigned) R);
paras.Set<unsigned>("C", 750);   // an odd one: C is the size of the candidate pool used during final pruning
paras.Set<float>("alpha", 1.2f); // alpha is set here
paras.Set<unsigned>("num_rnds", 2);
paras.Set<bool>("saturate_graph", 1);
paras.Set<std::string>("save_path", mem_index_path);
```

Here the data has to be partitioned according to the memory budget, to decide how many parts are needed:

```cpp
int num_parts =
    partition_with_ram_budget<T>(base_file, sampling_rate, ram_budget,
                                 2 * R / 3, merged_index_prefix, 2);
// note the k_base parameter (the trailing 2): each vector is assigned to its
// k_base closest clusters

template<typename T>
int partition_with_ram_budget(const std::string data_file,
                              const double sampling_rate, double ram_budget,
                              size_t            graph_degree,
                              const std::string prefix_path, size_t k_base);
```

The split is not random. Starting with 3 parts, the code runs k-means and assigns each vector to the corresponding parts. It also generates a shard_idmap file per shard that records the ids of the points in that k-means cluster: `prefix_path + "_subshard-" + std::to_string(i) + "_ids_uint32.bin"`.

The data is then split out shard by shard:

```cpp
std::string merged_index_prefix = mem_index_path + "_tempFiles";
std::string shard_base_file =
    merged_index_prefix + "_subshard-" + std::to_string(p) + ".bin";
std::string shard_ids_file =
    merged_index_prefix + "_subshard-" + std::to_string(p) + "_ids_uint32.bin";
retrieve_shard_data_from_ids<T>(base_file, shard_ids_file, shard_base_file);

template<typename T>
int retrieve_shard_data_from_ids(const std::string data_file,
                                 std::string       idmap_filename,
                                 std::string       data_filename);

std::string shard_index_file =
    merged_index_prefix + "_subshard-" + std::to_string(p) + "_mem.index";
```

Through retrieve_shard_data_from_ids we obtain the (partially overlapping) sampled data for each shard and write it to shard_base_file.

Stepping into the Index constructor, parameters such as frozen_pts do not need to be considered here. One thing worth noting is that DiskANN supports dimensions that are not divisible by 8 through an alignment step: `_aligned_dim = ROUND_UP(_dim, 8);`.
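For example, a 100-dimensional dataset gets `_aligned_dim = ROUND_UP(100, 8) = 104`; the four extra dimensions are zero-filled (the `memset` in the constructor), which leaves L2 distances unchanged.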

```cpp
_u64 shard_base_dim, shard_base_pts;
get_bin_metadata(shard_base_file, shard_base_pts, shard_base_dim);
std::unique_ptr<diskann::Index<T>> _pvamanaIndex =
    std::unique_ptr<diskann::Index<T>>(
        new diskann::Index<T>(compareMetric, shard_base_dim,
                              shard_base_pts, false));  // TODO: Single?
_pvamanaIndex->build(shard_base_file.c_str(), shard_base_pts, paras);
_pvamanaIndex->save(shard_index_file.c_str());

template<typename T, typename TagT>
Index<T, TagT>::Index(Metric m, const size_t dim, const size_t max_points,
                      const bool dynamic_index,
                      const bool enable_tags, const bool support_eager_delete)
    // all the bool parameters default to false
    : _dist_metric(m), _dim(dim), _max_points(max_points),
      _dynamic_index(dynamic_index), _enable_tags(enable_tags),
      _support_eager_delete(support_eager_delete)
```

build has two overloads here, and at first glance the call does not seem to match either of them... Setting a GDB breakpoint shows it ends up in the build function below, which puzzled me at first; presumably the call matches because the trailing tags parameter has a default value, so the three-argument call resolves to this overload:

```cpp
void Index<T, TagT>::build(const char *             filename,
                           const size_t             num_points_to_load,
                           Parameters &             parameters,
                           const std::vector<TagT> &tags)
```

There is actually not much to see inside build itself; most of the code is sanity checking.

Let's step into the link function:

```cpp
link(parameters);

void Index<T, TagT>::link(Parameters &parameters)

unsigned num_threads = parameters.Get<unsigned>("num_threads");
// if num_threads is not specified, all available CPUs are used
if (num_threads != 0)
  omp_set_num_threads(num_threads);
```

The link pass is executed twice, i.e. Vamana runs for two rounds. First some parameters are prepared:

```cpp
_indexingQueueSize = parameters.Get<unsigned>("L");  // search list size
_indexingRange = parameters.Get<unsigned>("R");
_indexingMaxC = parameters.Get<unsigned>("C");
const float last_round_alpha = parameters.Get<float>("alpha");
unsigned    L = _indexingQueueSize;
uint32_t num_syncs =
    (unsigned) DIV_ROUND_UP(_nd + _num_frozen_pts, (64 * 64));
if (num_syncs < 40)  // the nodes are split into num_syncs batches; the batches are not
  num_syncs = 40;    // processed in parallel, but they make progress reporting easier
std::vector<unsigned> Lvec;
Lvec.push_back(L);
Lvec.push_back(L);
const unsigned NUM_RNDS = 2;  // two rounds

_indexingAlpha = 1.0f;  // the index's alpha parameter
std::vector<unsigned>          visit_order;
std::vector<diskann::Neighbor> pool, tmp;
tsl::robin_set<unsigned>       visited;
```
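For example, with the roughly 10000-point shards used here, DIV_ROUND_UP(10000, 64 * 64) = 3, which is below the floor of 40, so num_syncs becomes 40 and each batch covers about 250 nodes.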

Compute the entry point and set up some timing variables:

```cpp
_ep = calculate_entry_point();
// returns the dataset point closest to the centroid of the data

double   sync_time = 0, total_sync_time = 0;
double   inter_time = 0, total_inter_time = 0;
size_t   inter_count = 0, total_inter_count = 0;
// timing counters
```

There are several nested loops here and it is easy to get dizzy. The outermost loop is over the Vamana rounds, the next one over the data batches, and the innermost one over the nodes themselves:

```cpp
for (uint32_t rnd_no = 0; rnd_no < NUM_RNDS; rnd_no++) {  // two rounds by default
  for (uint32_t sync_num = 0; sync_num < num_syncs; sync_num++) {
    // this innermost loop runs in parallel
    for (_s64 node_ctr = (_s64) start_id; node_ctr < (_s64) end_id;
         ++node_ctr)
```

Greedy search is used to find candidate neighbors. There are many call layers here, each changing a parameter or two...:

```cpp
pool.reserve(L * 2);
visited.reserve(L * 2);
get_expanded_nodes(node, L, init_ids, pool, visited);
void Index<T, TagT>::get_expanded_nodes(
    const size_t node_id, const unsigned Lindex,
    std::vector<unsigned>     init_ids,
    std::vector<Neighbor> &   expanded_nodes_info,
    tsl::robin_set<unsigned> &expanded_nodes_ids) {
    const T *node_coords = _data + _aligned_dim * node_id;
    if (init_ids.size() == 0)
        init_ids.emplace_back(_ep);
    std::vector<unsigned> des;
    std::vector<Neighbor> best_L_nodes;
    best_L_nodes.resize(Lindex + 1);
    tsl::robin_set<unsigned> inserted_into_pool_rs;
    boost::dynamic_bitset<>  inserted_into_pool_bs;
    iterate_to_fixed_point(node_coords, Lindex, init_ids, expanded_nodes_info,
                           expanded_nodes_ids, best_L_nodes, des,
                           inserted_into_pool_rs, inserted_into_pool_bs);
}
std::pair<uint32_t, uint32_t> Index<T, TagT>::iterate_to_fixed_point(
    const T *node_coords, const unsigned Lsize,
    const std::vector<unsigned> &init_ids,
    std::vector<Neighbor> &      expanded_nodes_info,
    tsl::robin_set<unsigned> &   expanded_nodes_ids,
    std::vector<Neighbor> &best_L_nodes, std::vector<unsigned> &des,
    tsl::robin_set<unsigned> &inserted_into_pool_rs,
    boost::dynamic_bitset<> &inserted_into_pool_bs, bool ret_frozen,
    bool search_invocation)
```

The code first puts the entries of init_ids into best_L_nodes (init_ids has length 1, because the entry point is the point closest to the centroid and nothing else is added) and marks them as already inserted into the pool: inserted_into_pool_bs[id] = 1.

After setting a GDB breakpoint I found that Vamana does not initialize a random graph at all; it builds the graph purely by inserting points one at a time. Amazing!


A greedy search is performed over the current graph, followed by pruning:

```cpp
get_expanded_nodes(node, L, init_ids, pool, visited);
// the node's existing neighbors are also added to the pool
prune_neighbors(node, pool, pruned_list);
```

This part is too abstract to describe well in words; in short, it is a direct implementation of the algorithm from the paper.

When it occludes with the alpha factor, it does not do so in a single pass; instead it repeatedly increases the initial factor, which is an interesting detail.
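In outline, each insertion does the following (my reading of the code, following the paper's GreedySearch/RobustPrune structure): (1) greedy search from the entry point toward the node being inserted, collecting every visited node as a candidate (get_expanded_nodes); (2) prune the candidates with the alpha rule in occlude_list to keep at most R diverse out-neighbors (prune_neighbors); (3) add a reverse edge from each chosen neighbor back to the node, re-pruning any neighbor whose degree now exceeds R. The second round repeats the whole pass with alpha = 1.2 instead of 1.0.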

The per-shard indexes are now built and saved; what remains is to merge them:

```cpp
diskann::merge_shards(merged_index_prefix + "_subshard-", "_mem.index",
                      merged_index_prefix + "_subshard-", "_ids_uint32.bin",
                      num_parts, R, mem_index_path, medoids_file);

int merge_shards(const std::string &vamana_prefix,
                 const std::string &vamana_suffix,
                 const std::string &idmaps_prefix,
                 const std::string &idmaps_suffix, const _u64 nshards,
                 unsigned max_degree, const std::string &output_vamana,
                 const std::string &medoids_file)
```
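As far as I can tell from the code, merge_shards maps each shard's local node ids back to global ids via the `_ids_uint32.bin` idmap files, takes the union of every node's adjacency lists across all shards that contain it (truncated to max_degree = R), writes the combined graph to output_vamana, and stores each shard's medoid (translated to a global id) in medoids_file.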