DiskANN（一）：磁盘索引构建 - 数据库

# 编译

	cmake -DCMAKE_BUILD_TYPE=Debug -B build
	cmake --build build -j 8

出现报错 [CMake is not able to find BOOST libraries](https://stackoverflow.com/questions/24173330/cmake-is-not-able-to-find-boost-libraries) , 解决 sudo apt-get install cmake libblkid-dev e2fslibs-dev libboost-all-dev libaudit-dev 。

# 基于内存方案

构建索引的参数：

1data_type、2dist_fn、3data_file、4index_path_prefix、5max_degree、6size_of_build_search_list、7search_DRAM_budget、8build_DRAM_budget、9 num_thread、10PQ_disk_bytes

# Index 构建函数

	template<typename T, typename TagT>
	Index<T, TagT>::Index(Metric m, const size_t dim, const size_t max_points,
	const bool dynamic_index,
	const Parameters &indexParams,
	const Parameters &searchParams, const bool enable_tags,
	const bool support_eager_delete)
	: Index(m, dim, max_points, dynamic_index, enable_tags, support_eager_delete) { // Thank you C++ 11!
	_indexingQueueSize = indexParams.Get<uint32_t>("L");
	_indexingRange = indexParams.Get<uint32_t>("R");
	_indexingMaxC = indexParams.Get<uint32_t>("C");
	_indexingAlpha = indexParams.Get<float>("alpha");

	uint32_t num_threads_srch = searchParams.Get<uint32_t>("num_threads");
	uint32_t num_threads_indx = indexParams.Get<uint32_t>("num_threads");
	uint32_t num_threads = diskann_max(num_threads_srch, num_threads_indx);
	uint32_t search_l = searchParams.Get<uint32_t>("L");

	initialize_query_scratch(num_threads, search_l, _indexingQueueSize,
	_indexingRange, dim);
	}

	template<typename T, typename TagT>
	Index<T, TagT>::Index(Metric m, const size_t dim, const size_t max_points,
	const bool dynamic_index,
	const bool enable_tags, const bool support_eager_delete)
	: _dist_metric(m), _dim(dim), _max_points(max_points),
	_dynamic_index(dynamic_index), _enable_tags(enable_tags),
	_support_eager_delete(support_eager_delete) {
	if (dynamic_index && !enable_tags) {
	throw diskann::ANNException(
	"ERROR: Eager Deletes must have Dynamic Indexing enabled.", -1,
	__FUNCSIG__, __FILE__, __LINE__);
	diskann::cerr
	<< "WARNING: Dynamic Indices must have tags enabled. Auto-enabling."
	<< std::endl;
	_enable_tags = true;
	}
	if (support_eager_delete && !dynamic_index) {
	diskann::cerr << "ERROR: Eager Deletes must have Dynamic Indexing "
	"enabled. Exitting."
	<< std::endl;
	throw diskann::ANNException(
	"ERROR: Eager deletes are possible only if dynamic indexing is "
	"enabled. Exiting.",
	-1, __FUNCSIG__, __FILE__, __LINE__);
	}
	// data is stored to _nd * aligned_dim matrix with necessary
	// zero-padding
	_aligned_dim = ROUND_UP(_dim, 8);

	if (dynamic_index) {
	_num_frozen_pts = 1;
	}
	// Sanity check. While logically it is correct, max_points ==0 causes
	// downstream problems.
	if (_max_points == 0) {
	_max_points = 1;
	}

	alloc_aligned(((void **) &_data),
	(_max_points + _num_frozen_pts) * _aligned_dim * sizeof(T),
	8 * sizeof(T));
	std::memset(_data, 0,
	(_max_points + _num_frozen_pts) * _aligned_dim * sizeof(T));

	_ep = (unsigned) _max_points;

	//_final_graph.reserve(_max_points + _num_frozen_pts);
	_final_graph.resize(_max_points + _num_frozen_pts);

	if (_support_eager_delete) {
	_in_graph.reserve(_max_points + _num_frozen_pts);
	_in_graph.resize(_max_points + _num_frozen_pts);
	}

	if (m == diskann::Metric::COSINE && std::is_floating_point<T>::value) {
	// This is safe because T is float inside the if block.
	this->_distance = (Distance<T> *) new AVXNormalizedCosineDistanceFloat();
	this->_normalize_vecs = true;
	std::cout<<"Normalizing vectors and using L2 for cosine AVXNormalizedCosineDistanceFloat()." << std::endl;
	// std::cout << "Need to add functionality for COSINE metric" << std::endl;
	} else {
	this->_distance = get_distance_function<T>(m);
	}

	_locks = std::vector<std::mutex>(_max_points + _num_frozen_pts);

	if (_support_eager_delete)
	_locks_in = std::vector<std::mutex>(_max_points + _num_frozen_pts);

	_width = 0;
	}

	template<typename T, typename TagT>
	Index<T, TagT>::~Index() {
	// Ensure that no other activity is happening before dtor()
	std::unique_lock<std::shared_timed_mutex> ul(_update_lock);
	std::unique_lock<std::shared_timed_mutex> tul(_tag_lock);
	std::unique_lock<std::shared_timed_mutex> tdl(_delete_lock);

	for (auto &lock : _locks) {
	LockGuard lg(lock);
	}
	for (auto &lock : _locks_in) {
	LockGuard lg(lock);
	}

	if (this->_distance != nullptr) {
	delete this->_distance;
	this->_distance = nullptr;
	}
	if (this->_data != nullptr) {
	aligned_free(this->_data);
	this->_data = nullptr;
	}

	while (!_query_scratch.empty()) {
	auto val = _query_scratch.pop();
	while (val.indices == nullptr) {
	_query_scratch.wait_for_push_notify();
	val = _query_scratch.pop();
	}
	val.destroy();
	}
	}

	_locks = std::vector<std::mutex>(_max_points + _num_frozen_pts);

	if (_support_eager_delete)
	_locks_in = std::vector<std::mutex>(_max_points + _num_frozen_pts);

	_width = 0;
	}

search_invocation 是什么？_dynamic_index 是什么？

选边策略：

	template<typename T, typename TagT>
	void Index<T, TagT>::occlude_list(std::vector<Neighbor> &pool,
	const float alpha, const unsigned degree,
	const unsigned maxc,
	std::vector<Neighbor> &result,
	std::vector<float> & occlude_factor) {
	if (pool.empty())
	return;
	assert(std::is_sorted(pool.begin(), pool.end()));
	assert(!pool.empty());

	float cur_alpha = 1;
	while (cur_alpha <= alpha && result.size() < degree) {
	unsigned start = 0;
	float eps = cur_alpha + 0.01f; // used for MIPS, where we store a value
	// of eps in cur_alpha to
	// denote pruned out entries which we can skip in later rounds.
	while (result.size() < degree && (start) < pool.size() && start < maxc) {
	auto &p = pool[start];
	if (occlude_factor[start] > cur_alpha) {
	start++;
	continue;
	}
	occlude_factor[start] = std::numeric_limits<float>::max();
	result.push_back(p);
	for (unsigned t = start + 1; t < pool.size() && t < maxc; t++) {
	if (occlude_factor[t] > alpha)
	continue;
	float djk = _distance->compare(
	_data + _aligned_dim * (size_t) pool[t].id,
	_data + _aligned_dim * (size_t) p.id, (unsigned) _aligned_dim);
	if (_dist_metric == diskann::Metric::L2 \|\|
	_dist_metric == diskann::Metric::COSINE) {
	occlude_factor[t] =
	//(std::max)(occlude_factor[t], pool[t].distance / djk);
	diskann_max(occlude_factor[t], pool[t].distance / djk);
	} else if (_dist_metric ==
	diskann::Metric::INNER_PRODUCT) { // stylized rules for
	// inner product since
	// we want max instead
	// of min distance
	float x = -pool[t].distance;
	float y = -djk;
	if (y > cur_alpha * x) {
	occlude_factor[t] =
	/* (std::max)*/ diskann_max(occlude_factor[t], eps);
	}
	}
	}
	start++;
	}
	cur_alpha *= 1.2;
	}
	}

这里有一个很有意思的点，cur_alpha 从 1 开始递增迭代来计算。

# build_disk

建立索引入口：

	int build_disk_index(const char dataFilePath, const char indexFilePath,
	const char * indexBuildParameters,
	diskann::Metric compareMetric)

dataFilePath: base_bin；indexFilePath: 保存索引路径；indexBuildParameter：构建索引的参数；compareMetric：一般为 l2。

这里有一系列的路径，我先记录下来：

std::string base_file(dataFilePath); // base
std::string data_file_to_use = base_file; // base
std::string index_prefix_path(indexFilePath); // index 前缀
std::string pq_pivots_path = index_prefix_path + "_pq_pivots.bin"; // 保存pivots
std::string pq_compressed_vectors_path =
    index_prefix_path + "_pq_compressed.bin"; // 保存压缩的pq
std::string mem_index_path = index_prefix_path + "_mem.index"; // 保存内存索引
std::string disk_index_path = index_prefix_path + "_disk.index"; 
std::string medoids_path = disk_index_path + "_medoids.bin";
std::string centroids_path = disk_index_path + "_centroids.bin";
std::string sample_base_prefix = index_prefix_path + "_sample";

划分 pq 块：

	double final_index_ram_limit = get_memory_budget(param_list[2]);
	size_t num_pq_chunks =
	(size_t)(std::floor)(_u64(final_index_ram_limit / points_num));

随机采样训练数据（0,1）分布：

double p_val = ((double) MAX_PQ_TRAINING_SET_SIZE / (double) points_num); // 这里 max 是 256000L，也就是说最多训练这么多数据

gen_random_slice<T>(data_file_to_use.c_str(), p_val, train_data, train_size,

                        train_dim); // 按照 p_val 进行采样，因为这里只是使用了 10000 个数据，所以 p>1 直接取全部数据进行训练

PQ 训练

	generate_pq_pivots(train_data, train_size, (uint32_t) dim, 256,
	(uint32_t) num_pq_chunks, NUM_KMEANS_REPS,
	pq_pivots_path, make_zero_mean); //l2 make_zero_mean 为 true
	int generate_pq_pivots(const float *passed_train_data, size_t num_train,
	unsigned dim, unsigned num_centers,
	unsigned num_pq_chunks, unsigned max_k_means_reps,
	std::string pq_pivots_path, bool make_zero_mean) // 固定 center 为 256，max_k_means_reps 为 12

如果 pq_pivots_path 文件存在，那么直接读入，反之则需要进行训练。

l2 计算预处理，可以将 traindata 数据更加集中与 center，由于 sift 数据集向量为 128 维，使用 32 个 pq_chunks，可以被等分，每一个 chunks 负责 4 个维度，又因为 pq center 为 256，所以 8 位 1byte 就可以表示，4 维度会降维到 1 维，并且有 float 变为 uint8。

	size_t cur_chunk_size = chunk_offsets[i + 1] - chunk_offsets[i]; // 这个直接为 4
	std::unique_ptr<float[]> cur_pivot_data =
	std::make_unique<float[]>(num_centers * cur_chunk_size);
	std::unique_ptr<float[]> cur_data =
	std::make_unique<float[]>(num_train * cur_chunk_size);
	std::unique_ptr<uint32_t[]> closest_center =
	std::make_unique<uint32_t[]>(num_train);
	// 从 traindata 中导入对应维度的数据到 cur_data，也就是 4 维的数据

keams 选择 pivots：

	kmeans::kmeanspp_selecting_pivots(cur_data.get(), num_train, cur_chunk_size,
	cur_pivot_data.get(), num_centers);
	void kmeanspp_selecting_pivots(float* data, size_t num_points, size_t dim,
	float* pivot_data, size_t num_centers)

首先随机选择一个初始的点；

	roll init_id
	picked.push_back(init_id);
	std::memcpy(pivot_data, data + init_id * dim, dim * sizeof(float)); // 拷贝到 cur_pivot_data 中，dim 这里是 4

计算所有点与该点的距离，然后用一个随机的迭代的方式获取其他 256 个随机点，说实话，这里我不是很明白，他为什么这么做。。。。

选择了 256 个 center 并保存在 cur_pivot_data 中之后，用 lloyds 算法进行训练：

	kmeans::run_lloyds(cur_data.get(), num_train, cur_chunk_size,
	cur_pivot_data.get(), num_centers, max_k_means_reps,
	NULL, closest_center.get());
	float run_lloyds(float* data, size_t num_points, size_t dim, float* centers,
	const size_t num_centers, const size_t max_reps,
	std::vector<size_t>* closest_docs,
	uint32_t* closest_center) //cloest_docs 就是说这个 center 内有哪些节点，类似于倒排索引
	float* docs_l2sq = new float[num_points];
	math_utils::compute_vecs_l2sq(docs_l2sq, data, num_points, dim);
	// 迭代训练
	lloyds_iter(data, num_points, dim, centers, num_centers,
	docs_l2sq, closest_docs, closest_center);
	float lloyds_iter(float* data, size_t num_points, size_t dim, float* centers,
	size_t num_centers, float* docs_l2sq,
	std::vector<size_t>* closest_docs,
	uint32_t*& closest_center)

开始进行计算 closest_centers：

	math_utils::compute_closest_centers(data, num_points, dim, centers,
	num_centers, 1, closest_center,
	closest_docs, docs_l2sq);
	void compute_closest_centers(float* data, size_t num_points, size_t dim,
	float* pivot_data, size_t num_centers, size_t k,
	uint32_t* closest_centers_ivf,
	std::vector<size_t>* inverted_index,
	float* pts_norms_squared)

	uint32_t* closest_centers = new uint32_t[PAR_BLOCK_SIZE * k];
	float* distance_matrix = new float[num_centers * PAR_BLOCK_SIZE];
	math_utils::compute_closest_centers_in_block(
	data, num_ponts, dim, pivot_data, num_centers,
	pts_norms_squared, pivs_norms_squared, closest_centers, distance_matrix,
	k);
	void compute_closest_centers_in_block(
	const float* const data, const size_t num_points, const size_t dim,
	const float* const centers, const size_t num_centers,
	const float* const docs_l2sq, const float* const centers_l2sq,
	uint32_t* center_index, float* const dist_matrix, size_t k)

这里他用了 MKL 的函数进行矩阵计算，说实话这一块不是很懂，先记下来以后有需要再去看：https://murphypei.github.io/blog/2019/09/cblas-gemm-gemv

计算好所有 pivot，以及 cloest_center 之后就可以进行保存了：

	for (uint64_t j = 0; j < num_centers; j++) {
	std::memcpy(full_pivot_data.get() + j * dim + chunk_offsets[i],
	cur_pivot_data.get() + j * cur_chunk_size,
	cur_chunk_size * sizeof(float)); // 保存全精度码本
	}
	diskann::save_bin<float>(pq_pivots_path.c_str(), full_pivot_data.get(),
	(size_t) num_centers, dim);
	std::string centroids_path = pq_pivots_path + "_centroid.bin";
	diskann::save_bin<float>(centroids_path.c_str(), centroid.get(), (size_t) dim,
	1); // 点集的中心向量
	std::string rearrangement_path = pq_pivots_path + "_rearrangement_perm.bin";
	diskann::save_bin<uint32_t>(rearrangement_path.c_str(), rearrangement.data(),
	rearrangement.size(), 1); // 就是维度是按什么顺序进行 rearrangement
	std::string chunk_offsets_path = pq_pivots_path + "_chunk_offsets.bin";
	diskann::save_bin<uint32_t>(chunk_offsets_path.c_str(), chunk_offsets.data(),
	chunk_offsets.size(), 1); // 每个 pq chunk 负责的原始维度数目

回到构建索引函数中：

	generate_pq_pivots(train_data, train_size, (uint32_t) dim, 256,
	(uint32_t) num_pq_chunks, NUM_KMEANS_REPS,
	pq_pivots_path, make_zero_mean);

	generate_pq_data_from_pivots<T>(data_file_to_use.c_str(), 256,
	(uint32_t) num_pq_chunks, pq_pivots_path,
	pq_compressed_vectors_path);

这个 generate_pq_data_from_pivots 就是根据 pivot 把向量进行 qp 压缩。注意上面的 pivot 是用采样数据进行训练的。

构建索引：

	diskann::build_merged_vamana_index<T>(
	data_file_to_use.c_str(), diskann::Metric::L2, L, R, p_val,
	indexing_ram_budget, mem_index_path, medoids_path, centroids_path);
	int build_merged_vamana_index(std::string base_file,
	diskann::Metric compareMetric, unsigned L,
	unsigned R, double sampling_rate,
	double ram_budget, std::string mem_index_path,
	std::string medoids_file,
	std::string centroids_file)
	paras.Set<unsigned>("L", (unsigned) L);
	paras.Set<unsigned>("R", (unsigned) R);
	paras.Set<unsigned>("C", 750); // 这个 C 是真的奇奇妙妙的，这个参数确定的是最终裁边的时候侯选池的大小
	paras.Set<float>("alpha", 1.2f); // 在这里设置了 alpha
	paras.Set<unsigned>("num_rnds", 2);
	paras.Set<bool>("saturate_graph", 1);
	paras.Set<std::string>("save_path", mem_index_path);

这里就需要根据内存的预算进行划分，判断需要划分为多少个 part：

	int num_parts =
	partition_with_ram_budget<T>(base_file, sampling_rate, ram_budget,
	2 * R / 3, merged_index_prefix, 2);
	// 注意这里的 kbase 参数，这个参数实际上就是说把一个向量 assign 到 kbase 个 cluster 中
	template<typename T>
	int partition_with_ram_budget(const std::string data_file,
	const double sampling_rate, double ram_budget,
	size_t graph_degree,
	const std::string prefix_path, size_t k_base);

这里划分不是随机进行划分的，代码首先以 part=3 为起始，进行 Kmeans 聚类，把对应的向量放到不同的 part 中。并且它还是生成一个 shard_idmap 这样的文件，记录每个 kmeans 聚类中的点 id *refix_path* + "_subshard-" + std::to_string(i) + "_ids_uint32.bin 。

我们根据每一个 part 进行数据划分：

	std::string merged_index_prefix = mem_index_path + "_tempFiles";
	std::string shard_base_file =
	merged_index_prefix + "_subshard-" + std::to_string(p) + ".bin";

	std::string shard_ids_file = merged_index_prefix + "_subshard-" +
	std::to_string(p) + "_ids_uint32.bin";

	retrieve_shard_data_from_ids<T>(base_file, shard_ids_file,
	shard_base_file);
	template<typename T>
	int retrieve_shard_data_from_ids(const std::string data_file,
	std::string idmap_filename,
	std::string data_filename);
	std::string shard_index_file =
	merged_index_prefix + "_subshard-" + std::to_string(p) + "_mem.index";

通过 retrieve_shard_data_from_ids 我们得到了部分重叠的采样数据，并把数据放到 shard_base_file。

我们进入到 Index 的类构造函数中，会发现一些参数比如 frozen_pts 这些都不需要考虑。一个需要注意的点就是 diskann 可以支持 dim 不被 8 整除，会有一个 aligned 操作， _aligned_dim = ROUND_UP(_dim, 8); 。

	_u64 shard_base_dim, shard_base_pts;
	get_bin_metadata(shard_base_file, shard_base_pts, shard_base_dim);
	std::unique_ptr<diskann::Index<T>> _pvamanaIndex =
	std::unique_ptr<diskann::Index<T>>(
	new diskann::Index<T>(compareMetric, shard_base_dim,
	shard_base_pts, false)); // TODO: Single?
	_pvamanaIndex->build(shard_base_file.c_str(), shard_base_pts, paras);
	_pvamanaIndex->save(shard_index_file.c_str());

	template<typename T, typename TagT>
	Index<T, TagT>::Index(Metric m, const size_t dim, const size_t max_points,
	const bool dynamic_index,
	const bool enable_tags, const bool support_eager_delete) //bool 值全部默认 false
	: _dist_metric(m), _dim(dim), _max_points(max_points),
	_dynamic_index(dynamic_index), _enable_tags(enable_tags),
	_support_eager_delete(support_eager_delete)

这里 build 有两个重载，但是明明参数和这两个重载函数都不一致。。。最后 gdb 打断点发现进入的是这个 build 函数内，真的奇奇怪怪的：

	void Index<T, TagT>::build(const char * filename,
	const size_t num_points_to_load,
	Parameters & parameters,
	const std::vector<TagT> &tags)

build 这个函数里面其实能看的还是比较少的，大部分代码都在做安全性检查。

我们进入到 link 这个函数里面：

	link(parameters);
	void Index<T, TagT>::link(Parameters &parameters)

	unsigned num_threads = parameters.Get<unsigned>("num_threads"); // 注意如果不指定 num_threads，会使用所有可用的 cpu
	if (num_threads != 0)
	omp_set_num_threads(num_threads);

link 操作会执行两次，也就是跑两边 vanama，先把一些参数准备好：

	_indexingQueueSize = parameters.Get<unsigned>("L"); // Search list size
	_indexingRange = parameters.Get<unsigned>("R");
	_indexingMaxC = parameters.Get<unsigned>("C");
	const float last_round_alpha = parameters.Get<float>("alpha");
	unsigned L = _indexingQueueSize;

	uint32_t num_syncs =
	(unsigned) DIV_ROUND_UP(_nd + _num_frozen_pts, (64 * 64));
	if (num_syncs < 40) // 根据节点数量，可以把当前数据分成多块，虽然并不会并行，但是会让打印进度更加方便：
	num_syncs = 40;

	std::vector<unsigned> Lvec;
	Lvec.push_back(L);
	Lvec.push_back(L);
	const unsigned NUM_RNDS = 2; // 两阶段


	_indexingAlpha = 1.0f; //index 的 alpha 参数

	std::vector<unsigned> visit_order;
	std::vector<diskann::Neighbor> pool, tmp;
	tsl::robin_set<unsigned> visited;

计算 entry_point，并且设置一些时间参数:

	_ep = calculate_entry_point();
	// 这里是返回离数据集中心最近的点（也是数据集上的点）

	double sync_time = 0, total_sync_time = 0;
	double inter_time = 0, total_inter_time = 0;
	size_t inter_count = 0, total_inter_count = 0;
	// 一些时间参数

这里有多个循环，很容易就绕晕了，最外层是 vanama 次数，然后是切分数据，最后是

	for (uint32_t rnd_no = 0; rnd_no < NUM_RNDS; rnd_no++) { // 默认执行两轮
	for (uint32_t sync_num = 0; sync_num < num_syncs; sync_num++) {
	// 这个 for 并行计算
	for (_s64 node_ctr = (_s64) start_id; node_ctr < (_s64) end_id;
	++node_ctr)

使用贪心检索查找候选邻居，这里有太多层了，每一层换一个参数。。。。：

	pool.reserve(L * 2);
	visited.reserve(L * 2);
	get_expanded_nodes(node, L, init_ids, pool, visited);

	void Index<T, TagT>::get_expanded_nodes(
	const size_t node_id, const unsigned Lindex,
	std::vector<unsigned> init_ids,
	std::vector<Neighbor> & expanded_nodes_info,
	tsl::robin_set<unsigned> &expanded_nodes_ids) {
	const T node_coords = _data + _aligned_dim node_id;

	if (init_ids.size() == 0)
	init_ids.emplace_back(_ep);

	std::vector<unsigned> des;
	std::vector<Neighbor> best_L_nodes;
	best_L_nodes.resize(Lindex + 1);
	tsl::robin_set<unsigned> inserted_into_pool_rs;
	boost::dynamic_bitset<> inserted_into_pool_bs;

	iterate_to_fixed_point(node_coords, Lindex, init_ids, expanded_nodes_info,
	expanded_nodes_ids, best_L_nodes, des,
	inserted_into_pool_rs, inserted_into_pool_bs);

	}

	std::pair<uint32_t, uint32_t> Index<T, TagT>::iterate_to_fixed_point(
	const T *node_coords, const unsigned Lsize,
	const std::vector<unsigned> &init_ids,
	std::vector<Neighbor> & expanded_nodes_info,
	tsl::robin_set<unsigned> & expanded_nodes_ids,
	std::vector<Neighbor> &best_L_nodes, std::vector<unsigned> &des,
	tsl::robin_set<unsigned> &inserted_into_pool_rs,
	boost::dynamic_bitset<> &inserted_into_pool_bs, bool ret_frozen,
	bool search_invocation)

这里代码首先把 init_ids（len=1，因为是去 center 作为 entrypoint 并且没有添加其他的元素）中 ep 给放到 best_L_nodes 中，并且设置为已经插入到 pool，inserted_into_pool_bs[id] = 1。

这里我 gdb 打断点后发现，vanama 根本没有使用初始化随机图，而是直接采用插入的方法。 鹅美静！

这里使用贪心算法，在图上进行检索，然后裁边：

	get_expanded_nodes(node, L, init_ids, pool, visited);

	// 添加该点现有的邻居节点

	prune_neighbors(node, pool, pruned_list);

这一块太抽象了，很难用文字描述，简单来说就是论文算法的实现。

这里它用 alpha 因子进行 occlude 的时候不是直接完成的，而且反复迭代增大初始因子进行的，这点很有意思。

现在分块构建的索引已经保存好了，需要做的就是 merge：

	diskann::merge_shards(merged_index_prefix + "_subshard-", "_mem.index",
	merged_index_prefix + "_subshard-", "_ids_uint32.bin",
	num_parts, R, mem_index_path, medoids_file);

	int merge_shards(const std::string &vamana_prefix,
	const std::string &vamana_suffix,
	const std::string &idmaps_prefix,
	const std::string &idmaps_suffix, const _u64 nshards,
	unsigned max_degree, const std::string &output_vamana,
	const std::string &medoids_file)

ANNS

# 编译

# 基于内存方案

# Index 构建函数

# build_disk

CMake学习

MIT6.s081: lab4Trap