Cleanup registry parsing

This commit is contained in:
momo5502
2024-11-03 14:24:18 +01:00
parent 17db05a410
commit 7c2132510a
5 changed files with 325 additions and 258 deletions

View File

@@ -0,0 +1,244 @@
#include "hive_parser.hpp"
// Based on this implementation: https://github.com/reahly/windows-hive-parser
namespace
{
// Base file offset that is added to every block offset stored in the hive before reading from the file.
constexpr uint64_t MAIN_ROOT_OFFSET = 0x1000;
// File offset from which the root key block is read (0x20 bytes past the base offset).
constexpr uint64_t MAIN_KEY_BLOCK_OFFSET = MAIN_ROOT_OFFSET + 0x20;
// One entry of a subkey list, read byte-for-byte from the hive file.
// The fields are fixed-width on purpose: `long` is 8 bytes on LP64 platforms, which would
// double the size of this on-disk record and make every indexed read land on the wrong bytes.
struct offset_entry_t
{
    int32_t offset; // Offset of the subkey's key block, relative to MAIN_ROOT_OFFSET.
    int32_t hash;   // Second 4-byte field of the entry; not read by the parser.
};
static_assert(sizeof(offset_entry_t) == 8, "offset_entry_t must match the 8-byte on-disk entry");
struct offsets_t
{
long block_size;
char block_type[2];
short count;
offset_entry_t entries[1];
};
// Key block, read byte-for-byte from the hive file.
// `block_size` is fixed-width because `long` is 8 bytes on LP64 platforms, which would shift
// every following field by four bytes relative to the data in the file.
// The `dummy*` members only reserve space for bytes the parser does not interpret.
struct key_block_t
{
    int32_t block_size;
    char block_type[2];
    char dummya[18];
    int subkey_count;
    char dummyb[4];
    int subkeys;     // Offset of the subkey list block, relative to MAIN_ROOT_OFFSET.
    char dummyc[4];
    int value_count; // Number of values attached to this key.
    int offsets;     // Offset of the value offset list, relative to MAIN_ROOT_OFFSET.
    char dummyd[28];
    short len;       // Number of valid characters in `name`.
    short du;
    char name[255];
};
static_assert(offsetof(key_block_t, subkey_count) == 24, "key_block_t must match the on-disk layout");
static_assert(offsetof(key_block_t, name) == 80, "key_block_t must match the on-disk layout");
// Value block, read byte-for-byte from the hive file.
// The 32-bit fields are fixed-width because `long` is 8 bytes on LP64 platforms, which would
// move `size`, `offset`, `value_type` and `name` away from their real position in the file.
struct value_block_t
{
    int32_t block_size;
    char block_type[2];
    short name_len;     // Number of valid characters in `name`.
    int32_t size;       // Low 16 bits are used as the data length; the high bit marks data held in `offset` itself.
    int32_t offset;     // Offset of the data block relative to MAIN_ROOT_OFFSET, or the data itself (see `size`).
    int32_t value_type; // Copied into hive_value::type.
    short flags;
    short dummy;
    char name[255];
};
static_assert(offsetof(value_block_t, offset) == 12, "value_block_t must match the on-disk layout");
static_assert(offsetof(value_block_t, name) == 24, "value_block_t must match the on-disk layout");
// Reads `size` bytes starting at absolute file position `offset` into `buffer`.
// Returns true only if the seek and the complete read succeeded.
bool read_file_data(std::ifstream& file, const uint64_t offset, void* buffer, const size_t size)
{
    // A stream with badbit set is not used any further.
    if (file.bad())
    {
        return false;
    }

    // Drop eof/fail state left behind by an earlier read before seeking again.
    file.clear();

    const auto position = static_cast<std::streamoff>(offset);
    if (!file.good() || !file.seekg(position).good())
    {
        return false;
    }

    auto* const destination = static_cast<char*>(buffer);
    return file.read(destination, static_cast<std::streamsize>(size)).good();
}
std::vector<std::byte> read_file_data(std::ifstream& file, const uint64_t offset, const size_t size)
{
std::vector<std::byte> result{};
result.resize(size);
if (read_file_data(file, offset, result.data(), size))
{
return result;
}
return {};
}
// Reads `size` bytes starting at file position `offset` and returns them as a string.
// An empty string is returned when the read fails.
std::string read_file_data_string(std::ifstream& file, const uint64_t offset, const size_t size)
{
    std::string text(size, '\0');

    const bool succeeded = read_file_data(file, offset, text.data(), size);
    if (!succeeded)
    {
        return {};
    }

    return text;
}
// Reads one trivially copyable object of type T from the file.
// `offset` is the position of element 0; `array_index` selects the element to read,
// i.e. the read starts at offset + array_index * sizeof(T).
// Returns std::nullopt when the read fails.
template <typename T>
    requires(std::is_trivially_copyable_v<T>)
std::optional<T> read_file_object(std::ifstream& file, const uint64_t offset, const size_t array_index = 0)
{
    const auto element_offset = offset + (array_index * sizeof(T));

    T object{};
    if (!read_file_data(file, element_offset, &object, sizeof(T)))
    {
        return std::nullopt;
    }

    return object;
}
// Same as read_file_object(), but a failed read is reported by throwing std::runtime_error
// instead of returning an empty optional.
template <typename T>
    requires(std::is_trivially_copyable_v<T>)
T read_file_object_or_throw(std::ifstream& file, const uint64_t offset, const size_t array_index = 0)
{
    if (auto object = read_file_object<T>(file, offset, array_index))
    {
        return std::move(*object);
    }

    throw std::runtime_error("Failed to read file object");
}
// Checks the "regf" signature at the start of the file and builds the root key from the
// key block stored at MAIN_KEY_BLOCK_OFFSET.
// Throws std::runtime_error if the signature does not match or the key block cannot be read.
hive_key parse_root_block(std::ifstream& file, const std::filesystem::path& file_path)
{
    const auto signature = read_file_data_string(file, 0, 4);
    const bool has_hive_signature = (signature == "regf");
    if (!has_hive_signature)
    {
        throw std::runtime_error("Bad hive file: " + file_path.string());
    }

    const auto root_block = read_file_object_or_throw<key_block_t>(file, MAIN_KEY_BLOCK_OFFSET);
    return hive_key{root_block.subkeys, root_block.value_count, root_block.offsets};
}
// Lower-cases a single character with std::tolower.
// The value goes through unsigned char first because std::tolower must not be called
// with a negative value other than EOF.
char char_to_lower(const char val)
{
    const auto as_unsigned = static_cast<unsigned char>(val);
    const int lowered = std::tolower(as_unsigned);
    return static_cast<char>(lowered);
}
// Compares two characters after lower-casing both of them.
bool ichar_equals(const char a, const char b)
{
    const char lowered_a = char_to_lower(a);
    const char lowered_b = char_to_lower(b);
    return lowered_a == lowered_b;
}
// Returns true when both strings have the same length and every pair of characters
// compares equal under ichar_equals.
bool iequals(std::string_view lhs, std::string_view rhs)
{
    if (lhs.size() != rhs.size())
    {
        return false;
    }

    return std::equal(lhs.begin(), lhs.end(), rhs.begin(), ichar_equals);
}
}
// Looks up a value of this key by name and returns it with its data loaded.
//
// The key is parsed on first use. The value's data is read from the file the first time the
// value is requested and kept for later calls.
//
// file: stream of the hive file this key belongs to.
// name: value name; it is lower-cased before the lookup.
// Returns a pointer to the stored value, or nullptr if the key has no value with that name.
// The returned data is empty if reading it from the file failed.
// Throws std::runtime_error if parsing the key fails.
const hive_value* hive_key::get_value(std::ifstream& file, const std::string_view name)
{
    this->parse(file);

    // parse() stores every value under its lower-cased name, so the lookup key is
    // normalised the same way.
    std::string lookup_name(name);
    std::ranges::transform(lookup_name, lookup_name.begin(), char_to_lower);

    const auto entry = this->values_.find(lookup_name);
    if (entry == this->values_.end())
    {
        return nullptr;
    }

    auto& value = entry->second;

    // Load the data only once. The flag starts out false, so the check has to be negated;
    // otherwise the data would never be read.
    if (!value.parsed)
    {
        value.data = read_file_data(file, MAIN_ROOT_OFFSET + value.data_offset, value.data_length);
        value.parsed = true;
    }

    return &value;
}
// Reads the values and direct subkeys of this key from the hive file and indexes them by
// lower-cased name. Value data itself is not read here; only its location and length are
// recorded. Calling this again after the first call does nothing.
//
// file: stream of the hive file this key belongs to.
// Throws std::runtime_error if a block cannot be read from the file.
void hive_key::parse(std::ifstream& file)
{
    if (this->parsed_)
    {
        return;
    }

    // Set before reading: if a read below throws, later calls return early and do not retry.
    this->parsed_ = true;

    // Name lengths come from the file as signed 16-bit values. A negative length would
    // otherwise convert to a huge std::string size, so it is treated as an empty name.
    const auto clamp_name_length = [](const short length, const size_t capacity) -> size_t {
        if (length <= 0)
        {
            return 0;
        }

        return std::min(static_cast<size_t>(length), capacity);
    };

    // Values
    for (auto i = 0; i < this->value_count_; i++)
    {
        // The list of value offsets starts 4 bytes into its block.
        const auto offset = read_file_object_or_throw<int>(file, MAIN_ROOT_OFFSET + this->value_offsets_ + 4,
                                                           static_cast<size_t>(i));
        const auto value = read_file_object_or_throw<value_block_t>(file, MAIN_ROOT_OFFSET + offset);

        std::string value_name(value.name, clamp_name_length(value.name_len, sizeof(value.name)));

        raw_hive_value raw_value{};
        raw_value.parsed = false;
        raw_value.type = static_cast<uint32_t>(value.value_type);
        raw_value.name = value_name; // keeps the original spelling; the map key below is lower-cased
        raw_value.data_length = value.size & 0xffff;
        raw_value.data_offset = static_cast<int>(value.offset + 4);

        // High bit of the size field set: the data is held in the value block's own offset
        // field. The test is done on an unsigned 32-bit value so it does not depend on the
        // width or sign of the field's type.
        if ((static_cast<uint32_t>(value.size) & 0x80000000u) != 0)
        {
            raw_value.data_offset = offset + static_cast<int>(offsetof(value_block_t, offset));
        }

        std::ranges::transform(value_name, value_name.begin(), char_to_lower);
        this->values_[std::move(value_name)] = std::move(raw_value);
    }

    // Subkeys
    // A negative offset cannot address a block (the registry format stores 0xFFFFFFFF when a
    // key has no subkey list), so there is nothing to read.
    if (this->subkey_block_offset_ < 0)
    {
        return;
    }

    const auto item = read_file_object_or_throw<offsets_t>(file, MAIN_ROOT_OFFSET + this->subkey_block_offset_);
    if (item.block_type[1] != 'f' && item.block_type[1] != 'h')
    {
        return;
    }

    const auto entry_offsets = this->subkey_block_offset_ + offsetof(offsets_t, entries);

    for (short i = 0; i < item.count; ++i)
    {
        const auto offset_entry = read_file_object_or_throw<offset_entry_t>(file, MAIN_ROOT_OFFSET + entry_offsets,
                                                                            static_cast<size_t>(i));

        const auto subkey_block_offset = MAIN_ROOT_OFFSET + offset_entry.offset;
        const auto subkey = read_file_object_or_throw<key_block_t>(file, subkey_block_offset);

        std::string subkey_name(subkey.name, clamp_name_length(subkey.len, sizeof(subkey.name)));
        std::ranges::transform(subkey_name, subkey_name.begin(), char_to_lower);

        this->sub_keys_.emplace(std::move(subkey_name), hive_key{subkey.subkeys, subkey.value_count, subkey.offsets});
    }
}
hive_parser::hive_parser(const std::filesystem::path& file_path)
: file_(file_path, std::ios::binary)
, root_key_(parse_root_block(file_, file_path))
{
}

View File

@@ -1,272 +1,99 @@
#pragma once
#include <string>
#include <fstream>
#include <vector>
#include <ranges>
#include <cwctype>
#include <optional>
#include <utils/container.hpp>
// Based on this implementation: https://github.com/reahly/windows-hive-parser
struct offset_entry_t
struct hive_value
{
long offset;
long hash;
uint32_t type{};
std::string name{};
std::vector<std::byte> data{};
};
struct offsets_t
class hive_key
{
long block_size;
char block_type[2];
short count;
offset_entry_t entries[0];
};
struct key_block_t
{
long block_size;
char block_type[2];
char dummya[18];
int subkey_count;
char dummyb[4];
int subkeys;
char dummyc[4];
int value_count;
int offsets;
char dummyd[28];
short len;
short du;
char name[255];
};
struct value_block_t
{
long block_size;
char block_type[2];
short name_len;
long size;
long offset;
long value_type;
short flags;
short dummy;
char name[255];
};
namespace detail
{
inline std::vector<char> read_file(const std::filesystem::path& file_path)
{
std::ifstream file(file_path, std::ios::binary);
if (!file.is_open())
{
return {};
}
return {std::istreambuf_iterator(file), std::istreambuf_iterator<char>()};
}
}
class hive_key_t
{
key_block_t* key_block;
uintptr_t main_root;
public:
explicit hive_key_t(): key_block(nullptr), main_root(0)
hive_key(const int subkey_block_offset, const int value_count, const int value_offsets)
: subkey_block_offset_(subkey_block_offset)
, value_count_(value_count)
, value_offsets_(value_offsets)
{
}
explicit hive_key_t(key_block_t* a, const uintptr_t b): key_block(a), main_root(b)
utils::unordered_string_map<hive_key>& get_sub_keys(std::ifstream& file)
{
this->parse(file);
return this->sub_keys_;
}
[[nodiscard]] std::vector<std::string_view> subkeys_list() const
hive_key* get_sub_key(std::ifstream& file, const std::string_view name)
{
const auto item = reinterpret_cast<offsets_t*>(this->main_root + key_block->subkeys);
if (item->block_type[1] != 'f' && item->block_type[1] != 'h')
return {};
auto& sub_keys = this->get_sub_keys(file);
const auto entry = sub_keys.find(name);
std::vector<std::string_view> out;
for (auto i = 0; i < key_block->subkey_count; i++)
if (entry == sub_keys.end())
{
const auto subkey = reinterpret_cast<key_block_t*>(item->entries[i].offset + this->main_root);
if (!subkey)
continue;
out.emplace_back(subkey->name, subkey->len);
return nullptr;
}
return out;
return &entry->second;
}
[[nodiscard]] std::vector<std::string_view> keys_list() const
const hive_value* get_value(std::ifstream& file, const std::string_view name);
private:
struct raw_hive_value : hive_value
{
if (!key_block->value_count)
return {};
bool parsed{false};
int data_offset{};
size_t data_length{};
};
std::vector<std::string_view> out;
for (auto i = 0; i < key_block->value_count; i++)
{
const auto value = reinterpret_cast<value_block_t*>(reinterpret_cast<int*>(key_block->offsets + this->
main_root + 4)[i] + this->main_root);
if (!value)
continue;
bool parsed_{false};
utils::unordered_string_map<hive_key> sub_keys_{};
utils::unordered_string_map<raw_hive_value> values_{};
out.emplace_back(value->name, value->name_len);
}
const int subkey_block_offset_{};
const int value_count_{};
const int value_offsets_{};
return out;
}
using value = std::pair<long, std::string_view>;
std::optional<value> get_key_value(const std::string_view& name)
{
for (auto i = 0; i < key_block->value_count; i++)
{
const auto value = reinterpret_cast<value_block_t*>(reinterpret_cast<int*>(key_block->offsets + this->
main_root + 4)[i] + this->main_root);
if (!value || std::string_view(value->name, value->name_len) != name)
continue;
auto data = reinterpret_cast<char*>(this->main_root + value->offset + 4);
if (value->size & 1 << 31)
data = reinterpret_cast<char*>(&value->offset);
return std::make_pair(value->value_type, std::string_view(data, value->size & 0xffff));
}
return std::nullopt;
}
void parse(std::ifstream& file);
};
class hive_parser
{
struct hive_subpaths_t
{
std::string path;
hive_key_t data;
};
struct hive_cache_t
{
hive_key_t data;
std::vector<hive_subpaths_t> subpaths;
};
key_block_t* main_key_block_data;
uintptr_t main_root;
std::vector<char> file_data;
utils::unordered_string_map<hive_cache_t> subkey_cache;
void reclusive_search(const key_block_t* key_block_data, const std::string& current_path,
const bool is_reclusive = false)
{
if (!key_block_data)
return;
const auto item = reinterpret_cast<offsets_t*>(main_root + key_block_data->subkeys);
if (item->block_type[1] != 'f' && item->block_type[1] != 'h')
return;
for (auto i = 0; i < item->count; i++)
{
const auto subkey = reinterpret_cast<key_block_t*>(item->entries[i].offset + main_root);
if (!subkey)
continue;
std::string_view subkey_name(subkey->name, subkey->len);
std::string full_path = current_path.empty()
? std::string(subkey_name)
: std::string(current_path).append("/").append(subkey_name);
std::ranges::transform(full_path, full_path.begin(), ::tolower);
if (!is_reclusive)
subkey_cache.try_emplace(full_path, hive_cache_t{
hive_key_t{subkey, main_root}, std::vector<hive_subpaths_t>{}
});
const auto extract_main_key = [ ](const std::string_view str) -> std::string_view
{
const size_t slash_pos = str.find('/');
if (slash_pos == std::string::npos)
return str;
return str.substr(0, slash_pos);
};
if (subkey->subkey_count > 0)
{
reclusive_search(subkey, full_path, true);
const auto entry = subkey_cache.find(extract_main_key(full_path));
if (entry == subkey_cache.end())
{
throw std::out_of_range("Invalid key");
}
entry->second.subpaths.emplace_back(hive_subpaths_t{
full_path, hive_key_t{subkey, main_root}
});
}
else
{
const auto entry = subkey_cache.find(extract_main_key(full_path));
if (entry == subkey_cache.end())
{
throw std::out_of_range("Invalid key");
}
entry->second.subpaths.emplace_back(full_path, hive_key_t{subkey, main_root});
}
}
}
public:
explicit hive_parser(const std::filesystem::path& file_path)
: hive_parser(detail::read_file(file_path))
explicit hive_parser(const std::filesystem::path& file_path);
[[nodiscard]] hive_key* get_sub_key(const std::filesystem::path& key)
{
}
hive_key* current_key = &this->root_key_;
explicit hive_parser(std::vector<char> input_data)
: file_data(std::move(input_data))
{
if (file_data.size() < 0x1020)
return;
if (file_data.at(0) != 'r' && file_data.at(1) != 'e' && file_data.at(2) != 'g' && file_data.at(3) != 'f')
return;
main_key_block_data = reinterpret_cast<key_block_t*>(reinterpret_cast<uintptr_t>(file_data.data() + 0x1020));
main_root = reinterpret_cast<uintptr_t>(main_key_block_data) - 0x20;
reclusive_search(main_key_block_data, "");
}
[[nodiscard]] bool success() const
{
return !subkey_cache.empty();
}
[[nodiscard]] std::optional<hive_key_t> get_subkey(const std::string_view key_name,
const std::string_view path) const
{
if (!subkey_cache.contains(key_name))
return std::nullopt;
const auto hive_block = subkey_cache.find(key_name);
if (hive_block == subkey_cache.end())
for (const auto& key_part : key)
{
throw std::out_of_range("Invalid key");
if (!current_key)
{
return nullptr;
}
current_key = current_key->get_sub_key(this->file_, key_part.string());
}
for (const auto& hive : hive_block->second.subpaths)
return current_key;
}
[[nodiscard]] const hive_value* get_value(const std::filesystem::path& key, const std::string_view name)
{
auto* sub_key = this->get_sub_key(key);
if (!sub_key)
{
if (hive.path == path)
return hive.data;
return nullptr;
}
return std::nullopt;
return sub_key->get_value(this->file_, name);
}
private:
std::ifstream file_{};
hive_key root_key_;
};

View File

@@ -1,8 +1,10 @@
#include "registry_manager.hpp"
#include "hive_parser.hpp"
#include <cwctype>
#include <serialization_helper.hpp>
#include "hive_parser.hpp"
namespace
{
std::filesystem::path canonicalize_path(const std::filesystem::path& key)
@@ -31,11 +33,7 @@ namespace
void register_hive(registry_manager::hive_map& hives,
const std::filesystem::path& key, const std::filesystem::path& file)
{
auto hive = std::make_unique<hive_parser>(file);
if (hive && hive->success())
{
hives[canonicalize_path(key)] = std::move(hive);
}
hives[canonicalize_path(key)] = std::make_unique<hive_parser>(file);
}
}
@@ -116,10 +114,10 @@ std::optional<registry_key> registry_manager::get_key(const std::filesystem::pat
return {std::move(reg_key)};
}
const auto entry = iterator->second->get_subkey(reg_key.path.begin()->string(), reg_key.path.generic_string());
const auto entry = iterator->second->get_sub_key(reg_key.path);
if (!entry)
{
return {};
return std::nullopt;
}
return {std::move(reg_key)};
@@ -130,24 +128,19 @@ std::optional<registry_value> registry_manager::get_value(const registry_key& ke
const auto iterator = this->hives_.find(key.hive);
if (iterator == this->hives_.end())
{
return {};
return std::nullopt;
}
auto entry = iterator->second->get_subkey(key.path.begin()->string(), key.path.generic_string());
auto* entry = iterator->second->get_value(key.path, name);
if (!entry)
{
return {};
}
const auto value = entry->get_key_value(name);
if (!value)
{
return {};
return std::nullopt;
}
registry_value v{};
v.type = value->first;
v.data = value->second;
v.type = entry->type;
v.name = entry->name;
v.data = entry->data;
return v;
}

View File

@@ -26,7 +26,8 @@ struct registry_key
struct registry_value
{
uint32_t type;
std::string_view data;
std::string_view name;
std::span<const std::byte> data;
};
class registry_manager

View File

@@ -94,8 +94,8 @@ namespace
return STATUS_INVALID_HANDLE;
}
const auto wide_name = read_unicode_string(c.emu, value_name);
const std::string name(wide_name.begin(), wide_name.end());
const auto query_name = read_unicode_string(c.emu, value_name);
const std::string name(query_name.begin(), query_name.end());
const auto value = c.proc.registry.get_value(*key, name);
if (!value)
@@ -103,9 +103,11 @@ namespace
return STATUS_OBJECT_NAME_NOT_FOUND;
}
const std::wstring original_name(value->name.begin(), value->name.end());
if (key_value_information_class == KeyValueBasicInformation)
{
const auto required_size = sizeof(KEY_VALUE_BASIC_INFORMATION) + (wide_name.size() * 2) - 1;
const auto required_size = sizeof(KEY_VALUE_BASIC_INFORMATION) + (original_name.size() * 2) - 1;
result_length.write(static_cast<ULONG>(required_size));
if (required_size > length)
@@ -116,13 +118,13 @@ namespace
KEY_VALUE_BASIC_INFORMATION info{};
info.TitleIndex = 0;
info.Type = value->type;
info.NameLength = static_cast<ULONG>(wide_name.size() * 2);
info.NameLength = static_cast<ULONG>(original_name.size() * 2);
const emulator_object<KEY_VALUE_BASIC_INFORMATION> info_obj{ c.emu, key_value_information };
info_obj.write(info);
c.emu.write_memory(key_value_information + offsetof(KEY_VALUE_BASIC_INFORMATION, Name),
wide_name.data(),
original_name.data(),
info.NameLength);
return STATUS_SUCCESS;
@@ -155,7 +157,7 @@ namespace
if (key_value_information_class == KeyValueFullInformation)
{
const auto required_size = sizeof(KEY_VALUE_FULL_INFORMATION) + (wide_name.size() * 2) + value->data.size() - 1;
const auto required_size = sizeof(KEY_VALUE_FULL_INFORMATION) + (original_name.size() * 2) + value->data.size() - 1;
result_length.write(static_cast<ULONG>(required_size));
if (required_size > length)
@@ -167,13 +169,13 @@ namespace
info.TitleIndex = 0;
info.Type = value->type;
info.DataLength = static_cast<ULONG>(value->data.size());
info.NameLength = static_cast<ULONG>(wide_name.size() * 2);
info.NameLength = static_cast<ULONG>(original_name.size() * 2);
const emulator_object<KEY_VALUE_FULL_INFORMATION> info_obj{ c.emu, key_value_information };
info_obj.write(info);
c.emu.write_memory(key_value_information + offsetof(KEY_VALUE_BASIC_INFORMATION, Name),
wide_name.data(),
original_name.data(),
info.NameLength);
c.emu.write_memory(key_value_information + offsetof(KEY_VALUE_FULL_INFORMATION, Name) + info.NameLength,