From 7c2132510ac67a73a33d9aa6115379408ce1b9bf Mon Sep 17 00:00:00 2001 From: momo5502 Date: Sun, 3 Nov 2024 14:24:18 +0100 Subject: [PATCH] Cleanup registry parsing --- src/windows-emulator/registry/hive_parser.cpp | 244 +++++++++++++++ src/windows-emulator/registry/hive_parser.hpp | 287 ++++-------------- .../registry/registry_manager.cpp | 31 +- .../registry/registry_manager.hpp | 3 +- src/windows-emulator/syscalls.cpp | 18 +- 5 files changed, 325 insertions(+), 258 deletions(-) create mode 100644 src/windows-emulator/registry/hive_parser.cpp diff --git a/src/windows-emulator/registry/hive_parser.cpp b/src/windows-emulator/registry/hive_parser.cpp new file mode 100644 index 00000000..bd88b492 --- /dev/null +++ b/src/windows-emulator/registry/hive_parser.cpp @@ -0,0 +1,244 @@ +#include "hive_parser.hpp" + +// Based on this implementation: https://github.com/reahly/windows-hive-parser + +namespace +{ + constexpr uint64_t MAIN_ROOT_OFFSET = 0x1000; + constexpr uint64_t MAIN_KEY_BLOCK_OFFSET = MAIN_ROOT_OFFSET + 0x20; + + struct offset_entry_t + { + long offset; + long hash; + }; + + struct offsets_t + { + long block_size; + char block_type[2]; + short count; + offset_entry_t entries[1]; + }; + + struct key_block_t + { + long block_size; + char block_type[2]; + char dummya[18]; + int subkey_count; + char dummyb[4]; + int subkeys; + char dummyc[4]; + int value_count; + int offsets; + char dummyd[28]; + short len; + short du; + char name[255]; + }; + + struct value_block_t + { + long block_size; + char block_type[2]; + short name_len; + long size; + long offset; + long value_type; + short flags; + short dummy; + char name[255]; + }; + + bool read_file_data(std::ifstream& file, const uint64_t offset, void* buffer, const size_t size) + { + if (file.bad()) + { + return false; + } + + file.clear(); + + if (!file.good()) + { + return false; + } + + file.seekg(static_cast(offset)); + + if (!file.good()) + { + return false; + } + + file.read(static_cast(buffer), 
static_cast(size)); + + return file.good(); + } + + std::vector read_file_data(std::ifstream& file, const uint64_t offset, const size_t size) + { + std::vector result{}; + result.resize(size); + + if (read_file_data(file, offset, result.data(), size)) + { + return result; + } + + return {}; + } + + std::string read_file_data_string(std::ifstream& file, const uint64_t offset, const size_t size) + { + std::string result{}; + result.resize(size); + + if (read_file_data(file, offset, result.data(), size)) + { + return result; + } + + return {}; + } + + template + requires(std::is_trivially_copyable_v) + std::optional read_file_object(std::ifstream& file, const uint64_t offset, const size_t array_index = 0) + { + T obj{}; + if (read_file_data(file, offset + (array_index * sizeof(T)), &obj, sizeof(T))) + { + return {std::move(obj)}; + } + + return std::nullopt; + } + + template + requires(std::is_trivially_copyable_v) + T read_file_object_or_throw(std::ifstream& file, const uint64_t offset, const size_t array_index = 0) + { + auto result = read_file_object(file, offset, array_index); + if (!result) + { + throw std::runtime_error("Failed to read file object"); + } + + return std::move(*result); + } + + hive_key parse_root_block(std::ifstream& file, const std::filesystem::path& file_path) + { + if (read_file_data_string(file, 0, 4) != "regf") + { + throw std::runtime_error("Bad hive file: " + file_path.string()); + } + + const auto key_block = read_file_object_or_throw(file, MAIN_KEY_BLOCK_OFFSET); + + return {key_block.subkeys, key_block.value_count, key_block.offsets}; + } + + char char_to_lower(const char val) + { + return static_cast(std::tolower(static_cast(val))); + } + + bool ichar_equals(const char a, const char b) + { + return char_to_lower(a) == char_to_lower(b); + } + + bool iequals(std::string_view lhs, std::string_view rhs) + { + return std::ranges::equal(lhs, rhs, ichar_equals); + } +} + +const hive_value* hive_key::get_value(std::ifstream& file, const 
std::string_view name) +{ + this->parse(file); + + const auto entry = this->values_.find(name); + if (entry == this->values_.end()) + { + return nullptr; + } + + auto& value = entry->second; + + if (!value.parsed) /* value data is loaded lazily: read it from the hive file only on first access */ + { + value.data = read_file_data(file, MAIN_ROOT_OFFSET + value.data_offset, value.data_length); + value.parsed = true; + } + + return &value; +} + +void hive_key::parse(std::ifstream& file) +{ + if (this->parsed_) + { + return; + } + + this->parsed_ = true; + + // Values + + for (auto i = 0; i < this->value_count_; i++) + { + const auto offset = read_file_object_or_throw(file, MAIN_ROOT_OFFSET + this->value_offsets_ + 4, i); + const auto value = read_file_object_or_throw(file, MAIN_ROOT_OFFSET + offset); + + std::string value_name(value.name, std::min(value.name_len, static_cast(sizeof(value.name)))); + + raw_hive_value raw_value{}; + raw_value.parsed = false; + raw_value.type = value.value_type; + raw_value.name = value_name; + raw_value.data_length = value.size & 0xffff; + raw_value.data_offset = value.offset + 4; + + if (value.size & 1 << 31) + { + raw_value.data_offset = offset + static_cast(offsetof(value_block_t, offset)); + } + + std::ranges::transform(value_name, value_name.begin(), char_to_lower); + this->values_[std::move(value_name)] = std::move(raw_value); + } + + // Subkeys + + const auto item = read_file_object_or_throw(file, MAIN_ROOT_OFFSET + this->subkey_block_offset_); + + if (item.block_type[1] != 'f' && item.block_type[1] != 'h') + { + return; + } + + const auto entry_offsets = this->subkey_block_offset_ + offsetof(offsets_t, entries); + + for (short i = 0; i < item.count; ++i) + { + const auto offset_entry = read_file_object_or_throw(file, MAIN_ROOT_OFFSET + entry_offsets, i); + + const auto subkey_block_offset = MAIN_ROOT_OFFSET + offset_entry.offset; + const auto subkey = read_file_object_or_throw(file, subkey_block_offset); + + std::string subkey_name(subkey.name, std::min(subkey.len, static_cast(sizeof(subkey.name)))); + 
std::ranges::transform(subkey_name, subkey_name.begin(), char_to_lower); + + this->sub_keys_.emplace(std::move(subkey_name), hive_key{subkey.subkeys, subkey.value_count, subkey.offsets}); + } +} + +hive_parser::hive_parser(const std::filesystem::path& file_path) + : file_(file_path, std::ios::binary) + , root_key_(parse_root_block(file_, file_path)) +{ +} diff --git a/src/windows-emulator/registry/hive_parser.hpp b/src/windows-emulator/registry/hive_parser.hpp index 50cada54..462557d9 100644 --- a/src/windows-emulator/registry/hive_parser.hpp +++ b/src/windows-emulator/registry/hive_parser.hpp @@ -1,272 +1,99 @@ #pragma once -#include #include -#include -#include -#include -#include #include -// Based on this implementation: https://github.com/reahly/windows-hive-parser - -struct offset_entry_t +struct hive_value { - long offset; - long hash; + uint32_t type{}; + std::string name{}; + std::vector data{}; }; -struct offsets_t +class hive_key { - long block_size; - char block_type[2]; - short count; - offset_entry_t entries[0]; -}; - -struct key_block_t -{ - long block_size; - char block_type[2]; - char dummya[18]; - int subkey_count; - char dummyb[4]; - int subkeys; - char dummyc[4]; - int value_count; - int offsets; - char dummyd[28]; - short len; - short du; - char name[255]; -}; - -struct value_block_t -{ - long block_size; - char block_type[2]; - short name_len; - long size; - long offset; - long value_type; - short flags; - short dummy; - char name[255]; -}; - -namespace detail -{ - inline std::vector read_file(const std::filesystem::path& file_path) - { - std::ifstream file(file_path, std::ios::binary); - if (!file.is_open()) - { - return {}; - } - - return {std::istreambuf_iterator(file), std::istreambuf_iterator()}; - } -} - -class hive_key_t -{ - key_block_t* key_block; - uintptr_t main_root; - public: - explicit hive_key_t(): key_block(nullptr), main_root(0) + hive_key(const int subkey_block_offset, const int value_count, const int value_offsets) + : 
subkey_block_offset_(subkey_block_offset) + , value_count_(value_count) + , value_offsets_(value_offsets) { } - explicit hive_key_t(key_block_t* a, const uintptr_t b): key_block(a), main_root(b) + utils::unordered_string_map& get_sub_keys(std::ifstream& file) { + this->parse(file); + return this->sub_keys_; } - [[nodiscard]] std::vector subkeys_list() const + hive_key* get_sub_key(std::ifstream& file, const std::string_view name) { - const auto item = reinterpret_cast(this->main_root + key_block->subkeys); - if (item->block_type[1] != 'f' && item->block_type[1] != 'h') - return {}; + auto& sub_keys = this->get_sub_keys(file); + const auto entry = sub_keys.find(name); - std::vector out; - for (auto i = 0; i < key_block->subkey_count; i++) + if (entry == sub_keys.end()) { - const auto subkey = reinterpret_cast(item->entries[i].offset + this->main_root); - if (!subkey) - continue; - - out.emplace_back(subkey->name, subkey->len); + return nullptr; } - return out; + return &entry->second; } - [[nodiscard]] std::vector keys_list() const + const hive_value* get_value(std::ifstream& file, const std::string_view name); + +private: + struct raw_hive_value : hive_value { - if (!key_block->value_count) - return {}; + bool parsed{false}; + int data_offset{}; + size_t data_length{}; + }; - std::vector out; - for (auto i = 0; i < key_block->value_count; i++) - { - const auto value = reinterpret_cast(reinterpret_cast(key_block->offsets + this-> - main_root + 4)[i] + this->main_root); - if (!value) - continue; + bool parsed_{false}; + utils::unordered_string_map sub_keys_{}; + utils::unordered_string_map values_{}; - out.emplace_back(value->name, value->name_len); - } + const int subkey_block_offset_{}; + const int value_count_{}; + const int value_offsets_{}; - return out; - } - - using value = std::pair; - - std::optional get_key_value(const std::string_view& name) - { - for (auto i = 0; i < key_block->value_count; i++) - { - const auto value = 
reinterpret_cast(reinterpret_cast(key_block->offsets + this-> - main_root + 4)[i] + this->main_root); - if (!value || std::string_view(value->name, value->name_len) != name) - continue; - - auto data = reinterpret_cast(this->main_root + value->offset + 4); - if (value->size & 1 << 31) - data = reinterpret_cast(&value->offset); - - return std::make_pair(value->value_type, std::string_view(data, value->size & 0xffff)); - } - - return std::nullopt; - } + void parse(std::ifstream& file); }; class hive_parser { - struct hive_subpaths_t - { - std::string path; - hive_key_t data; - }; - - struct hive_cache_t - { - hive_key_t data; - std::vector subpaths; - }; - - key_block_t* main_key_block_data; - uintptr_t main_root; - std::vector file_data; - utils::unordered_string_map subkey_cache; - - void reclusive_search(const key_block_t* key_block_data, const std::string& current_path, - const bool is_reclusive = false) - { - if (!key_block_data) - return; - - const auto item = reinterpret_cast(main_root + key_block_data->subkeys); - if (item->block_type[1] != 'f' && item->block_type[1] != 'h') - return; - - for (auto i = 0; i < item->count; i++) - { - const auto subkey = reinterpret_cast(item->entries[i].offset + main_root); - if (!subkey) - continue; - - std::string_view subkey_name(subkey->name, subkey->len); - std::string full_path = current_path.empty() - ? 
std::string(subkey_name) - : std::string(current_path).append("/").append(subkey_name); - std::ranges::transform(full_path, full_path.begin(), ::tolower); - - if (!is_reclusive) - subkey_cache.try_emplace(full_path, hive_cache_t{ - hive_key_t{subkey, main_root}, std::vector{} - }); - - const auto extract_main_key = [ ](const std::string_view str) -> std::string_view - { - const size_t slash_pos = str.find('/'); - if (slash_pos == std::string::npos) - return str; - - return str.substr(0, slash_pos); - }; - - if (subkey->subkey_count > 0) - { - reclusive_search(subkey, full_path, true); - const auto entry = subkey_cache.find(extract_main_key(full_path)); - if (entry == subkey_cache.end()) - { - throw std::out_of_range("Invalid key"); - } - - entry->second.subpaths.emplace_back(hive_subpaths_t{ - full_path, hive_key_t{subkey, main_root} - }); - } - else - { - const auto entry = subkey_cache.find(extract_main_key(full_path)); - if (entry == subkey_cache.end()) - { - throw std::out_of_range("Invalid key"); - } - - entry->second.subpaths.emplace_back(full_path, hive_key_t{subkey, main_root}); - } - } - } - public: - explicit hive_parser(const std::filesystem::path& file_path) - : hive_parser(detail::read_file(file_path)) + explicit hive_parser(const std::filesystem::path& file_path); + + [[nodiscard]] hive_key* get_sub_key(const std::filesystem::path& key) { - } + hive_key* current_key = &this->root_key_; - explicit hive_parser(std::vector input_data) - : file_data(std::move(input_data)) - { - if (file_data.size() < 0x1020) - return; - - if (file_data.at(0) != 'r' && file_data.at(1) != 'e' && file_data.at(2) != 'g' && file_data.at(3) != 'f') - return; - - main_key_block_data = reinterpret_cast(reinterpret_cast(file_data.data() + 0x1020)); - main_root = reinterpret_cast(main_key_block_data) - 0x20; - - reclusive_search(main_key_block_data, ""); - } - - [[nodiscard]] bool success() const - { - return !subkey_cache.empty(); - } - - [[nodiscard]] std::optional 
get_subkey(const std::string_view key_name, - const std::string_view path) const - { - if (!subkey_cache.contains(key_name)) - return std::nullopt; - - const auto hive_block = subkey_cache.find(key_name); - if (hive_block == subkey_cache.end()) + for (const auto& key_part : key) { - throw std::out_of_range("Invalid key"); + if (!current_key) + { + return nullptr; + } + + current_key = current_key->get_sub_key(this->file_, key_part.string()); } - for (const auto& hive : hive_block->second.subpaths) + return current_key; + } + + [[nodiscard]] const hive_value* get_value(const std::filesystem::path& key, const std::string_view name) + { + auto* sub_key = this->get_sub_key(key); + if (!sub_key) { - if (hive.path == path) - return hive.data; + return nullptr; } - return std::nullopt; + return sub_key->get_value(this->file_, name); } + +private: + std::ifstream file_{}; + hive_key root_key_; }; diff --git a/src/windows-emulator/registry/registry_manager.cpp b/src/windows-emulator/registry/registry_manager.cpp index e758a9d2..8ff8a610 100644 --- a/src/windows-emulator/registry/registry_manager.cpp +++ b/src/windows-emulator/registry/registry_manager.cpp @@ -1,8 +1,10 @@ #include "registry_manager.hpp" -#include "hive_parser.hpp" +#include #include +#include "hive_parser.hpp" + namespace { std::filesystem::path canonicalize_path(const std::filesystem::path& key) @@ -31,11 +33,7 @@ namespace void register_hive(registry_manager::hive_map& hives, const std::filesystem::path& key, const std::filesystem::path& file) { - auto hive = std::make_unique(file); - if (hive && hive->success()) - { - hives[canonicalize_path(key)] = std::move(hive); - } + hives[canonicalize_path(key)] = std::make_unique(file); } } @@ -116,10 +114,10 @@ std::optional registry_manager::get_key(const std::filesystem::pat return {std::move(reg_key)}; } - const auto entry = iterator->second->get_subkey(reg_key.path.begin()->string(), reg_key.path.generic_string()); + const auto entry = 
iterator->second->get_sub_key(reg_key.path); if (!entry) { - return {}; + return std::nullopt; } return {std::move(reg_key)}; @@ -130,24 +128,19 @@ std::optional registry_manager::get_value(const registry_key& ke const auto iterator = this->hives_.find(key.hive); if (iterator == this->hives_.end()) { - return {}; + return std::nullopt; } - auto entry = iterator->second->get_subkey(key.path.begin()->string(), key.path.generic_string()); + auto* entry = iterator->second->get_value(key.path, name); if (!entry) { - return {}; - } - - const auto value = entry->get_key_value(name); - if (!value) - { - return {}; + return std::nullopt; } registry_value v{}; - v.type = value->first; - v.data = value->second; + v.type = entry->type; + v.name = entry->name; + v.data = entry->data; return v; } diff --git a/src/windows-emulator/registry/registry_manager.hpp b/src/windows-emulator/registry/registry_manager.hpp index b5e79d88..d63b7f32 100644 --- a/src/windows-emulator/registry/registry_manager.hpp +++ b/src/windows-emulator/registry/registry_manager.hpp @@ -26,7 +26,8 @@ struct registry_key struct registry_value { uint32_t type; - std::string_view data; + std::string_view name; + std::span data; }; class registry_manager diff --git a/src/windows-emulator/syscalls.cpp b/src/windows-emulator/syscalls.cpp index 19f7b58f..0c6a0ca5 100644 --- a/src/windows-emulator/syscalls.cpp +++ b/src/windows-emulator/syscalls.cpp @@ -94,8 +94,8 @@ namespace return STATUS_INVALID_HANDLE; } - const auto wide_name = read_unicode_string(c.emu, value_name); - const std::string name(wide_name.begin(), wide_name.end()); + const auto query_name = read_unicode_string(c.emu, value_name); + const std::string name(query_name.begin(), query_name.end()); const auto value = c.proc.registry.get_value(*key, name); if (!value) @@ -103,9 +103,11 @@ namespace return STATUS_OBJECT_NAME_NOT_FOUND; } + const std::wstring original_name(value->name.begin(), value->name.end()); + if (key_value_information_class == 
KeyValueBasicInformation) { - const auto required_size = sizeof(KEY_VALUE_BASIC_INFORMATION) + (wide_name.size() * 2) - 1; + const auto required_size = sizeof(KEY_VALUE_BASIC_INFORMATION) + (original_name.size() * 2) - 1; result_length.write(static_cast(required_size)); if (required_size > length) @@ -116,13 +118,13 @@ namespace KEY_VALUE_BASIC_INFORMATION info{}; info.TitleIndex = 0; info.Type = value->type; - info.NameLength = static_cast(wide_name.size() * 2); + info.NameLength = static_cast(original_name.size() * 2); const emulator_object info_obj{ c.emu, key_value_information }; info_obj.write(info); c.emu.write_memory(key_value_information + offsetof(KEY_VALUE_BASIC_INFORMATION, Name), - wide_name.data(), + original_name.data(), info.NameLength); return STATUS_SUCCESS; @@ -155,7 +157,7 @@ namespace if (key_value_information_class == KeyValueFullInformation) { - const auto required_size = sizeof(KEY_VALUE_FULL_INFORMATION) + (wide_name.size() * 2) + value->data.size() - 1; + const auto required_size = sizeof(KEY_VALUE_FULL_INFORMATION) + (original_name.size() * 2) + value->data.size() - 1; result_length.write(static_cast(required_size)); if (required_size > length) @@ -167,13 +169,13 @@ namespace info.TitleIndex = 0; info.Type = value->type; info.DataLength = static_cast(value->data.size()); - info.NameLength = static_cast(wide_name.size() * 2); + info.NameLength = static_cast(original_name.size() * 2); const emulator_object info_obj{ c.emu, key_value_information }; info_obj.write(info); c.emu.write_memory(key_value_information + offsetof(KEY_VALUE_BASIC_INFORMATION, Name), - wide_name.data(), + original_name.data(), info.NameLength); c.emu.write_memory(key_value_information + offsetof(KEY_VALUE_FULL_INFORMATION, Name) + info.NameLength,