python 用 pyhidra 调用 Ghidra 执行二进制 static analysis
2024-02-09 23:10:53

0. Prep

  1. 安装 ghidra：`pacman -S ghidra`
    i. 写入 ghidra 的环境变量（视情况而定）：`echo "export GHIDRA_INSTALL_DIR=/opt/ghidra" >> ~/.zshrc`
  2. 安装 pyhidra：`pip install pyhidra`
  3. [Optional] 安装 ghidra-stubs：`pip install ghidra-stubs`

1. Usage

1.0 Helper Tools

我对 pyhidra 中 `open_program` 和 `_setup_project` 的实现做了一些更改（多次调用时复用同一个 Ghidra 项目，并缓存已经打开的程序）:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
from helper import FAIL
import traceback
import atexit
from config import GHIDRA_BASE

# This file is a modified version of pyhidra.core.open_program and pyhidra.core._setup_project.
# Original credit goes to pyhidra.

import contextlib
from pathlib import Path
from typing import Union, ContextManager
from pyhidra.core import _analyze_program, _get_compiler_spec, _get_language
from pyhidra.converters import * # pylint: disable=wildcard-import, unused-wildcard-import
from ghidra.base.project import GhidraProject
from typing import Union, Tuple, ContextManager
from ghidra.program.model.listing import Program

# Ghidra project shared by all open_program() calls: assigned on the first call
# and registered with atexit to be closed when the interpreter exits.
_project: GhidraProject | None = None
# Programs already opened through open_program(), keyed by the binary's file name.
_program_pool: dict[str, "Program"] = {}

def _setup_project2(
    binary_path: Union[str, Path],
    language: str | None = None,
    compiler: str | None = None
) -> "Program":
    """
    Open (or import) a binary inside the already-open module-level project.

    Counterpart of ``_setup_project`` for every call after the first one: it
    never opens or creates a project, it only reuses ``_project``.

    :param binary_path: Path to the binary file.
    :param language: The LanguageID to use when the binary has to be imported.
        (Defaults to Ghidra's detected LanguageID)
    :param compiler: The CompilerSpecID to use when the binary has to be imported.
        Only consulted when a language is provided.
    :return: The opened or freshly imported Program (the program only, not a
        ``(project, program)`` pair).
    :raises RuntimeError: If the import without an explicit language fails.
    :raises ValueError: If the import with the provided language/compiler fails.
    """
    from java.io import IOException

    # The module global is only read here, so no `global` statement is needed.
    assert _project is not None
    project = _project

    if binary_path is not None:
        binary_path = Path(binary_path)

    # Try to open the program from the project first.
    program: "Program | None" = None
    if binary_path is not None:
        try:
            program = project.openProgram("/", binary_path.name, False)
        except IOException:
            # Treated as "not in the project yet": fall through to the import below.
            program = None

    if binary_path is not None and program is None:
        if language is None:
            program = project.importProgram(binary_path)  # type: ignore
            if program is None:
                raise RuntimeError(f"Ghidra failed to import '{binary_path}'. Try providing a language manually.")
        else:
            lang = _get_language(language)
            comp = _get_compiler_spec(lang, compiler)  # type: ignore
            program = project.importProgram(binary_path, lang, comp)  # type: ignore
            if program is None:
                message = f"Ghidra failed to import '{binary_path}'. "
                if compiler:
                    message += f"The provided language/compiler pair ({language} / {compiler}) may be invalid."
                else:
                    message += f"The provided language ({language}) may be invalid."
                raise ValueError(message)
        # Last argument deliberately changed from True (upstream pyhidra) to False.
        # NOTE(review): presumably the overwrite flag - confirm against GhidraProject.saveAs.
        project.saveAs(program, "/", binary_path.name, False)

    assert program is not None
    return program

def _setup_project(
    binary_path: Union[str, Path],
    project_location: Union[str, Path] = None,
    project_name: str = None,
    language: str = None,
    compiler: str = None
) -> Tuple["GhidraProject", "Program"]:
    """
    Open or create the Ghidra project, then open or import the binary in it.

    :param binary_path: Path to the binary file.
    :param project_location: Directory that holds the Ghidra project.
        (Defaults to the directory of the binary file)
    :param project_name: Name of the Ghidra project.
        (Defaults to the binary's file name suffixed with "_ghidra")
    :param language: The LanguageID to use when the binary has to be imported.
        (Defaults to Ghidra's detected LanguageID)
    :param compiler: The CompilerSpecID to use when the binary has to be imported.
        Only consulted when a language is provided.
    :return: ``(project, program)``; ``program`` is None when ``binary_path`` is None.
    :raises RuntimeError: If the import without an explicit language fails.
    :raises ValueError: If the import with the provided language/compiler fails.
    """
    from ghidra.base.project import GhidraProject
    from java.io import IOException

    if binary_path is not None:
        binary_path = Path(binary_path)
    if project_location:
        project_location = Path(project_location)
    else:
        project_location = binary_path.parent
    if not project_name:
        project_name = f"{binary_path.name}_ghidra"
    # The project lives in <project_location>/<project_name>/.
    project_location = project_location / project_name
    project_location.mkdir(exist_ok=True, parents=True)

    # Open/Create project
    program: "Program" = None
    try:
        project = GhidraProject.openProject(project_location, project_name, True)
    except IOException:
        project = GhidraProject.createProject(project_location, project_name, False)
    else:
        if binary_path is not None:
            # Kept separate from the openProject() try-block: an IOException here
            # means the program could not be opened from the existing project and
            # must only trigger an import, not a createProject() on the project
            # that was just opened.
            try:
                program = project.openProgram("/", binary_path.name, False)
            except IOException:
                program = None

    if binary_path is not None and program is None:
        if language is None:
            program = project.importProgram(binary_path)
            if program is None:
                raise RuntimeError(f"Ghidra failed to import '{binary_path}'. Try providing a language manually.")
        else:
            lang = _get_language(language)
            comp = _get_compiler_spec(lang, compiler)
            program = project.importProgram(binary_path, lang, comp)
            if program is None:
                message = f"Ghidra failed to import '{binary_path}'. "
                if compiler:
                    message += f"The provided language/compiler pair ({language} / {compiler}) may be invalid."
                else:
                    message += f"The provided language ({language}) may be invalid."
                raise ValueError(message)
        # Last argument deliberately changed from True (upstream pyhidra) to False.
        # NOTE(review): presumably the overwrite flag - confirm against GhidraProject.saveAs.
        project.saveAs(program, "/", binary_path.name, False)

    return project, program


@contextlib.contextmanager
def open_program(
    binary_path: Path,
    project_location: Union[str, Path] = None,
    project_name: str = None,
    analyze=True,
    language: str = None,
    compiler: str = None,
) -> ContextManager["FlatProgramAPI"]:
    """
    Opens given binary path in Ghidra and returns FlatProgramAPI object.

    Unlike upstream pyhidra, the project is opened once and kept in the
    module-level ``_project`` (closed via ``atexit``), and every opened program
    is cached in ``_program_pool`` under the binary's file name. Consequently
    ``project_location`` and ``project_name`` are only used by the first call.

    :param binary_path: Path to binary file (``str`` is accepted as well).
    :param project_location: Location of Ghidra project to open/create.
        (Defaults to same directory as binary file)
    :param project_name: Name of Ghidra project to open/create.
        (Defaults to name of binary file suffixed with "_ghidra")
    :param analyze: Whether to run analysis before returning.
    :param language: The LanguageID to use for the program.
        (Defaults to Ghidra's detected LanguageID)
    :param compiler: The CompilerSpecID to use for the program. Requires a provided language.
        (Defaults to the Language's default compiler)
    :return: A Ghidra FlatProgramAPI object.
    :raises ValueError: If the provided language or compiler is invalid.
    :raises Exception: Any error raised while preparing the program (analysis,
        rebasing) is logged and re-raised. Errors raised inside the ``with``
        body are logged and suppressed.
    """
    global _project

    from pyhidra.launcher import PyhidraLauncher, HeadlessPyhidraLauncher

    if not PyhidraLauncher.has_launched():
        HeadlessPyhidraLauncher().start()

    from ghidra.app.script import GhidraScriptUtil
    from ghidra.program.flatapi import FlatProgramAPI

    # The helpers accept str as well; normalise so `.name` works for both.
    binary_path = Path(binary_path)
    pool_key = binary_path.name

    program = _program_pool.get(pool_key)
    if program is None:
        if _project is None:
            # First call: open/create the project and keep it for later calls.
            _project, program = _setup_project(
                binary_path,
                project_location,
                project_name,
                language,
                compiler
            )
            atexit.register(_project.close)
        else:
            # Later calls: reuse the project that is already open.
            program = _setup_project2(
                binary_path,
                language,
                compiler
            )
        _program_pool[pool_key] = program
    GhidraScriptUtil.acquireBundleHostReference()

    handed_out = False  # becomes True once control is about to pass to the with-body
    try:
        flat_api = FlatProgramAPI(program)

        if analyze:
            _analyze_program(flat_api, program)

        # change base address
        base_addr = flat_api.toAddr(GHIDRA_BASE)
        if flat_api.currentProgram.getImageBase() != base_addr:
            flat_api.currentProgram.setImageBase(base_addr, False)
        assert flat_api.currentProgram.getImageBase() == base_addr

        handed_out = True
        yield flat_api
    except Exception as e:
        FAIL("--- Error while opening program:", e)
        FAIL(f"binary {binary_path} in {project_location}")
        traceback.print_exc()
        if not handed_out:
            # Swallowing a failure that happens before the yield makes
            # contextlib raise "generator didn't yield" instead of the real
            # error, so propagate the original exception.
            raise
        # NOTE(review): exceptions coming from the with-body are still
        # suppressed after logging, as before - confirm this is intended.
    finally:
        # The project and program stay open for reuse; the project is closed at exit.
        GhidraScriptUtil.releaseBundleHostReference()

1.1 基础用法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Basic usage: print the decompiled code and the disassembly of selected functions.
import pyhidra
pyhidra.start()  # must run before any `ghidra.*` import below
from typing import TYPE_CHECKING
if TYPE_CHECKING:
    import ghidra
from pathlib import Path
from ghidra.app.decompiler.flatapi import FlatDecompilerAPI
from ghidra.program.flatapi import FlatProgramAPI
# Project-local helpers this script calls (module paths as used elsewhere in this write-up).
from binary_search.pyhidra_helper import open_program
from helper import panic_if_not

DATASET_PATH = Path(".")
GHIDRA_DIR = Path(".") # Ghidra project location
GHIDRA_PROJECT_NAME = "test" # Ghidra project name
# Ghidra base address; open_program rebases to the GHIDRA_BASE it imports from
# `config`, so this value has to match that one.
GHIDRA_BASE = 0x400000
FUNC_ADDRS: set[str] = set() # function addresses in binary (base addr is 0x0 in default)
BIN_PATH = DATASET_PATH / "test.so"

with open_program(BIN_PATH, GHIDRA_DIR, GHIDRA_PROJECT_NAME) as f_api:
    f_api: FlatProgramAPI = f_api  # for type hint
    decomp_api = FlatDecompilerAPI(f_api)
    listing = f_api.currentProgram.getListing()
    for target in FUNC_ADDRS:
        # FUNC_ADDRS holds hex strings relative to base 0x0; shift them to the Ghidra base.
        func = f_api.getFunctionAt(f_api.toAddr(hex(int(target, 16) + GHIDRA_BASE)))
        # Check before touching `func`: getFunctionAt may return None.
        panic_if_not(func is not None, f"[-] function not found at {target} in {BIN_PATH.name}")
        # assembly code
        acode = "\n".join(i.toString() for i in listing.getCodeUnits(func.getBody(), True))
        dcode = decomp_api.decompile(func, 0).strip() # decompiled code
        print(dcode, acode, sep="\n")

1.2 取 Callee 依赖方法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from pathlib import Path
from binary_search.pyhidra_helper import open_program
from config import GHIDRA_DIR, GHIDRA_PROJECT_NAME, GHIDRA_BASE
from helper import INFO, panic_if_not, WARNING, OK
from ghidra.program.flatapi import FlatProgramAPI
from ghidra.util.task import ConsoleTaskMonitor
from ghidra.program.model.listing import Function
from tqdm import tqdm
from copy import copy

def search_deps(function_addrs: set[str], elf: Path) -> set[tuple[str, str]]:
    """
    Search for dependencies (directly called functions) of given functions in given binary.

    :param function_addrs: set of function addresses in Ghidra in hex string
    :param elf: path to binary
    :return: set of ``(function name, entry point address)`` tuples, one per
        distinct callee; the address is the string form of the Ghidra entry point
    """
    # Entry points already collected; a set gives O(1) membership tests.
    known_addrs: set[str] = set()
    INFO(f"[+] binary searching for deps for {len(function_addrs)} functions in {elf}")
    panic_if_not(elf.exists(), f"[-] {elf} not found")
    funcs: set[tuple[str, str]] = set()
    monitor = ConsoleTaskMonitor()
    with open_program(elf, GHIDRA_DIR, GHIDRA_PROJECT_NAME) as f_api:
        f_api: FlatProgramAPI = f_api # for type hint
        for addr in tqdm(function_addrs):
            func = f_api.getFunctionAt(f_api.toAddr(addr))
            if func is None:
                WARNING(f"[-] function not found at {addr} in {elf}")
                continue
            # Direct callees of this function.
            called_funcs: set[Function] = func.getCalledFunctions(monitor)
            for callee in called_funcs:
                # Separate name so the outer loop variable `addr` is not rebound.
                callee_addr: str = callee.getEntryPoint().toString() # type: ignore
                if callee_addr in known_addrs:
                    continue
                known_addrs.add(callee_addr)
                funcs.add((callee.getName(), callee_addr)) # type: ignore
    OK(f"[+] found {len(funcs)} functions as deps in {elf}")
    return funcs
-EOF-