skip to content
Table of Contents

0. Prep

  1. 安装 ghidra：`pacman -S ghidra`
     i. 写入 ghidra 的环境变量（视情况而定）：`echo "export GHIDRA_INSTALL_DIR=/opt/ghidra" >> ~/.zshrc`
  2. 安装 pyhidra：`pip install pyhidra`
  3. [Optional] 安装 ghidra-stubs（为 IDE 提供类型提示）：`pip install ghidra-stubs`

1. Usage

1.0 Helper Tools

我对 pyhidra 中的实现做了一些更改:

from helper import FAIL
import traceback
import atexit
from config import GHIDRA_BASE
# This file is a modified version of pyhidra.core.open_program and pyhidra.core._setup_project.
# Original credit goes to the pyhidra project.
import contextlib
from pathlib import Path
from typing import Union, ContextManager
from pyhidra.core import _analyze_program, _get_compiler_spec, _get_language
from pyhidra.converters import * # pylint: disable=wildcard-import, unused-wildcard-import
from ghidra.base.project import GhidraProject
from typing import Union, Tuple, ContextManager
from ghidra.program.model.listing import Program
# Project shared by every open_program() call in this process; assigned on the
# first call and closed via atexit.
_project: GhidraProject | None = None
# Programs already opened through open_program(), keyed by the binary's file name.
_program_pool: dict[str, "Program"] = {}
def _setup_project2(
        binary_path: Union[str, Path],
        language: str | None = None,
        compiler: str | None = None
) -> "Program":
    """
    Opens a program inside the already-opened module-level project, importing
    the binary first when the project does not contain it yet.

    :param binary_path: Path to the binary file. Must not be None.
    :param language: The LanguageID to use when importing.
        (Defaults to Ghidra's detected LanguageID)
    :param compiler: The CompilerSpecID to use when importing. Requires a provided language.
    :return: The opened (or freshly imported and saved) Ghidra Program.
    :raises RuntimeError: If no project has been opened yet, or if Ghidra
        fails to import the binary with an auto-detected language.
    :raises ValueError: If binary_path is None, or if the provided
        language/compiler is invalid.
    """
    from java.io import IOException

    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if _project is None:
        raise RuntimeError("_setup_project2 requires an already opened Ghidra project")
    if binary_path is None:
        raise ValueError("binary_path must not be None")
    binary_path = Path(binary_path)
    project = _project

    # Probing with project.getRootFolder().getFile(...) did not work here, so
    # simply try to open the program and treat IOException as "not there yet".
    try:
        program = project.openProgram("/", binary_path.name, False)
    except IOException:
        program = None

    if program is None:
        if language is None:
            program = project.importProgram(binary_path)  # type: ignore
            if program is None:
                raise RuntimeError(f"Ghidra failed to import '{binary_path}'. Try providing a language manually.")
        else:
            lang = _get_language(language)
            comp = _get_compiler_spec(lang, compiler)  # type: ignore
            program = project.importProgram(binary_path, lang, comp)  # type: ignore
            if program is None:
                message = f"Ghidra failed to import '{binary_path}'. "
                if compiler:
                    message += f"The provided language/compiler pair ({language} / {compiler}) may be invalid."
                else:
                    message += f"The provided language ({language}) may be invalid."
                raise ValueError(message)
        # Last argument changed from True (upstream pyhidra) to False.
        project.saveAs(program, "/", binary_path.name, False)
    return program  # type: ignore
def _setup_project(
        binary_path: Union[str, Path],
        project_location: Union[str, Path] = None,
        project_name: str = None,
        language: str = None,
        compiler: str = None
) -> Tuple["GhidraProject", "Program"]:
    """
    Opens (or creates) a Ghidra project and opens (or imports) the binary in it.

    :param binary_path: Path to the binary file, may be None when both
        project_location and project_name are given.
    :param project_location: Directory that holds the project directory.
        (Defaults to the directory of the binary file)
    :param project_name: Name of the Ghidra project.
        (Defaults to the binary file name suffixed with "_ghidra")
    :param language: The LanguageID to use when importing.
        (Defaults to Ghidra's detected LanguageID)
    :param compiler: The CompilerSpecID to use when importing. Requires a provided language.
    :return: Tuple of (project, program); program is None when binary_path is None.
    :raises RuntimeError: If Ghidra fails to import the binary with an
        auto-detected language.
    :raises ValueError: If a default cannot be derived because binary_path is
        None, or if the provided language/compiler is invalid.
    """
    from ghidra.base.project import GhidraProject
    from java.io import IOException

    if binary_path is not None:
        binary_path = Path(binary_path)

    if project_location:
        project_location = Path(project_location)
    elif binary_path is not None:
        project_location = binary_path.parent
    else:
        raise ValueError("project_location is required when binary_path is None")

    if not project_name:
        if binary_path is None:
            raise ValueError("project_name is required when binary_path is None")
        project_name = f"{binary_path.name}_ghidra"

    project_location = project_location / project_name
    project_location.mkdir(exist_ok=True, parents=True)

    # Open the project, creating it only when opening the *project* fails.
    try:
        project = GhidraProject.openProject(project_location, project_name, True)
    except IOException:
        project = GhidraProject.createProject(project_location, project_name, False)

    # Opening the program is guarded separately: a program that is missing
    # from an existing project must lead to an import below, not to
    # createProject() being called on a project that is already open.
    program: "Program" = None
    if binary_path is not None:
        try:
            program = project.openProgram("/", binary_path.name, False)
        except IOException:
            program = None

    if binary_path is not None and program is None:
        if language is None:
            program = project.importProgram(binary_path)
            if program is None:
                raise RuntimeError(f"Ghidra failed to import '{binary_path}'. Try providing a language manually.")
        else:
            lang = _get_language(language)
            comp = _get_compiler_spec(lang, compiler)
            program = project.importProgram(binary_path, lang, comp)
            if program is None:
                message = f"Ghidra failed to import '{binary_path}'. "
                if compiler:
                    message += f"The provided language/compiler pair ({language} / {compiler}) may be invalid."
                else:
                    message += f"The provided language ({language}) may be invalid."
                raise ValueError(message)
        # Last argument changed from True (upstream pyhidra) to False.
        project.saveAs(program, "/", binary_path.name, False)
    return project, program
@contextlib.contextmanager
def open_program(
        binary_path: Path,
        project_location: Union[str, Path] = None,
        project_name: str = None,
        analyze=True,
        language: str = None,
        compiler: str = None,
) -> ContextManager["FlatProgramAPI"]:
    """
    Opens given binary path in Ghidra and yields a FlatProgramAPI object.

    Unlike upstream pyhidra, one project is kept open for the whole process
    (closed via atexit) and opened programs are cached by file name, so
    project_location / project_name only take effect on the first call.
    The program's image base is set to GHIDRA_BASE before it is yielded.

    Exceptions raised inside the ``with`` body are logged and suppressed;
    failures while preparing the program are logged and re-raised.

    :param binary_path: Path to binary file. Must not be None, since its
        file name is used as the cache key.
    :param project_location: Location of Ghidra project to open/create.
        (Defaults to same directory as binary file)
    :param project_name: Name of Ghidra project to open/create.
        (Defaults to name of binary file suffixed with "_ghidra")
    :param analyze: Whether to run analysis before returning.
    :param language: The LanguageID to use for the program.
        (Defaults to Ghidra's detected LanguageID)
    :param compiler: The CompilerSpecID to use for the program. Requires a provided language.
        (Defaults to the Language's default compiler)
    :return: A Ghidra FlatProgramAPI object.
    :raises ValueError: If the provided language or compiler is invalid.
    :raises RuntimeError: If the image base could not be set to GHIDRA_BASE.
    """
    global _project, _program_pool
    from pyhidra.launcher import PyhidraLauncher, HeadlessPyhidraLauncher
    if not PyhidraLauncher.has_launched():
        HeadlessPyhidraLauncher().start()
    from ghidra.app.script import GhidraScriptUtil
    from ghidra.program.flatapi import FlatProgramAPI

    def _report(exc: Exception) -> None:
        # Shared logging for both the setup phase and the with-body phase.
        FAIL("--- Error while opening program:", exc)
        FAIL(f"binary {binary_path} in {project_location}")
        traceback.print_exc()

    # Identity checks: `==` on a Java proxy object dispatches to equals().
    program = _program_pool.get(binary_path.name, None)
    if program is None:
        if _project is None:
            project, program = _setup_project(
                binary_path,
                project_location,
                project_name,
                language,
                compiler
            )
            _project = project
            atexit.register(_project.close)
        else:
            program = _setup_project2(
                binary_path,
                language,
                compiler
            )
        _program_pool[binary_path.name] = program

    GhidraScriptUtil.acquireBundleHostReference()
    try:
        # Setup phase: nothing has been yielded yet, so swallowing an error
        # here would only surface as contextlib's "generator didn't yield".
        # Log it and let the real exception propagate instead.
        try:
            flat_api = FlatProgramAPI(program)
            if analyze:
                _analyze_program(flat_api, program)
            # change base address
            base_addr = flat_api.toAddr(GHIDRA_BASE)
            if flat_api.currentProgram.getImageBase() != base_addr:
                flat_api.currentProgram.setImageBase(base_addr, False)
            if flat_api.currentProgram.getImageBase() != base_addr:
                raise RuntimeError(f"failed to set image base of {binary_path} to {base_addr}")
        except Exception as e:
            _report(e)
            raise

        # Body phase: keep the original best-effort behaviour of logging and
        # suppressing whatever the caller's with-block raises.
        try:
            yield flat_api
        except Exception as e:
            _report(e)
    finally:
        GhidraScriptUtil.releaseBundleHostReference()

1.1 基础用法

import pyhidra
pyhidra.start()  # the JVM must be running before any ghidra.* import below

from typing import TYPE_CHECKING
if TYPE_CHECKING:
    import ghidra
from pathlib import Path
from ghidra.app.decompiler.flatapi import FlatDecompilerAPI
from ghidra.program.flatapi import FlatProgramAPI
from binary_search.pyhidra_helper import open_program  # the helper from section 1.0
from helper import panic_if_not

DATASET_PATH = Path(".")
GHIDRA_DIR = Path(".")  # Ghidra project location
GHIDRA_PROJECT_NAME = "test"  # Ghidra project name
# Ghidra base address; open_program rebases to the GHIDRA_BASE it imports from
# config, so this value presumably has to match that one -- verify.
GHIDRA_BASE = 0x400000
# Function offsets as hex strings, relative to base address 0x0.
FUNC_ADDRS: set[str] = set()
BIN_PATH = DATASET_PATH / "test.so"

with open_program(BIN_PATH, GHIDRA_DIR, GHIDRA_PROJECT_NAME) as f_api:
    f_api: FlatProgramAPI = f_api  # for type hint
    decomp_api = FlatDecompilerAPI(f_api)
    listing = f_api.currentProgram.getListing()
    try:
        for target in FUNC_ADDRS:
            # Shift the raw offset onto the rebased image.
            func = f_api.getFunctionAt(f_api.toAddr(hex(int(target, 16) + GHIDRA_BASE)))
            # Check before func is dereferenced by getBody() below.
            panic_if_not(func is not None, f"[-] function not found at {target} in {BIN_PATH.name}")
            # assembly code
            acode = "\n".join(i.toString() for i in listing.getCodeUnits(func.getBody(), True))
            # decompiled code
            dcode = decomp_api.decompile(func, 0).strip()
            print(dcode, acode, sep="\n")
    finally:
        # Release the decompiler held by the FlatDecompilerAPI.
        decomp_api.dispose()

1.2 获取 Callee 依赖的方法

from pathlib import Path
from binary_search.pyhidra_helper import open_program
from config import GHIDRA_DIR, GHIDRA_PROJECT_NAME, GHIDRA_BASE
from helper import INFO, panic_if_not, WARNING, OK
from ghidra.program.flatapi import FlatProgramAPI
from ghidra.util.task import ConsoleTaskMonitor
from ghidra.program.model.listing import Function
from tqdm import tqdm
from copy import copy
def search_deps(function_addrs: set[str], elf: Path) -> set[tuple[str, str]]:
    """
    Search for the direct callees of the given functions in the given binary.

    :param function_addrs: set of function addresses in Ghidra in hex string
    :param elf: path to binary
    :return: set of (function name, entry address) tuples, one per distinct
        callee entry address; addresses are Ghidra address strings
    """
    # Entry addresses already collected; a set gives O(1) membership tests.
    known_addrs: set[str] = set()
    INFO(f"[+] binary searching for deps for {len(function_addrs)} functions in {elf}")
    panic_if_not(elf.exists(), f"[-] {elf} not found")
    funcs: set[tuple[str, str]] = set()
    monitor = ConsoleTaskMonitor()
    with open_program(elf, GHIDRA_DIR, GHIDRA_PROJECT_NAME) as f_api:
        f_api: FlatProgramAPI = f_api  # for type hint
        for addr in tqdm(function_addrs):
            # NOTE(review): addresses are presumably already rebased onto
            # GHIDRA_BASE by the caller -- confirm.
            func = f_api.getFunctionAt(f_api.toAddr(addr))
            if func is None:
                WARNING(f"[-] function not found at {addr} in {elf}")
                continue
            # Direct callees only; a separate name avoids rebinding the outer
            # loop variable `addr`.
            for callee in func.getCalledFunctions(monitor):
                callee_addr = str(callee.getEntryPoint().toString())
                if callee_addr in known_addrs:
                    continue
                known_addrs.add(callee_addr)
                funcs.add((str(callee.getName()), callee_addr))
    OK(f"[+] found {len(funcs)} functions as deps in {elf}")
    return funcs
-EOF-