Python:用 pyhidra 调用 Ghidra 对二进制文件做静态分析(static analysis)
/ 6 min read
Table of Contents
0. Prep
- 安装
ghidra
pacman -S ghidra
i. 写入 ghidra 的环境变量:
echo "export GHIDRA_INSTALL_DIR=/opt/ghidra" >> ~/.zshrc
(视情况而定) - 安装
pyhidra
pip install pyhidra
- [Optional] 安装
ghidra-stub
pip install ghidra-stub
1. Usage
1.0 Helper Tools
我对 pyhidra 中 open_program 和 _setup_project 的实现做了一些更改(复用同一个 Ghidra 项目,并缓存已经打开的程序):
from helper import FAILimport tracebackimport atexitfrom config import GHIDRA_BASE
# This file is a modified version of pyhidra.core.open_program and pyhidra.core._setup_project.
# Original credit goes to the pyhidra project.
import contextlibfrom pathlib import Pathfrom typing import Union, ContextManagerfrom pyhidra.core import _analyze_program, _get_compiler_spec, _get_languagefrom pyhidra.converters import * # pylint: disable=wildcard-import, unused-wildcard-importfrom ghidra.base.project import GhidraProjectfrom typing import Union, Tuple, ContextManagerfrom ghidra.program.model.listing import Program
# Shared Ghidra project handle; stays None until open_program() creates/opens a project.
_project: GhidraProject | None = None
# Programs already opened in this process, keyed by the binary's file name (binary_path.name).
_program_pool: dict[str, "Program"] = {}
def _setup_project2(
    binary_path: Union[str, Path],
    language: str | None = None,
    compiler: str | None = None,
) -> "Program":
    """
    Open, or import on first use, a binary inside the already-open shared project.

    This never opens or creates a Ghidra project itself; it works on the
    module-level ``_project``, which must have been set beforehand.

    :param binary_path: Path to the binary file.
    :param language: The LanguageID to use when importing. When omitted the
        single-argument ``importProgram`` overload is used.
    :param compiler: The CompilerSpecID to use when importing; only consulted
        when ``language`` is given.
    :return: The opened or freshly imported Program (the project is not returned).
    :raises AssertionError: If no shared project is open, or no program could
        be obtained (e.g. ``binary_path`` is None).
    :raises RuntimeError: If the import fails and no language was provided.
    :raises ValueError: If the import fails with the provided language/compiler.
    """
    from java.io import IOException

    # Only read here, so no ``global`` declaration is needed.
    assert _project is not None
    project = _project

    if binary_path is not None:
        binary_path = Path(binary_path)

    # Try to open a program with this name from the project root first.
    program: "Program | None" = None
    if binary_path is not None:
        try:
            program = project.openProgram("/", binary_path.name, False)
        except IOException:
            # Could not be opened from the project; fall through to the import below.
            program = None

    if binary_path is not None and program is None:
        if language is None:
            program = project.importProgram(binary_path)  # type: ignore
            if program is None:
                raise RuntimeError(f"Ghidra failed to import '{binary_path}'. Try providing a language manually.")
        else:
            lang = _get_language(language)
            comp = _get_compiler_spec(lang, compiler)  # type: ignore
            program = project.importProgram(binary_path, lang, comp)  # type: ignore
            if program is None:
                message = f"Ghidra failed to import '{binary_path}'. "
                if compiler:
                    message += f"The provided language/compiler pair ({language} / {compiler}) may be invalid."
                else:
                    message += f"The provided language ({language}) may be invalid."
                raise ValueError(message)
        # Last argument is False here (upstream pyhidra passes True).
        project.saveAs(program, "/", binary_path.name, False)

    assert program is not None
    return program  # type: ignore
def _setup_project(
    binary_path: Union[str, Path],
    project_location: Union[str, Path, None] = None,
    project_name: str | None = None,
    language: str | None = None,
    compiler: str | None = None,
) -> Tuple["GhidraProject", "Program"]:
    """
    Open or create a Ghidra project and open/import the given binary in it.

    :param binary_path: Path to the binary file. It is required whenever
        ``project_location`` or ``project_name`` is omitted, because both
        defaults are derived from it.
    :param project_location: Directory that holds the project. Defaults to the
        binary's parent directory. The project name is appended to it and the
        resulting directory is created if missing.
    :param project_name: Name of the project. Defaults to the binary's file
        name suffixed with ``_ghidra``.
    :param language: The LanguageID to use when importing.
    :param compiler: The CompilerSpecID to use when importing; only consulted
        when ``language`` is given.
    :return: ``(project, program)``; ``program`` is None when ``binary_path`` is None.
    :raises RuntimeError: If the import fails and no language was provided.
    :raises ValueError: If the import fails with the provided language/compiler.
    """
    from ghidra.base.project import GhidraProject
    from java.io import IOException

    if binary_path is not None:
        binary_path = Path(binary_path)
    if project_location:
        project_location = Path(project_location)
    else:
        project_location = binary_path.parent
    if not project_name:
        project_name = f"{binary_path.name}_ghidra"
    project_location = project_location / project_name
    project_location.mkdir(exist_ok=True, parents=True)

    # Open/Create project.
    # Opening the project and opening the program are handled separately:
    # a failure to open the *program* must not be mistaken for a missing
    # *project*, otherwise createProject() would be called for a project that
    # has just been opened successfully.
    program: "Program | None" = None
    try:
        project = GhidraProject.openProject(project_location, project_name, True)
    except IOException:
        project = GhidraProject.createProject(project_location, project_name, False)
    else:
        if binary_path is not None:
            try:
                program = project.openProgram("/", binary_path.name, False)
            except IOException:
                # Could not be opened from the project; import it below.
                program = None

    if binary_path is not None and program is None:
        if language is None:
            program = project.importProgram(binary_path)
            if program is None:
                raise RuntimeError(f"Ghidra failed to import '{binary_path}'. Try providing a language manually.")
        else:
            lang = _get_language(language)
            comp = _get_compiler_spec(lang, compiler)
            program = project.importProgram(binary_path, lang, comp)
            if program is None:
                message = f"Ghidra failed to import '{binary_path}'. "
                if compiler:
                    message += f"The provided language/compiler pair ({language} / {compiler}) may be invalid."
                else:
                    message += f"The provided language ({language}) may be invalid."
                raise ValueError(message)
        # Last argument is False here (upstream pyhidra passes True).
        project.saveAs(program, "/", binary_path.name, False)

    return project, program
@contextlib.contextmanager
def open_program(
    binary_path: Union[str, Path],
    project_location: Union[str, Path, None] = None,
    project_name: str | None = None,
    analyze=True,
    language: str | None = None,
    compiler: str | None = None,
) -> ContextManager["FlatProgramAPI"]:
    """
    Opens given binary path in Ghidra and yields a FlatProgramAPI object.

    The first call opens/creates the Ghidra project, stores it in the
    module-level ``_project`` and registers its ``close`` with ``atexit``.
    Later calls reuse that project, and programs that were already opened are
    served from ``_program_pool`` (keyed by the binary's file name), in which
    case ``project_location``, ``project_name``, ``language`` and ``compiler``
    are not consulted. The program's image base is set to ``GHIDRA_BASE``
    before it is yielded. Neither the project nor the program is saved or
    closed when the context exits.

    :param binary_path: Path to binary file (``str`` or ``Path``).
    :param project_location: Location of Ghidra project to open/create.
        (Defaults to same directory as binary file)
    :param project_name: Name of Ghidra project to open/create.
        (Defaults to name of binary file suffixed with "_ghidra")
    :param analyze: Whether to run analysis before returning.
    :param language: The LanguageID to use for the program.
        (Defaults to Ghidra's detected LanguageID)
    :param compiler: The CompilerSpecID to use for the program. Requires a provided language.
        (Defaults to the Language's default compiler)
    :return: A Ghidra FlatProgramAPI object.
    :raises ValueError: If the provided language or compiler is invalid.

    Exceptions raised inside the ``with`` body are logged and suppressed.
    Exceptions raised while preparing the program (analysis, rebasing) are
    logged and re-raised, so the caller sees the real cause.
    """
    global _project

    from pyhidra.launcher import PyhidraLauncher, HeadlessPyhidraLauncher

    if not PyhidraLauncher.has_launched():
        HeadlessPyhidraLauncher().start()

    # ghidra.* can only be imported once the launcher has started.
    from ghidra.app.script import GhidraScriptUtil
    from ghidra.program.flatapi import FlatProgramAPI

    # Accept plain strings as well; ``.name`` below needs a Path.
    binary_path = Path(binary_path)

    def _report(error: Exception) -> None:
        # Shared logging for both failure paths below.
        FAIL("--- Error while opening program:", error)
        FAIL(f"binary {binary_path} in {project_location}")
        traceback.print_exc()

    program = _program_pool.get(binary_path.name, None)
    if program is None:
        if _project is None:
            project, program = _setup_project(
                binary_path,
                project_location,
                project_name,
                language,
                compiler,
            )
            _project = project
            # The shared project stays open for the whole process.
            atexit.register(_project.close)
        else:
            program = _setup_project2(binary_path, language, compiler)
        _program_pool[binary_path.name] = program

    # Acquired on every entry so it always pairs with the release in ``finally``.
    GhidraScriptUtil.acquireBundleHostReference()
    try:
        try:
            flat_api = FlatProgramAPI(program)

            if analyze:
                _analyze_program(flat_api, program)

            # change base address
            base_addr = flat_api.toAddr(GHIDRA_BASE)
            if flat_api.currentProgram.getImageBase() != base_addr:
                flat_api.currentProgram.setImageBase(base_addr, False)
            assert flat_api.currentProgram.getImageBase() == base_addr
        except Exception as e:
            # Nothing has been yielded yet: swallowing the error here would make
            # contextlib raise an unrelated "generator didn't yield" RuntimeError.
            _report(e)
            raise

        try:
            yield flat_api
        except Exception as e:
            # Best effort: errors from the caller's ``with`` body are logged only.
            _report(e)
    finally:
        GhidraScriptUtil.releaseBundleHostReference()
1.1 基础用法
import pyhidra

pyhidra.start()  # must run before any ghidra.* import below

from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    import ghidra

from ghidra.app.decompiler.flatapi import FlatDecompilerAPI
from ghidra.program.flatapi import FlatProgramAPI

# NOTE(review): module paths taken from the imports used in section 1.2 — adjust to your layout.
from binary_search.pyhidra_helper import open_program
from helper import panic_if_not

DATASET_PATH = Path(".")
GHIDRA_DIR = Path(".")  # Ghidra project location
GHIDRA_PROJECT_NAME = "test"  # Ghidra project name
GHIDRA_BASE = 0x400000  # Ghidra base address, used in open_program
FUNC_ADDRS: set[str] = set()  # function addresses in binary (base addr is 0x0 in default)

BINARY = DATASET_PATH / "test.so"

with open_program(BINARY, GHIDRA_DIR, GHIDRA_PROJECT_NAME) as f_api:
    f_api: FlatProgramAPI = f_api  # for type hint
    decomp_api = FlatDecompilerAPI(f_api)
    listing = f_api.currentProgram.getListing()
    try:
        for target in FUNC_ADDRS:
            # Entries are hex strings relative to base 0x0; shift them by the image base.
            func = f_api.getFunctionAt(f_api.toAddr(hex(int(target, 16) + GHIDRA_BASE)))
            # Check before touching ``func``; presumably panic_if_not aborts on failure — verify.
            panic_if_not(func is not None, f"[-] function not found at {target} in {BINARY}")
            # assembly code: one line per code unit in the function body
            acode = "\n".join(unit.toString() for unit in listing.getCodeUnits(func.getBody(), True))
            dcode = decomp_api.decompile(func, 0).strip()  # decompiled code
            print(dcode, acode, sep="\n")
    finally:
        # Release the decompiler once all functions have been processed.
        decomp_api.dispose()
1.2 获取 Callee(被调用函数)依赖
from pathlib import Pathfrom binary_search.pyhidra_helper import open_programfrom config import GHIDRA_DIR, GHIDRA_PROJECT_NAME, GHIDRA_BASEfrom helper import INFO, panic_if_not, WARNING, OKfrom ghidra.program.flatapi import FlatProgramAPIfrom ghidra.util.task import ConsoleTaskMonitorfrom ghidra.program.model.listing import Functionfrom tqdm import tqdmfrom copy import copy
def search_deps(function_addrs: set[str], elf: Path) -> set[tuple[str, str]]:
    """
    Search for dependencies (directly called functions) of given functions in given binary.

    :param function_addrs: set of function addresses in Ghidra in hex string
    :param elf: path to binary
    :return: set of ``(function name, entry address string)`` pairs, one per
        distinct callee entry address
    """
    INFO(f"[+] binary searching for deps for {len(function_addrs)} functions in {elf}")
    panic_if_not(elf.exists(), f"[-] {elf} not found")

    known_addrs: set[str] = set()  # callee entry addresses already recorded
    funcs: set[tuple[str, str]] = set()
    monitor = ConsoleTaskMonitor()

    with open_program(elf, GHIDRA_DIR, GHIDRA_PROJECT_NAME) as f_api:
        f_api: FlatProgramAPI = f_api  # for type hint
        for addr in tqdm(function_addrs):
            func = f_api.getFunctionAt(f_api.toAddr(addr))
            if func is None:
                WARNING(f"[-] function not found at {addr} in {elf}")
                continue

            # Direct callees only; a separate name keeps the outer ``addr`` intact.
            for callee in func.getCalledFunctions(monitor):
                # str() normalises a possible Java string to a Python str.
                callee_addr = str(callee.getEntryPoint().toString())
                if callee_addr in known_addrs:
                    continue
                known_addrs.add(callee_addr)
                funcs.add((str(callee.getName()), callee_addr))

    OK(f"[+] found {len(funcs)} functions as deps in {elf}")
    return funcs