basic website complete

- ported posts from Efiens
- add CV
- add MachO obfuscation whitepaper
This commit is contained in:
nganhkhoa 2023-10-25 01:12:36 +07:00
parent 7a8ce17237
commit 7968743f3c
16 changed files with 1785 additions and 45 deletions

98
app/Route/Blog.elm Normal file
View File

@ -0,0 +1,98 @@
module Route.Blog exposing (ActionData, Data, Model, Msg, route)
import Article
import BackendTask exposing (BackendTask)
import Date exposing (Date)
import FatalError exposing (FatalError)
import Head
import Head.Seo as Seo
import Html.Styled exposing (..)
import Json.Decode as Decode exposing (Decoder)
import Json.Decode.Extra
import Pages.Url
import PagesMsg exposing (PagesMsg)
import Route exposing (Route)
import RouteBuilder exposing (App, StatelessRoute)
import Shared
import View exposing (View)
import Link
import Markdown.Block
import Markdown.Renderer
import MarkdownCodec
import TailwindMarkdownRenderer
import Tailwind.Utilities as Tw
-- The route is built with `RouteBuilder.buildNoState`, so the model is an
-- empty record.
type alias Model =
{}
-- This route declares no messages of its own; `Msg` is the unit type.
type alias Msg =
()
-- No fields: the route is registered with `RouteBuilder.single` and reads
-- nothing from the URL.
type alias RouteParams =
{}
-- Route definition for the blog index: a single page built without local state.
route : StatelessRoute RouteParams Data ActionData
route =
    let
        singlePage =
            RouteBuilder.single
                { data = data
                , head = head
                }
    in
    RouteBuilder.buildNoState { view = view } singlePage
-- Page data: one (route, metadata) pair per article, as produced by
-- `Article.blogAllMetadata`.
type alias Data =
List (Route, Article.ArticleMetadata)
-- This route defines no action data; the alias is an empty record.
type alias ActionData =
{}
-- Loads the metadata of every blog article; a failure is surfaced as a
-- `FatalError` via `BackendTask.allowFatal`.
data : BackendTask FatalError Data
data =
    BackendTask.allowFatal Article.blogAllMetadata
-- SEO tags for the blog index. Every value below is still a placeholder
-- ("TODO" / elm-pages starter defaults).
head :
    App Data ActionData RouteParams
    -> List Head.Tag
head _ =
    let
        previewImage =
            { url = Pages.Url.external "TODO"
            , alt = "elm-pages logo"
            , dimensions = Nothing
            , mimeType = Nothing
            }

        summary =
            Seo.summary
                { canonicalUrlOverride = Nothing
                , siteName = "elm-pages"
                , image = previewImage
                , description = "TODO"
                , locale = Nothing
                , title = "TODO title" -- metadata.title -- TODO
                }
    in
    Seo.website summary
-- Renders the blog index: one entry per (route, metadata) pair in `app.data`.
view :
    App Data ActionData RouteParams
    -> Shared.Model
    -> View msg
view app _ =
    { title = "title"
    , body = List.map renderBlogItem app.data
    }
-- Renders one blog entry as an internal link to the article.
--
-- The title and the summary each get their own `div`. Previously both were
-- adjacent text nodes inside a single `div`, so the browser rendered them
-- run together with no separation ("TitleSummary").
renderBlogItem : ( Route, Article.ArticleMetadata ) -> Html msg
renderBlogItem ( route_, article ) =
    Link.link (Link.internal route_)
        []
        [ div []
            [ div [] [ text article.title ]
            , div [] [ text article.summary ]
            ]
        ]

View File

@ -86,7 +86,7 @@ frontmatterDecoder : Decoder ArticleMetadata
frontmatterDecoder =
Decode.map4 ArticleMetadata
(Decode.field "title" Decode.string)
(Decode.field "description" Decode.string)
(Decode.field "summary" Decode.string)
(Decode.field "published"
(Decode.string
|> Decode.andThen
@ -136,10 +136,5 @@ view app shared =
(app.data.body
|> Markdown.Renderer.render TailwindMarkdownRenderer.renderer
|> Result.withDefault []
|> processReturn
)
}
processReturn : List (Html Msg) -> List (Html (PagesMsg Msg))
processReturn =
List.map (Html.Styled.map (PagesMsg.fromMsg))

View File

@ -5,6 +5,7 @@ import FatalError exposing (FatalError)
import Head
import Head.Seo as Seo
import Html.Styled as Html
import Html.Styled.Attributes as Attributes
import Link exposing (Link)
import Pages.Url
import PagesMsg exposing (PagesMsg)
@ -77,18 +78,19 @@ view :
-> Shared.Model
-> View (PagesMsg Msg)
view app shared =
{ title = "elm-pages is running"
{ title = "Anh Khoa Nguyen"
, body =
[ Html.h1 [] [ Html.text "elm-pages is up and running!" ]
, Html.p []
[ Html.text <| "The message is: " ++ app.data.message
[ Html.p []
[ Html.text <| "Welcome to my personal website, where I post random things and thoughts."
]
, Link.link (Link.internal (Route.Blog__Slug_ { slug = "a" })) [] [ Html.text "My blog post" ]
, Link.link (Link.internal (Route.Blog__Slug_ { slug = "" })) [] [ Html.text "Blogs" ]
, Html.br [] []
, Link.link (Link.internal (Route.Osx__Slug_ { slug = "" })) [] [ Html.text "OSX Series" ]
, Html.br [] []
, Html.text "Here is my CV:"
, Link.link (Link.external cvpdf) [Attributes.target "_blank"] [Html.text "CV.pdf"]
]
|> processReturn
}
processReturn : List (Html.Html Msg) -> List (Html.Html (PagesMsg Msg))
processReturn =
List.map (Html.map (PagesMsg.fromMsg))
-- Location of the CV PDF, used as the external link target on the home page.
-- NOTE(review): this path is relative, while `whitepaper` in Route.Osx uses a
-- root-absolute path ("/macho-obfuscation.pdf") — confirm this is intended.
cvpdf : String
cvpdf = "cv.pdf"

113
app/Route/Osx.elm Normal file
View File

@ -0,0 +1,113 @@
module Route.Osx exposing (ActionData, Data, Model, Msg, route)
import Article
import BackendTask exposing (BackendTask)
import Date exposing (Date)
import FatalError exposing (FatalError)
import Head
import Head.Seo as Seo
import Html.Styled exposing (..)
import Html.Styled.Attributes as Attributes
import Json.Decode as Decode exposing (Decoder)
import Json.Decode.Extra
import Pages.Url
import PagesMsg exposing (PagesMsg)
import Route exposing (Route)
import RouteBuilder exposing (App, StatelessRoute)
import Shared
import View exposing (View)
import Link
import Markdown.Block
import Markdown.Renderer
import MarkdownCodec
import TailwindMarkdownRenderer
import Tailwind.Utilities as Tw
-- The route is built with `RouteBuilder.buildNoState`, so the model is an
-- empty record.
type alias Model =
{}
-- This route declares no messages of its own; `Msg` is the unit type.
type alias Msg =
()
-- No fields: the route is registered with `RouteBuilder.single` and reads
-- nothing from the URL.
type alias RouteParams =
{}
-- Route definition for the OSX series index: a single page built without
-- local state.
route : StatelessRoute RouteParams Data ActionData
route =
    let
        singlePage =
            RouteBuilder.single
                { data = data
                , head = head
                }
    in
    RouteBuilder.buildNoState { view = view } singlePage
-- Page data: one (route, metadata) pair per article, as produced by
-- `Article.osxAllMetadata`.
type alias Data =
List (Route, Article.ArticleMetadata)
-- This route defines no action data; the alias is an empty record.
type alias ActionData =
{}
-- Loads the metadata of every OSX article; a failure is surfaced as a
-- `FatalError` via `BackendTask.allowFatal`.
data : BackendTask FatalError Data
data =
    BackendTask.allowFatal Article.osxAllMetadata
-- SEO tags for the OSX series index. Every value below is still a
-- placeholder ("TODO" / elm-pages starter defaults).
head :
    App Data ActionData RouteParams
    -> List Head.Tag
head _ =
    let
        previewImage =
            { url = Pages.Url.external "TODO"
            , alt = "elm-pages logo"
            , dimensions = Nothing
            , mimeType = Nothing
            }

        summary =
            Seo.summary
                { canonicalUrlOverride = Nothing
                , siteName = "elm-pages"
                , image = previewImage
                , description = "TODO"
                , locale = Nothing
                , title = "TODO title" -- metadata.title -- TODO
                }
    in
    Seo.website summary
-- Landing page of the OSX series: an introduction, links to the ported blog
-- posts (`oldBlogs`), and a link to the MachO obfuscation whitepaper that
-- opens in a new tab.
--
-- Each sentence is its own text node. A trailing space is kept on every
-- sentence that is directly followed by more inline content; without it the
-- browser renders the sentences glued together ("works.There are ...").
view :
    App Data ActionData RouteParams
    -> Shared.Model
    -> View msg
view app shared =
    { title = "title"
    , body =
        [ div []
            [ text "For years, I learned how the Apple binary format works. "
            , text "There are blog posts that I wrote when I first started learning about them. "
            , text "If you want to read them, here they are below, ported from the efiens blog."
            , ul []
                (List.map (\item -> li [] [ item ]) oldBlogs)
            , br [] []
            , text "I grew my idea in injection into an obfuscation scheme for MachO binary. "
            , text "In the following whitepaper, I writeup all steps in this obfuscation scheme. "
            , Link.link (Link.external whitepaper)
                [ Attributes.target "_blank" ]
                [ text "macho-obfuscation.pdf" ]
            ]
        ]
    }
-- Links to the OSX posts ported from the old blog, in display order.
-- Each pair is (slug, link label).
oldBlogs : List (Html msg)
oldBlogs =
    let
        postLink ( slug, label ) =
            Link.link (Link.internal (Route.Osx__Slug_ { slug = slug })) [] [ text label ]
    in
    List.map postLink
        [ ( "macho", "Macho" )
        , ( "linker", "Linker" )
        , ( "fairplay", "Fairplay" )
        , ( "inject", "Inject" )
        ]
-- Root-absolute path of the MachO obfuscation whitepaper PDF; used as the
-- external link target in `view`.
whitepaper : String
whitepaper = "/macho-obfuscation.pdf"

141
app/Route/Osx/Slug_.elm Normal file
View File

@ -0,0 +1,141 @@
module Route.Osx.Slug_ exposing (ActionData, Data, Model, Msg, route)
import Article
import BackendTask exposing (BackendTask)
import Date exposing (Date)
import FatalError exposing (FatalError)
import Head
import Head.Seo as Seo
import Html.Styled exposing (..)
import Json.Decode as Decode exposing (Decoder)
import Json.Decode.Extra
import Pages.Url
import PagesMsg exposing (PagesMsg)
import RouteBuilder exposing (App, StatelessRoute)
import Shared
import View exposing (View)
import Markdown.Block
import Markdown.Renderer
import MarkdownCodec
import TailwindMarkdownRenderer
import Tailwind.Utilities as Tw
-- The route is built with `RouteBuilder.buildNoState`, so the model is an
-- empty record.
type alias Model =
{}
-- This route declares no messages of its own; `Msg` is the unit type.
type alias Msg =
()
-- The slug selects which Markdown file under `content/osx/` is rendered
-- (see `data`).
type alias RouteParams =
{ slug : String }
-- Route definition for a single OSX article: the set of pages comes from
-- `pages`, and the route is built without local state.
route : StatelessRoute RouteParams Data ActionData
route =
    let
        preRendered =
            RouteBuilder.preRender
                { data = data
                , pages = pages
                , head = head
                }
    in
    RouteBuilder.buildNoState { view = view } preRendered
-- One `RouteParams` per entry returned by `Article.osxPostsGlob`, keeping
-- only the slug of each entry.
pages : BackendTask FatalError (List RouteParams)
pages =
    let
        toRouteParams post =
            { slug = post.slug }
    in
    BackendTask.map (List.map toRouteParams) Article.osxPostsGlob
-- Page data for one article: the decoded frontmatter plus the parsed
-- Markdown blocks of the body.
type alias Data =
{ metadata : ArticleMetadata
, body : List Markdown.Block.Block
}
-- This route defines no action data; the alias is an empty record.
type alias ActionData =
{}
-- Loads `content/osx/<slug>.md`, decoding its frontmatter with
-- `frontmatterDecoder` and packing the result into `Data`.
data : RouteParams -> BackendTask FatalError Data
data routeParams =
    let
        markdownPath =
            String.concat [ "content/osx/", routeParams.slug, ".md" ]
    in
    MarkdownCodec.withFrontmatter Data
        frontmatterDecoder
        TailwindMarkdownRenderer.renderer
        markdownPath
-- Frontmatter fields of an OSX article, as decoded by `frontmatterDecoder`.
-- Field order matters: the alias is used as a 4-argument constructor in
-- `Decode.map4`.
type alias ArticleMetadata =
{ title : String
-- filled from the "summary" frontmatter key
, description : String
-- parsed from an ISO date string
, published : Date
-- , image : Pages.Url.Url
-- False when the "draft" key is missing
, draft : Bool
}
-- Decodes the frontmatter of an OSX article.
--
--   * "title"     -> title
--   * "summary"   -> description
--   * "published" -> ISO date string parsed with `Date.fromIsoString`;
--                    a parse failure fails the decoder
--   * "draft"     -> optional boolean, `False` when it cannot be decoded
--
-- Image decoding is currently disabled (the `image` field of
-- `ArticleMetadata` is commented out).
frontmatterDecoder : Decoder ArticleMetadata
frontmatterDecoder =
    let
        isoDate =
            Decode.andThen
                (Date.fromIsoString >> Json.Decode.Extra.fromResult)
                Decode.string

        draftFlag =
            Decode.bool
                |> Decode.field "draft"
                |> Decode.maybe
                |> Decode.map (Maybe.withDefault False)
    in
    Decode.map4 ArticleMetadata
        (Decode.field "title" Decode.string)
        (Decode.field "summary" Decode.string)
        (Decode.field "published" isoDate)
        draftFlag
-- SEO tags for a single OSX article.
--
-- The title and description come from the article's decoded frontmatter
-- (`app.data.metadata`) instead of the former "TODO" placeholders. The site
-- name and preview image are still starter placeholders and need real values.
head :
    App Data ActionData RouteParams
    -> List Head.Tag
head app =
    let
        metadata =
            app.data.metadata
    in
    Seo.summary
        { canonicalUrlOverride = Nothing
        , siteName = "elm-pages"
        , image =
            { url = Pages.Url.external "TODO"
            , alt = "elm-pages logo"
            , dimensions = Nothing
            , mimeType = Nothing
            }
        , description = metadata.description
        , locale = Nothing
        , title = metadata.title
        }
        |> Seo.website
-- Renders one OSX article.
--
-- The page title is the article's frontmatter title (previously the literal
-- placeholder "title"). The body is the Markdown rendered with the Tailwind
-- renderer; if rendering fails, the body is empty (`Result.withDefault []`).
view :
    App Data ActionData RouteParams
    -> Shared.Model
    -> View (PagesMsg Msg)
view app shared =
    { title = app.data.metadata.title
    , body =
        app.data.body
            |> Markdown.Renderer.render TailwindMarkdownRenderer.renderer
            |> Result.withDefault []
    }

View File

@ -1,6 +0,0 @@
---
title: hello world
description: just hello
published: "2023-10-24"
---
helloworld

View File

@ -0,0 +1,261 @@
---
# Documentation: https://wowchemy.com/docs/managing-content/
title: "Address Translation"
subtitle: ""
summary: ""
tags: ["address translation", "memory", "os internals"]
categories: []
published: "2020-11-21"
lastmod: 2020-11-21T19:42:45+07:00
featured: false
draft: false
# Featured image
# To use, add an image named `featured.jpg/png` to your page's folder.
# Focal points: Smart, Center, TopLeft, Top, TopRight, Left, Right, BottomLeft, Bottom, BottomRight.
image:
caption: ""
focal_point: ""
preview_only: false
# Projects (optional).
# Associate this post with one or more of your projects.
# Simply enter your project's folder or file name without extension.
# E.g. `projects = ["internal-project"]` references `content/project/deep-learning/index.md`.
# Otherwise, set `projects = []`.
projects: []
---
In this post, we revisit the concept of virtual addresses, then learn and practice
translating addresses from virtual to physical and vice versa. As a bonus, we
also learn about Windows processes and how each process has a different DTB.
# Virtual Address
## The Concept
Memory is limited: laptops often have around 8GB to 16GB, while a desktop
computer could have as much as 64GB. No matter how much physical memory is
installed, the operating system still uses virtual addresses to isolate each
process's address space.
When a process is created, it lives in its own address space. The same address
in two processes points to two different places in physical memory. The
kernel too has an isolated memory space. Most OSes implement the kernel space
to be visible (not necessarily accessible) by every process (probably due to
kernel mode switching ???).
Virtual addressing is possible because the OS does not load everything into
memory. It splits data into pages of either 4KB or 2MB. If the data is
disk-bounded, then when a certain *Read Only* page is used, it will be mapped
onto the physical memory. A Writable page will be written out to the disk
at some particular place when unused. If the page does not exist, the OS will
not do anything. A running process could have a wide range of addresses,
though only some pages are accessible; the rest are NULL. For example, in 64-bit
Windows, a process could have as much as 2TB[1]; however, only some small pages
scattered across the 2TB range are valid.
## Directory Table
Each process has its own table to translate the virtual address to physical
address. This table is called directory table base. The pointer to the table is
stored in a process CR3 register. CR3 is often called with another name,
Directory Table Base. Every process, when looking up a virtual address will
have to resolve into physical address using the CR3. If the page is invalid,
read and write action will cause exception to the process. The OS often return
a zero value for read operations and crash the process for invalid write. How
the OS handles depends on its implementation.
We won't go deep into how the Directory Table is organized just yet, but here are a
few things to note. The Directory Table is a table with 512
pointers/addresses. Depending on the type of paging, we could have nested tables
until the address in the table points to physical memory. In other words, a
Directory Table Entry points to another table, and that table's entry could point
to another table or a physical address. The last entry, the physical address,
is the physical address of the page after the translation.
# Address Translation
Let's go deeper! How can we really translate a virtual address into its
physical address? How can we know if the page is valid?
## CR4 and multi-level paging
### PAE and Long Mode
In 32-bit system, we usually can't have more than 4GB of user virtual memory
space. PAE (Physical Address Extension) mode can extend this to give more
memory. Long Mode is required to switch from 32-bit to 64-bit (64-bit registers
are available as well as 64-bit specific instructions). When Long Mode is
enabled, PAE must also be enabled.
### Paging Modes
Intel processors (and AMD ones) have 5 different paging modes. Paging Mode
defines how the processor should look for a physical address given a virtual
address. The 5 paging modes are "None", "32-bit", "PAE", "4-level", "5-level".
To know which paging mode is being used, we reference the PAE, Long Mode and
CR4 register value.
| Paging Mode | Paging | PAE | Long Mode | Level-5 |
|-------------|--------|-----|-----------|---------|
| None | 0 | | | |
| 32-bit | 1 | 0 | | |
| PAE | 1 | 1 | 0 | |
| 4-level | 1 | 1 | 1 | 0 |
| 5-level | 1 | 1 | 1 | 1 |
> Paging check is bit 31 in CR0, PAE check is bit 5 in CR4, Long Mode is
specified through bit 8th of IA32_EFER, Level-5 check is bit 12 in CR4.
64-bit architectures are Long Mode so either the paging mode is 4-level or
5-level. And because paging is default in today's computer, Paging value is
always 1, thus we only need to check for CR4 values.
### Paging Structures
"None" paging mode is a one-one mapping which requires no additional
translation, however the rest of the paging modes requires translation through
the set of paging structure. Paging structures are tables as we've described
above, with the size of the table is guaranteed to be 4096 bytes. For "32-bit"
paging mode, there are 1024 entries in the table where each entry is 32-bit in
size. For "PAE", "4-level" and "5-level" paging modes, there are 512 entries in
the table where each entry is 64-bit in size.
The paging structures are named and for each paging modes the entries will
point to another paging structures (with exception for the last table where it
points to a physical address). The names for the structures are Page table,
Page Directory, Page-Directory-Pointer table, PML4 table, PML5 table. We often
refer to them using their acronyms: PT, PD, PDPT, PML4, PML5 with their entries
type PTE, PDE, PDPTE, PML4E, PML5E.
## Address Translation
After learning about the paging modes and paging structures, we will go deeper
into how a virtual address is translated into a physical address. Because there
are many paging modes and going through them one by one would make this post lengthy, I
will only go through the 4-Level paging mode, as this is used on 64-bit machines by
default. Other paging modes work the same way; the difference is which paging
structures are used.
### 4KB Paging
In Figure 1, we can see how the virtual address (linear address) are
segmented into index of each paging structure and eventually points the 4KB
Page at a physical address.
![Figure 1. 4KB Page in 4-Level Paging](./paging_mode_4_level.png)
Using bit fields in C structure, we can rewrite the virtual address value using
the structure VirtualAddress.
```c
struct {
int64_t offset : 12,
int64_t table : 9,
int64_t directory : 9,
int64_t directory_ptr : 9,
int64_t pml4 : 9,
} VirtualAddress;
```
First off, the CR3 register contains the physical address of the PML4 table.
```c
struct {
int64_t present : 1;
int64_t reserved : 11;
int64_t _ptr : 36;
} PML4E;
typedef PML4E[512] PML4;
PML4* cr3;
```
Using pml4 of VirtualAddress we get the PML4E entry that the virtual address
points to. PML4E has a pointer to PDPT, which is the physical address of PDPT.
Using this we get the next table.
```c
PML4E e = (*cr3)[addr.pml4];
assert(e.present == 1) // entry must be present
struct {
int64_t present : 1;
int64_t reserved : 6;
int64_t one_gb_page : 1;
int64_t ignored : 4;
int64_t _ptr : 36;
} PDPTE;
typedef PDPTE[512] PDPT;
PDPT* pdpt_ptr = e._ptr;
```
The case when PDPTE's `one_gb_page == 1` will be discussed later, we assumed
`one_gb_page == 0`. Then, `_ptr` contains the physical address of the next
table `Page Directory`.
```c
PDPTE e = (*pdpt_ptr)[addr.directory_ptr];
assert(e.present == 1) // entry must be present
assert(e.one_gb_page == 0) // in this context
struct {
int64_t present : 1;
int64_t reserved : 6;
int64_t two_mb_page : 1;
int64_t ignored : 4;
int64_t _ptr : 36;
} PDE;
typedef PDE[512] PDT;
PDT* pdt_ptr = e._ptr;
```
The case when PDE's `two_mb_page == 1` will be discussed later, we assumed
`two_mb_page == 0`. Then `_ptr` contains the physical address of the next table
`Page Table`.
```c
PDE e = (*pdt_ptr)[addr.directory];
assert(e.present == 1) // entry must be present
assert(e.two_mb_page == 0) // in this context
struct {
int64_t present : 1;
int64_t reserved : 11;
int64_t _ptr : 36;
} PTE;
typedef PTE[512] PageTable;
PageTable* pt_ptr = e._ptr;
```
`pt_ptr` contains a series of pointer to 4KB Page on the physical memory. To
get the page that maps with the virtual address, we perform:
```c
PTE e = (*pt_ptr)[addr.table];
int64_t page = e._ptr;
```
When the corresponding page for the virtual address is found, we get the exact
address for the virtual address with the offset.
```c
int64_t physical_address = page + addr.offset;
```
## 2MB Paging
## 1GB Paging
To be updated

View File

@ -0,0 +1,183 @@
---
# Documentation: https://wowchemy.com/docs/managing-content/
title: "Kernel and Boot Debug in Windows"
subtitle: ""
summary: ""
tags: [windows, windbg, kernel debug, boot debug]
categories: []
published: "2020-11-27"
lastmod: 2020-11-27T12:43:11+07:00
featured: false
draft: false
# Featured image
# To use, add an image named `featured.jpg/png` to your page's folder.
# Focal points: Smart, Center, TopLeft, Top, TopRight, Left, Right, BottomLeft, Bottom, BottomRight.
image:
caption: ""
focal_point: ""
preview_only: false
# Projects (optional).
# Associate this post with one or more of your projects.
# Simply enter your project's folder or file name without extension.
# E.g. `projects = ["internal-project"]` references `content/project/deep-learning/index.md`.
# Otherwise, set `projects = []`.
projects: []
---
A memo to debug the kernel of Windows with WinDbg
Open the Windows virtual machine cmd as admin. Open a COM port
```
bcdedit /dbgsettings serial debugport:1 baudrate:115200
```
To enable debug in kernel mode
```
bcdedit /debug on
```
To enable debug the boot manager and bootloader:
```
bcdedit /bootdebug {bootmgr} on # boot manager
bcdedit /bootdebug {current} on # bootloader
```
Shutdown the machine. Edit the setting in VirtualBox/VMWare:
1. Open a serial port 1
2. Set the mode to Pipe
3. Set the path to `\\.\pipe\pipename`
Open WinDbg or WinDbg Preview. Choose attach to kernel. Set the baudrate as specified and Port as `\\.\pipe\pipename`.
Turn on initial break to break automatically when a module is loaded in memory (bootmgr, winload.exe, winload.efi, ntoskrnl.exe).
Run the VM. If initial break is not on, press Break to break in manually.
Simple table to understand
| Command | Debug What | |
|---------------------------------|---------------------------|----------------------|
| bcdedit /debug on | ntoskrnl.exe | Windows Kernel |
| bcdedit /bootdebug {bootmgr} on | bootmgr.exe / bootmgr.efi | Windows Boot Manager |
| bcdedit /bootdebug {current} on | winload.exe / winload.efi | Windows Bootloader |
The suffix .efi is used in UEFI boot mode.
[Windows reference on boot process](https://docs.microsoft.com/en-us/windows/client-management/advanced-troubleshooting-boot-problems)
If we want to set an initial break on a profile without initial break, enter `sxe ibp`. If we want to break at a specific module, enter `sxe ld:module`.
WinDbg also supports live kernel debug, livekd. Just attach to the local kernel as admin.
If using Windows 7 the debug symbols (pdb files) are not automatically downloaded. Must specify the path to the Microsoft symbol server:
```txt
.sympath srv*https://msdl.microsoft.com/download/symbols
```
Set your WinDbg layout as you wish, I recommend this layout. Left is the disassembly, Right is the command, below left are modules, below right are breakpoints, threads, stack, watchpoint.
![my windbg layout](./windbglayout.png)
Some commands for process listing:
```
dx Debugger.Utility.Collections.FromListEntry( *(nt!_LIST_ENTRY*)&(nt!PsActiveProcessHead), "nt!_EPROCESS", "ActiveProcessLinks")
dx Debugger.Utility.Collections.FromListEntry( *(nt!_LIST_ENTRY*)&(afd!AfdEndpointListHead), "nt!_EPROCESS", "ActiveProcessLinks")
dx Debugger.Utility.Collections.FromListEntry( *(nt!_LIST_ENTRY*)&(nt!KiProcessListHead), "nt!_KPROCESS", "ProcessListEntry").Select( p => new {Process = (nt!_EPROCESS*)&p} )
dx Debugger.Utility.Collections.FromListEntry(*(nt!_LIST_ENTRY*)&nt!HandleTableListHead, "nt!_HANDLE_TABLE", "HandleTableList").Where(h => h.QuotaProcess != 0).Select( qp => new {Process= qp.QuotaProcess} )
```
Quick reference:
```txt
# print the system information
vertarget
# dump the address as TYPE
dt TYPE ADDR [optional fields]
# print string
du
# print bytes at
db ADDR
dc ADDR # character
# print words at
dw ADDR
# print dwords at
dd ADDR
# print qword at
dq ADDR
# read physical address
!db
!dc
!dd
!dp
!du
!dw
# disassembly at
u ADDR
u FUNCTION
# break at
bp ADDR
bp FUNCTION
# next
p
# continue
g
# print stack trace
k
# list loaded modules
lm
# evaluate
? <expr>
# read register
r
r <reg>
# reference register
@rax
@rdx
>> dd @rax
>> dd @rax+0x10
>> dd @rax+10h
>> dd @rax+16
# list all process
!process 0 0
```
Update when I know more.
- `poi`
- `.process`
- Address Translation
- more?

View File

@ -0,0 +1,466 @@
---
title: "An Introduction to Symbolic Execution"
subtitle: ""
summary: "In this post, I introduce you to a program analysis technique called Symbolic Execution, its components, an in-theory perfect symbolic execution engine, some late writeup on challenges that I've used this technique and lastly the ANGR binary analysis platform"
tags: ["symbolic execution", "program analysis", "binary analysis", "writeup"]
categories: []
published: "2020-12-13"
lastmod: 2020-12-13T21:18:44+07:00
featured: false
draft: false
---
# The problem
```c
1: void foobar(int a, int b) {
2: int x = 1, y = 0;
3: if (a != 0) {
4: y = 3 + x;
5: if (b == 0)
6: x = 2 * (a + b)
7: }
8: assert(x - y != 0)
9: }
```
In the above code, we want to find values of a and b such that the assertion holds. There are many ways of finding a and b. A trial-and-error method may output the solution. Such method could be slow and inefficient. Another way to approach the problem is calculating the values of x and y at each code path.
Line 1, $a, b \in Z$
Line 2, $x = 1, y = 0$
Line 3, if $a = 0$, then the assertion is correct ($x = 1, y = 0, a = 0, b \in Z$).
Line 3, if $a \neq 0$, then $y = 3 + x = 3 + 1 = 4$.
Line 5, if $b \neq 0$, the assertion is correct ($x = 1, y = 4, a \neq 0, b \neq 0$)
Line 5, if $b = 0$, $x = 2 * (a + b)$. This yields the assertion $2 * (a + b) - 4 \neq 0$, which passes only if $a + b \neq 2$ given $a \neq 0, b = 0$. This simplifies to $a \neq 2, b = 0$; the only input that fails the assertion is $a = 2, b = 0$.
Here, we do not look at concrete values of a and b, instead we use a and b as mathematical symbols and give them a range of value ($Z$) to start off with and reduce down after each if/else statement. We only know the valid values of a and b after all statements are finished.
The using of a and b like mathematical symbols (x in "find x" exercises in highschool math) to find values satisfying condition(s) is the Symbolic part of "Symbolic Execution". Using these symbols and run through the code testing at each code path is Execution in "Symbolic Execution".
# Symbolic Execution Engine
A program that runs Symbolic Execution is called Symbolic Execution Engine. This program can run a given function or even a program and test whether the conditions are met. The program must use a kind of SMT Solver to create the Symbolic variables and later combine the constraints to solve for the Symbolic variables. Because going through all code paths is not feasible (path explosion) in general applications, these engines must develop and use exploration technique(s) to limit the runtime memory.
## Simulator/Emulator
One of the main components of the Engine is the Simulator/Emulator. The code/program to be proven must be run inside a simulator which assigns values to Symbolic variables and builds up the constraints. A good Simulator/Emulator will enable the Engine to inspect the variables at runtime and dynamically create Symbolic variables along with their constraints.
Simulators/Emulators are not limited to those that run on binary and bytecode; they could be ones that execute on an AST or even a JIT engine. Symbolic Execution Engines only need to know the declaration of variables and the constraints related to these variables.
A simulator/emulator is not preferred in [KLEE](https://github.com/klee/klee), a LLVM-based Symbolic Execution Engine. KLEE builds the code with Symbolic variables and uses them to solve the constraints at runtime.
## SMT Solver
An indispensable component of a Symbolic Execution Engine, SMT Solver. SMT Solver is a constraints solving system, it provides Symbolic variables and solves the constraints to give each Symbolic variables a possible value.
A notable SMT Solver is the Z3 Theorem Prover.
```python
from z3 import *
s = Solver()
a = Int('a')
b = Int('b')
s.add(a > 2)
s.add(b < 5)
s.add(a + b == 6)
s.check()
s.model()
```
`Int a` and `Int b` is the Symbolic variables, we slowly add constraints to the `Solver s` and output the model. Symbolic Execution Engine works just like that, with a state manager and an exploration technique.
## Exploration Technique
Symbolic Execution Engine must explore the code to build up the constraints. Each time a branch is found, the code path splits into two or more.
> Consider if (a == X || b == Y || c == Z), how many branches do we have?
To solve this problem, much research focuses on exploration techniques to minimize the memory needed for a successful run.
> I will update this after I have read about these techniques :(
## An In-theory Perfect Symbolic Execution Engine
With those 3 components, a perfect Symbolic Execution Engine relies on the perfection of each component.
For binaries, the Simulator/Emulator must be able to work with different binary formats and architectures. Binary format are limited (PE32, PE32+, ELF, Mach-O); however architectures may vary, and to work efficiently, we need an Intermediate Representation (IR).
For programming languages, a unified language is required to work across languages without writing a custom Engine for each language.
An SMT Solver works with variables like Int and Float, but in languages like C, pointers are also valid types, so our Engine must handle pointers to a Symbolic variable.
The exploration technique is critical for a good Engine: it should use minimal memory and be able to minimize/reduce the states explored.
# CTF Challenges
I solved these challenges with my scripting skill. At that time, I did not know about Symbolic Execution. However, after having learnt about Symbolic Execution, I am able to reason about my approach and analyze my solving script in details.
## InCTF 2019 Easy-VM
I do not see any writeup on this problem, I don't know if this is a trivial one or a hard one. Here's my attempt to solve it using my scripting skill and now explain it using Symbolic Execution.
This challenge has two files, an executable binary and a binary file in an unknown format. The executable uses control flow flattening to obfuscate the control flow; however, we can track the state variable to know the next instruction. The executable reads the unknown-format binary file and executes the instructions inside that file.
> Pardon me, it's been too long so I don't remember exactly how the binary works; I might rework the challenge to give a more in-depth explanation in a future update. I just use my solve script as a reference, so it might be wrong.
The unknown format binary file is a list of 8 bytes instruction/data packed tightly, with the first 4 bytes "bi0s" and second 4 bytes (??). There are 10 registers in the form of array and 9 global variables used across the program. This program also use an array of 10000 items (unsure type) for storing/loading data.
```python
class Program:
def __init__(self):
raw_bytes = open('crackme.i', 'rb').read()[8:]
self.code = [ int.from_bytes(raw_bytes[i:i+8], byteorder='little') for i in range(0, len(raw_bytes), 8) ]
self.register = [0 for i in range(10)]
self.instruction = 0
self.data_10 = 0
self.data_6 = 0
self.data_4 = 0
self.data_7 = 0
self.data_8 = 0
self.data_3 = 0
self.data_5 = 0
self.data_11 = 0
self.data_12 = 0
self.code_data = [0 for i in range(10000)]
```
Each instruction are decoded as follows:
```python
def decode(self, ins):
self.instruction = ins;
self.data_10 = ~(~ins | 0xF000FFFFFFFFFFFF) >> 52;
self.data_6 = (ins & (ins ^ 0xFFF0FFFFFFFFFFFF)) >> 48;
self.data_4 = (ins & (ins ^ 0xFFFF0FFFFFFFFFFF)) >> 44;
self.data_7 = (ins & (ins ^ 0xFFFFF0FFFFFFFFFF)) >> 40;
self.data_8 = ~(~ins | 0xFFFFFF0FFFFFFFFF) >> 36;
self.data_3 = ~(~ins | 0xFFFFFFF000FFFFFF) >> 24;
self.data_5 = ~(~ins | 0xFF000000);
self.data_11 = (~(~ins | 0xFFFFFFF000FFFFFF) >> 24) // 5;
if self.data_5 >= 0x10:
self.data_5 = int(hex(self.data_5)[-2:], 16)
```
A global variable will be used to select the next code:
```python
data_11 = self.data_11
if data_11 == 0:
self.switch_code2()
elif data_11 == 1:
self.switch_code3()
elif data_11 == 2:
self.switch_code1()
else:
print("invalid code?")
```
`switch_code1` and `switch_code3` select the operation based on `data_3`
```python
def switch_code1(self):
data_3 = self.data_3
if data_3 == 10:
return self.add()
if data_3 == 11:
print("end routine")
return 1
if data_3 == 12:
return self.jump()
if data_3 == 13:
print("has_current_eip")
return 1
if data_3 == 14:
return self.foo7()
return 0
def switch_code3(self):
data_3 = self.data_3
if data_3 == 5:
return self.cmp()
if data_3 == 6:
return self.foo1()
if data_3 == 7:
return self.mul()
if data_3 == 8:
return self.shift_left()
if data_3 == 9:
return self.shift_right()
return 0
```
`switch_code2` uses `data_3` for operation selection but also uses `data_4` for data input and output.
```python
def switch_code2(self):
data_3 = self.data_3
data_4 = self.data_4
register = self.register
if data_3 == 0:
return self.load()
if data_3 == 1:
return self.save()
if data_3 == 2:
register[data_4] = get_char()
return 1
if data_3 == 3:
print(chr(register[data_4]))
return 1
if data_3 == 4:
return self.mov()
return 0
```
After learning that compare and jump are used to divert the flow to the wrong path, I knew that we needed a way to find a satisfying path for each input. I added an SMT solver: for each input, I create a Symbolic variable, and for each comparison, I add the constraints.
```python
# __init__
self.s = Solver()
self.flag_count = 0
self.vars = []
...
# switch_code2
if data_3 == 2:
print("register[{}] = get_char()".format(data_4))
register[data_4] = Int('flag_{:>3}'.format(self.flag_count))
self.s.add(register[data_4] >= 0)
self.s.add(register[data_4] <= 255)
self.flag_count += 1
return 1
def cmp(self):
    """Record the solver constraint for a VM compare instruction.

    ``data_6`` selects the comparison.  The printed text names the relation
    the VM tests, while the constraint handed to ``self.s`` is its logical
    opposite.  Returns 1 when a constraint was added and 0 for an unknown
    ``data_6``.
    """
    lhs_slot = self.data_4
    rhs_slot = self.data_7
    mode = self.data_6
    regs = self.register
    # mode -> (message describing the VM test, constraint that negates it)
    rules = {
        0: ("register[{}] != register[{}]", lambda a, b: a == b),
        1: ("register[{}] > register[{}]", lambda a, b: a <= b),
        2: ("register[{}] < register[{}]", lambda a, b: a >= b),
    }
    if mode not in rules:
        return 0
    banner, relation = rules[mode]
    print(banner.format(rhs_slot, lhs_slot))
    print(relation(regs[lhs_slot], regs[rhs_slot]))
    self.s.add(relation(regs[lhs_slot], regs[rhs_slot]))
    return 1
```
After that, I wrote code to simulate all other instructions. Solving the model yields us the flag:
`inctf{1_kN0w_1t5_R3411y_3z_&_fuNNy_but_1ts_h0n3st_w0rk!}`
The full code is [here](https://github.com/nganhkhoa/ctf-writeup/blob/master/2019/inctf/easy-vm/run.py).
Warning: bad code, will clean up someday.
## De1CTF 2020 Code Runner
There are 2 writeups on this challenge; both used ANGR and solved it in around 10-20 seconds, which is slow. I wrote a simulator and applied symbolic execution manually. This reduces the runtime to less than 1 second.
The challenge gives an endpoint. When connecting to it with netcat, a simple proof-of-work challenge is presented; after passing it, the server outputs a base64-encoded MIPS binary and waits for the correct submission for that binary.
First I use elftools to get the code section of the binary, then use capstone to disassemble all the bytecode into a list of instructions.
```python
from capstone import *
from elftools.elf.elffile import ELFFile
def get_insn_list(bytecode, first_addr):
    """Disassemble ``bytecode`` as little-endian MIPS64, keyed by address.

    Each value is the tuple ``(hex(address), mnemonic, op_str, size)`` for
    the instruction found at that address; ``first_addr`` is the address
    assigned to the first byte of ``bytecode``.
    """
    disassembler = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_LITTLE_ENDIAN)
    return {
        ins.address: (hex(ins.address), ins.mnemonic, ins.op_str, ins.size)
        for ins in disassembler.disasm(bytecode, first_addr)
    }
# Parse the challenge binary and disassemble its whole .text section.
# NOTE(review): the handle returned by open() is never closed explicitly in this excerpt.
f = ELFFile(open('code_runner', 'rb'))
symbols = f.get_section_by_name('.dynsym')
# `main` is unpacked here but not referenced again in this excerpt.
[main] = symbols.get_symbol_by_name('main')
text = f.get_section_by_name('.text')
first_addr = text['sh_addr']
# Hard-coded address of the first check function (specific to this one binary).
check_start = 0x00401994
bytecode = text.data()
insn_list = get_insn_list(bytecode, first_addr)
```
Then, starting at the first function, we know each function uses four bytes for checking and proceeds if the conditions are either true or false. The functions chain on until the last function, which has no check, is reached. Simulating the code is quite hard: we have to find the correct branch to jump into (because exploration states are very hard to manage in a script). I have to build the control flow graph for each function and detect the correct path to follow. To build the control flow graph, we first split the function at its terminating instructions (branch/jump) and put the instructions into blocks called basic blocks.
```python
class Node:
    """One basic block: a run of instructions closed by a branch or return.

    ``insn`` is a non-empty list of ``(addr, mnemonic, operands)`` tuples.
    The last tuple decides ``branch`` ("branch", "equal", "non_equal" or
    "return") and ``to`` (the target address, or None for a return).
    ``mustbe`` starts as None and is filled in later by the path analysis.
    """

    def __init__(self, insn, next_func = None):
        self.insn = insn
        self.addr = insn[0][0]
        self.next_func = next_func
        (_, mnemonic, operands) = insn[-1]
        conditional = {"beq": "equal", "bne": "non_equal"}
        if mnemonic == "b":
            # Unconditional branch: the operand is the target itself.
            self.branch = "branch"
            self.to = int(operands, 16)
        elif mnemonic in conditional:
            # Conditional branch: the target is the last operand.
            self.branch = conditional[mnemonic]
            self.to = int(operands.split(', ')[-1], 16)
        else:
            self.branch = "return"
            self.to = None
        self.mustbe = None
def split_to_nodes(func):
    """Cut a function's instruction list into basic-block ``Node`` objects.

    ``func`` yields ``(addr, mnemonic, operands)`` tuples.  A block is closed
    at every ``b``/``beq``/``bne``/``jr``; the target of a ``jal`` seen inside
    the block is passed to the node as ``next_func``.  Instructions after the
    last terminator are not emitted as a node.
    """
    terminators = ("b", "beq", "bne", "jr")
    blocks = []
    pending = []
    callee = None
    for (addr, mnemonic, operands) in func:
        pending.append((addr, mnemonic, operands))
        if mnemonic == "jal":
            callee = int(operands, 16)
        if mnemonic in terminators:
            blocks.append(Node(pending, callee))
            # Start a fresh block; the call target belongs to the closed one.
            callee = None
            pending = []
    return blocks
```
After having the control flow graph, I follow the branch instruction that `mustbe` made and add constraints based on that. Now I only need to simulate the rest of MIPS code to add Symbolic variables and build up the constraints as the code simulate.
```python
def condition(self, z, param):
    """Symbolically replay this block and constrain its closing branch.

    Walks ``self.insn`` while keeping a register file of Python/solver
    values.  When a ``bnez``/``bne``/``beq`` is reached, a constraint chosen
    by ``self.mustbe`` is added to ``z``; nothing is added when ``mustbe``
    is None.

    z     -- solver object; only ``z.add(constraint)`` is used.
    param -- sequence of symbolic input bytes, indexed by ``PIndex.v``.
    """
    # PIndex is the index to input character
    # I don't know why I created a class for the index
    reg = {}
    reg["$zero"] = 0
    reg["$sp"] = 0
    for (_, i, op) in self.insn:
        # print(i, op)
        if i in ["sw", "nop", "jal", "negu", "b"]:
            # No effect on the tracked registers.
            pass
        elif i == "move":
            [out, x] = op.split(', ')
            reg[out] = reg[x]
        elif i == "lw":
            [out, x] = op.split(', ')
            # Only the load from 0x20($fp) is modelled; other loads are ignored.
            if x == "0x20($fp)":
                reg[out] = PIndex()
        elif i == "lbu":
            [out, x] = op.replace(')', '').replace('(', '').split(', ')
            reg[out] = param[reg[x].v]
        elif i == "addiu":
            [a, b, c] = op.split(', ')
            reg[a] = reg[b] + int(c, 16)
        elif i == "addu":
            [a, b, c] = op.split(', ')
            reg[a] = reg[b] + reg[c]
        elif i == "subu":
            [a, b, c] = op.split(', ')
            reg[a] = reg[b] - reg[c]
        elif i == "xor":
            [a, b, c] = op.split(', ')
            reg[a] = reg[b] ^ reg[c]
        elif i == "andi":
            [a, b, c] = op.split(', ')
            reg[a] = reg[b] & int(c, 16)
        elif i == "sll":
            [a, b, c] = op.split(', ')
            reg[a] = reg[b] << int(c, 16)
        elif i == "mult":
            [a, b] = op.split(', ')
            # The product is kept unsplit in both halves.
            reg["hi"] = reg[a] * reg[b]
            reg["lo"] = reg[a] * reg[b]
        elif i == "mflo":
            # mflo copies LO (the original read "hi", which only worked
            # because mult stores the same value in both).
            reg[op] = reg["lo"]
        elif i == "bgez":
            [a, _] = op.split(', ')
            # Modelled as taking the absolute value of the register.
            reg[a] = If(reg[a] > 0, reg[a], -reg[a])
        elif i == "slt":
            [a, b, c] = op.split(', ')
            reg[a] = If(reg[b] <= reg[c], 1, 0)
        elif i == "bnez":
            [a, _] = op.split(', ')
            if self.mustbe == True:
                z.add(reg[a] == 0)
            elif self.mustbe == False:
                z.add(reg[a] != 0)
            print(z)
        elif i == "bne" or i == "beq":
            [a, b, c] = op.split(', ')
            if self.mustbe == True:
                z.add(reg[a] == reg[b])
            elif self.mustbe == False:
                z.add(reg[a] != reg[b])
            print(z)
        else:
            # Pause so an unhandled mnemonic is noticed while developing.
            input("unknown instruction")
```
Because the function is a chain of calls to functions, I just keep running until the last function is met
```python
def do_next(insn_list, start = check_start):
    """Solve the check function at ``start`` and every function chained after it.

    For each function a fresh solver and four 8-bit symbolic inputs are
    created, every basic block adds its constraints, and the model's values
    (ordered by variable name) form that function's part of the answer.
    Returns the concatenated list of byte values for the whole chain.
    """
    print(hex(start))
    nodes = split_to_nodes(dump_func(insn_list, start))
    inspect_badjump(nodes)
    z = Solver()
    param = [BitVec(f"param_{i}", 8) for i in range(4)]
    callee = None
    # This one function needs its branch outcome forced to False.
    force_false = hex(start) == "0x4013c8"
    for n in nodes:
        # print(n)
        if n.next_func:
            callee = n.next_func
        if force_false:
            n.mustbe = False
        n.condition(z, param)
    z.check()
    m = z.model()
    ranked = sorted([(d, m[d]) for d in m], key = lambda pair: str(pair[0]))
    flag = [int(str(value)) for (_, value) in ranked]
    print(flag)
    print()
    if callee:
        return flag + do_next(insn_list, callee)
    return flag
And the correct answer is:
```python
answer = do_next(insn_list)
print(answer)
```
You might have noticed already that the start function is hard-coded. This is our team's mistake (including me). We thought that there was only ONE binary, so I solved only one binary. When the CTF was over, I read the writeups and found out that the binary downloaded is different each time we connect to the server. I don't know if I have the correct general answer, but the script I wrote generates the correct answer for the binary I have. If something has to be changed, then I need to find the first check function automatically.
> It seems that the function at `0x4013c8` has a different `mustbe` than other functions. I didn't remember, but I think I should recheck the `mustbe` value of each function.
> I will update the qemu-mips running here with the output generated from our code in the future
Our solution runs faster than the ANGR-based ones and is also not ad hoc like the other writeups. By using symbolic execution, we can write a more general solution to the problem.
Full code and binary is [here](https://github.com/nganhkhoa/ctf-writeup/blob/master/2020/de1ctf/code_runner/code_runner.py). Again, bad code warning.
## What I miss from these writeups
A Symbolic Execution Engine needs a very good state manager, because each time a branch is made, the number of states (at least) doubles. Hand-crafted solvers like the ones in these two writeups don't rely on a state manager because we assume a code path that must be chosen. In real-life scenarios, code paths are undecidable and require the Engine to select the good paths and remove the bad ones. Most research on Symbolic Execution is about Exploration Techniques, because a good algorithm saves memory and time when running a solution.
## References:
To be updated

48
content/osx/fairplay.md Normal file
View File

@ -0,0 +1,48 @@
---
# Documentation: https://wowchemy.com/docs/managing-content/
title: "Apple Fairplay protection in Mach-O"
subtitle: ""
summary: ""
authors: [luibo]
tags: [osx, iOS, macOS, dyld]
categories: [osx]
published: "2021-09-06"
lastmod: 2021-09-06T11:15:04+07:00
featured: false
draft: false
# Featured image
# To use, add an image named `featured.jpg/png` to your page's folder.
# Focal points: Smart, Center, TopLeft, Top, TopRight, Left, Right, BottomLeft, Bottom, BottomRight.
image:
caption: ""
focal_point: ""
preview_only: false
# Projects (optional).
# Associate this post with one or more of your projects.
# Simply enter your project's folder or file name without extension.
# E.g. `projects = ["internal-project"]` references `content/project/deep-learning/index.md`.
# Otherwise, set `projects = []`.
projects: ["osx", "binary-format"]
---
Fairplay encryption was created by Apple to protect digital possession rights. It is implemented with a custom chipset for encryption and decryption with a hardcoded key. It is still unknown how to extract the key from the hardware, but decryption is feasible given root access to the device.
When an application is loaded, the encrypted fairplay section must be decrypted. If the decryption succeeds, the app can start running as normal. During the course of the app's uptime, the section stays decrypted in memory.
If the memory can be dumped when the app is running, we can retrieve the file in its un-encrypted form. Using Apple APIs, we can get the mapped binary file in memory. With this, we can collect the decrypted region and write back to file.
The method is clear. However, we need to run code in the same space as the applications. The details on how to do this can be found on [[Injections]]. Right now, there are solutions:
- https://github.com/stefanesser/dumpdecrypted
- https://github.com/AloneMonkey/frida-ios-dump
- https://github.com/BishopFox/bfdecrypt
- https://github.com/KJCracks/Clutch
There's also improvements to this decrypt technology. The first one being issuing fairplay `mremap_encrypted` to load the encrypted section only. https://github.com/JohnCoates/flexdecrypt
The second one is by using an exploit to read other process' memory space. https://github.com/DerekSelander/yacd. This method applies only on iOS 13 and above, but the good thing is, there is no need of jailbreak.
Given the current situation of Apple, fairplay decryption is nowhere near mitigated. Fairplay decryption is crucial for most analysis, as the app can't be viewed when encrypted. As of now, we can decrypt apps using the above methods, at least until Apple hardens the process. But even so, we can still use older devices to decrypt.

249
content/osx/injection.md Normal file
View File

@ -0,0 +1,249 @@
---
# Documentation: https://wowchemy.com/docs/managing-content/
title: "Injecting code into Mach-O"
subtitle: ""
summary: ""
authors: [luibo]
tags: [osx, iOS, macOS, dyld]
categories: [osx]
published: "2021-09-06"
lastmod: 2021-09-06T11:15:05+07:00
featured: false
draft: false
# Featured image
# To use, add an image named `featured.jpg/png` to your page's folder.
# Focal points: Smart, Center, TopLeft, Top, TopRight, Left, Right, BottomLeft, Bottom, BottomRight.
image:
caption: ""
focal_point: ""
preview_only: false
# Projects (optional).
# Associate this post with one or more of your projects.
# Simply enter your project's folder or file name without extension.
# E.g. `projects = ["internal-project"]` references `content/project/deep-learning/index.md`.
# Otherwise, set `projects = []`.
projects: ["osx", "binary-format"]
---
This article introduces the reader to some easy injection techniques that can be used to hijack the runtime of a Mach-O binary. Some techniques are easy to perform, some are possible thanks to 3rd-party tooling, and some are based on theory.
## Before start
Apple's loader loads and run all initial functions from dynamically linked libraries. Due to this, we can create functions that run before the main binary is started. With a little craft, we can also make our functions the first function to be run.
Started by making a dynamic library (`*.dylib`) with functions we wish to run in loader state as:
```c
// Extra program information handed to the initializer as its last argument.
struct ProgramVars {
void* mh; // mach_header or mach_header64
int* NXArgcPtr;
const char*** NXArgvPtr;
const char*** environPtr;
const char** __prognamePtr;
};
// The constructor attribute makes the loader call foo when this library is
// loaded, i.e. before the main binary starts running.
__attribute__((constructor))
void foo(int argc, const char** argv, const char** envp, const char** apple, struct ProgramVars* pvars) {
// code goes here
}
```
`__attribute__((constructor))` marks the compiler to place the function address into `__mod_init_func`, a section dedicated to be called by loader when the binary is loaded.
With this library compiled, we can run `foo` before the main binary is run by using these few methods.
## DYLD_INSERT_LIBRARIES
Similar to `LD_PRELOAD` on Linux, `DYLD_INSERT_LIBRARIES` is read by the loader, which loads the additional libraries provided in its value. This method is easy to use when working on macOS, but impossible on systems where we don't have access to a terminal: legacy iOS, tvOS, watchOS.
## Adding load command
If we have a Mach-O binary, we can add another load command to make loader to find and load the library, which follows calling our library initial functions.
In most cases, Mach-O binary has a spare space between the list of load commands and the contents. We add a load command in this empty space, fix the header with new `ncmds` and `sizeofcmds`.
After that everything is set, we resign the binary (if iOS/tvOS/watchOS) and install. On run, loader loads and runs our functions before the main binary.
This method can be extended to make the function run first, but requires a very careful crafting.
As loader load each library following the order of their declaration in the main Mach-O binary. If we can move our library first, we can make our functions the very first to run.
Fixing the ordering of load commands can be done easily, but just fixing them won't work. As the opcodes to dynamic symbols are encoded with the library ordering. E.g. a symbol `printf` referencing library indexed `1` now must reference `2`, because we've pushed our library on top. And it gets worse, since the `__stub_helper` index into the opcode bytestream. Which means if we edit the bytestream and somehow mess up with the order, we fail.
### Fixing opcodes?
This section delivers an in depth analysis of this scenario. In the opcode bytestream, there are two opcode to encode the library index (we don't count the special index as it is defined in different opcode): `BIND_OPCODE_SET_DYLIB_ORDINAL_IMM` and `BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB`. To prevent long names, we call them `imm dylib` and `uleb dylib`, respectively.
Opcode using `imm dylib` will be 1 byte and apply for libraries with indexed not exceeding 15 (0xf is max).
Opcode using `uleb dylib` will be 2 bytes or more, where the first byte is the opcode, and the rest bytes are index encoded in uleb128.
A problem occurs when an `imm dylib` with index 15 is increased: the number becomes 16 and has to be encoded using `uleb dylib`, which is longer and therefore breaks the index in `__stub_helper` for other symbols. When this problematic event occurs, we can resolve it in several ways. I haven't tested these solutions, but in theory they should work.
1. Fixing `__stub_helper`
Straight forward solution, we update the new index to the symbols in opcode bytestream to the `__stub_helper`. We know which stub points to which symbol before the edit, after editing, just loop through each stub and update the new index.
2. Fixing it on runtime
This is harder and prone to crashing. Because we can inject our function before the main code is run, we add a resolver for these symbols. There are plenty ways to do this, depends on how creative you are and how brave are you to tackle these solutions.
Remarks: `__stub_helper` can't be edited on runtime, but the `__la_symbol_ptr`, which holds the address for each function (default to stub) can be edited on runtime. We abuse this.
`__la_symbol_ptr` doesn't show us which symbols is being called, however, we match the information with stub's old index to identify the symbol.
```
__la_symbol_ptr:
stub_1
stub_2
stub_1:
load index_1
call bind
stub_2:
load index_2
call bind
;; index 1 is foo of libA
;; index 2 is bar of libB
;;
;; __la_symbol_ptr = [foo of libA, bar of libB]
```
- Simulate the loader
On load, we updates the whole `__la_symbol_ptr` sections with symbols' address. We use Apple's API to find all loaded libraries, and traverse the export trie to find the function address.
> similar to overwrite `__got`, `__plt` in pwn technique
```c
// One lazily-bound symbol that still has to be resolved.
struct Symbol {
    char * name;        // symbol name to look up in the export trie
    char * lib;         // library the symbol is expected to come from
    void * address = 0; // resolved address, 0 until it has been found
};

struct export_trie;
// Look `name` up in a library's export trie and return its address.
void* find_symbol_address(export_trie* exported, const char* name);
export_trie* get_export_trie(void* header); // mach_header or mach_header64

// Fill in the address of every symbol in `symbols` that belongs to `lib`,
// using that library's export trie.
void update_symbols_in_lib(Symbol* symbols, char * lib, export_trie * exported) {
    // len() is pseudo-code for "number of entries in symbols".
    for (unsigned int i = 0; i < len(symbols); i++) {
        if (strcmp(symbols[i].lib, lib) == 0) {
            void* addr = find_symbol_address(exported, symbols[i].name);
            symbols[i].address = addr;
        }
    }
}
// not test, quick way to get la_symbol_ptr section pointer
// static volatile void* la_symbol_ptr __attribute__((section ("__DATA,__la_symbol_ptr"))) = { 0 };
// Resolve every pending lazy symbol by walking all images the loader has
// mapped and searching each image's export trie.
void resolve() {
    Symbol* to_bind = read_la_symbol_ptr();
    // Start at image 0; the counter was previously left uninitialized.
    for (uint32_t i = 0; i < _dyld_image_count(); i++) {
        void * header = _dyld_get_image_header(i);
        char * lib = _dyld_get_image_name(i);
        export_trie* exported = get_export_trie(header);
        update_symbols_in_lib(to_bind, lib, exported);
    }
}
```
- Hijack `dyld_stub_binder`
`dyld_stub_binder` holds the address to loader's bind method. Conveniently, this symbol resides in `__got`/`__nl_symbol_ptr`, which got resolved when the binary is loaded.
When our function run, we can rewrite this value to our custom function. Which will get call by other stubs. We now know the original index passed by stubs, we just need to change the old index to new ones and send to the original bind method. This seems easier to implement.
```c
// Skeleton: locate the loader's original dyld_stub_binder.
// NOTE(review): the body is intentionally left unimplemented, so nothing is
// returned yet.
void* find_original_bind() {
// read __nl_symbol_ptr or __got
// to find original dyld_stub_binder
// should be the first one (iirc)
}
// Replacement for dyld_stub_binder: remap the index, then forward the call.
// original bind function receives two parameter,
// first is index
// second is cache of libraries (iirc)
void custom_bind(int old_index, void* param) {
// Kept in a static local; `(/* cast */)` is a placeholder for the real
// function-pointer cast.
static void(*original_bind)(int, void*) = (/* cast */)find_original_bind();
// Translate the index baked into the stub into the index after reordering.
int new_index = get_new_index(old_index);
original_bind(new_index, param);
}
```
## Cycript
Probably the first injection framework on iOS, but stopped development since 2016. Created by one of the most renowned jailbroken iOS developer, the creator of Cydia, Jay Freeman or commonly known as *saurik*.
In its latest version, Cycript supports up to iOS 11. More information can be found publicly on their [website](http://www.cycript.org/).
## Frida
Frida is well known for its injection ecosystem that works seamlessly across Android, Apple OSes, Windows, and Linux. Setting up an Apple device with Frida is easy, and instrumenting or hijacking code can be done just by writing a piece of JavaScript code.
The following guide is provided only for iOS devices.
### Setup
The setup of Frida is different between non-jailbroken and jailbroken devices.
For jailbroken devices, a server must be installed and run. Then frida (on PC/Mac) can connect through the usb cable and ask the server to perform tasks such as querying the system files, listing apps, start an app, hook a running app...
For non-jailbroken devices, if Frida < 12.7.12 is used, we must manually add the Frida dynamic library (FridaGadget) to the binary. The Frida documentation says that for Frida >= 12.7.12, FridaGadget is automatically injected, but I haven't tested, and doubt that it works with iOS (due to codesigning and restriction in environment).
### Inject then Hijack
I put simple script for reference. There are plenty on the Internet.
```js
// Hook CCCrypt from libcommonCrypto and log its inputs on entry.
// Demonstrates a plain Interceptor.attach with an onEnter callback.
Interceptor.attach(
  Process.getModuleByName('libcommonCrypto.dylib').getExportByName('CCCrypt'),
  {
    onEnter(args) {
      // Map the numeric algorithm argument to a readable name.
      const nameOf = (algo) => {
        switch (algo) {
          case 0: return "AES128";
          case 1: return "DES";
          case 2: return "3DES";
          case 3: return "CAST";
          case 4: return "RC4";
          case 5: return "RC2";
          default: return "algo_" + algo;
        }
      };
      const algorithm = nameOf(args[1].toInt32());
      console.log("CCCrypt using " + algorithm)
      console.log("CCCrypt key:")
      // args[4] is used as the byte length of the key at args[3].
      const keyLength = args[4].toInt32();
      console.log(args[3].readByteArray(keyLength))
      console.log("CCCrypt iv:")
      console.log(args[5].readByteArray(16))
      console.log("CCCrypt => " + args[6].readUtf8String())
    }
  }
)
// inject on an address of a lib or main binary
// module_name is a placeholder: set it to the name of the image to hook.
let module = Process.getModuleByName(module_name)
let offset = 0x1234 // reverse engineer
// Hook at <module base> + offset. The original used an undefined `SF` here
// instead of the module looked up above.
Interceptor.attach(module.base.add(offset), {
  onEnter() {
    // accessing registers
    // console.log("Calling x9 raw: " + this.context.x9)
  }
})
```
## bfinject
> Easy dylib injection for jailbroken 64-bit iOS 11.0 - 11.1.2. Compatible with Electra and LiberiOS jailbreaks
Update soon(tm)

93
content/osx/linker.md Normal file
View File

@ -0,0 +1,93 @@
---
# Documentation: https://wowchemy.com/docs/managing-content/
title: "Mach-O linker information"
subtitle: ""
summary: ""
authors: [luibo]
tags: [osx, iOS, macOS, dyld]
categories: [osx]
published: "2021-09-06"
lastmod: 2021-09-06T11:15:02+07:00
featured: false
draft: false
# Featured image
# To use, add an image named `featured.jpg/png` to your page's folder.
# Focal points: Smart, Center, TopLeft, Top, TopRight, Left, Right, BottomLeft, Bottom, BottomRight.
image:
caption: ""
focal_point: ""
preview_only: false
# Projects (optional).
# Associate this post with one or more of your projects.
# Simply enter your project's folder or file name without extension.
# E.g. `projects = ["internal-project"]` references `content/project/deep-learning/index.md`.
# Otherwise, set `projects = []`.
projects: ["osx", "binary-format"]
---
Dynamic symbols in a Mach-O binary are stored in the form of bytecode, and exported symbols are encoded as a prefix-`trie`. For dynamic symbols, Mach-O also has a stub binding to resolve symbols, which is the same as the `__got` and `__plt` sections on ELF binaries.
## Dynamic symbols
The linker reads the symbol tables and performs binding when necessary. We start by explaining the bytecodes and finish with the binding process.
We have 4 different bytecode arrays, `rebase`, `bind symbol`, `weak bind symbol`, and `lazy bind symbol`. All 4 arrays using the same set of bytecode and laid out continuously in binary, with the only difference is the usecase.
Each byte encodes an opcode and its parameter, `uint8_t v = opcode | imm`. Some opcode require an additional `uleb128` number, the number can be read from the next bytes (at most 7 bytes, due to uleb128 encoding). Some opcode require a string, encoded next to and end with `0x00`.
The opcode reads into a state, and the state mutates after every code read. Basically the opcode is a compressed table, where we read through each row. Every new row is first copied from the previous row and then has its columns updated.
Often the binary is loaded in memory with a PIE slice, due to ASLR. A number of constants address created at compile time is no longer correct. When this happens, loader reads up the `rebase` opcode and add up the address constants to a value of slice.
`bind symbol`, `weak bind symbol`, `lazy bind symbol` are decoded into a list of dynamic symbols. Each symbol has `dylib ordinal`, `segment index`, `name`, `address`. At the start of each row, `bind_done` is performed, which will find the `name` symbol from the library declared using load command at index `dylib ordinal` (counting starts at 1), and write the address of the function at `address`. `dylib ordinal` has special values of 0, -1, -2 to indicate special dynamic libraries.
### Binding process
The binding process happens when a symbol address is written into the memory to provide callback for the original code. This process exist because the symbols are undefined in compiled time, and only visible at runtime, yet the address of symbols are randomly located after each run.
To resolve this issue, Mach-O binary has a fake jump into the symbols. Where as the original code calls an imported symbols, `foo`, it actually calls to a function that redirect to the resolved address.
```asm
__text:
call foo_ ;; call foo, but with a holder
foo_:
load foo_addr_holder
call
foo_addr_holder:
0x000000
```
With the above scheme, the compiler can easily create a holder for the address and let the loader re-write the address at runtime. One draw-back to this scheme requires the loader to resolve all imported symbols' address holders. Result in a longer startup time. But Mach-O can also perform lazy binding, by following the below scheme.
```asm
__text:
call foo_ ;; call foo lazy
foo_:
load foo_addr_holder
call
foo_addr_holder:
foo_addr_resolver ;; re-writen by loader after resolving
foo_addr_resolver:
load foo_opcode_start_index ;; just a number
call loader_symbol_resolver
```
For lazy bind symbols, the Mach-O has a resolver for each lazy symbols, and the function is called on the first time calling. This function loads a number and call the resolver from the loader. When the loader resolver finished, the address holder of the lazy symbol is re-written to contain the address of the symbol.
The number that is passed into loader's resolver is the index into the row of the lazy bind symbol of the correspondent symbol.
In Mach-O, the section for address holder or `__nl_symbol_ptr` and `__la_symbol_ptr` for non lazy (first scheme) and lazy (second scheme) symbols, respectively. The resolver section is called `__stub_helper`. In Go generated binaries, non lazy symbols section is named `__got`.
## Exported symbols
`exported symbols` is encoded as a prefix-`trie`, where each node holds an export symbol. The symbol can be Regular, Weak, Reexport, or Stub. A Regular symbol has an address field, which is the offset from Mach-O. The parsing of the trie is quite simple, but requires a little recursive writing. Apple also describes the encoding process in the Mach-O headers.
For regular symbols, the offset collected is the file offset of the Mach-O. This way, when searching for the function the loader can easily calculate the address on memory. The trie can also speed up searching by only follow the branch that matches the symbol to be found.

89
content/osx/macho.md Normal file
View File

@ -0,0 +1,89 @@
---
# Documentation: https://wowchemy.com/docs/managing-content/
title: "Overview of Mach-O binary"
subtitle: ""
summary: ""
authors: [luibo]
tags: [osx, iOS, macOS, dyld]
categories: [osx]
published: "2021-09-06"
lastmod: 2021-09-06T11:15:01+07:00
featured: false
draft: false
# Featured image
# To use, add an image named `featured.jpg/png` to your page's folder.
# Focal points: Smart, Center, TopLeft, Top, TopRight, Left, Right, BottomLeft, Bottom, BottomRight.
image:
caption: ""
focal_point: ""
preview_only: false
# Projects (optional).
# Associate this post with one or more of your projects.
# Simply enter your project's folder or file name without extension.
# E.g. `projects = ["internal-project"]` references `content/project/deep-learning/index.md`.
# Otherwise, set `projects = []`.
projects: ["osx", "binary-format"]
---
Mach-O is a binary format used by Apple for its systems. The binary format contains assembled bytes, data and other information. It is structured as a list of load commands, where each load command holds the necessary pointers to the contents.
## Header
At offset 0 lies a header structure, `struct mach_header`, containing the general information about the binary.
```
/* Header found at offset 0 of every Mach-O file. */
struct mach_header {
    uint32_t magic;           /* 0xfeedface (32-bit) or 0xfeedfacf (64-bit) */
    cpu_type_t cputype;       /* platform this binary can be loaded on */
    cpu_subtype_t cpusubtype; /* refinement of cputype */
    uint32_t filetype;        /* executable, dynamic library, object file, ... */
    uint32_t ncmds;           /* number of load commands */
    uint32_t sizeofcmds;      /* total size of all load commands, in bytes */
    uint32_t flags;           /* bit mask of extra information, e.g. PIE */
};
```
Valid `magic` values are: `0xfeedface` for 32-bit format, `0xfeedfacf` for 64-bit format, little endian wise, big endian version are bytes swapped.
`cputype` and `cpusubtype` declare on which platform can this binary be loaded (or which assembly type this file contains). The most we are gonna see are x86, x86_64, and arm64, arm64e; while arm 32-bit aka armv7 (armv7s, armv7a) exist, Apple dropped support for these platforms since the release of iPhone 6.
`filetype` denotes the type of binary, *executable*, *dynamic library*, *object file*.
`ncmds` and `sizeofcmds` declare the number of load commands and the total size of load commands in byte. The reason why the size is required because the size of load command types varied. Also, the `sizeofcmds` is checked upon binary load, and throws error if it's incorrect.
`flags` is bit mask value for extra information, e.g. PIE.
## Load command
Each load command is structured, type of `cmd`, command size `cmdsize`, and information of that command.
There are many types of load commands, however we only focus on `segment`, `dynamic library`, `symbols`, `fairplay`, `codesignature` command types.
Segments are common in executable/library binaries. These point to the data inside where the `.text` or `.data` reside. In Mach-O binaries, a segment load command is followed by a series of sections, with each section mark the start/end of the data. The common sections are: `__text`, `__cstring`, `__const`, `__got`, `__la_symbol_ptr`, `__mod_init_func`, `__data`, `__bss`. These sections can be named without any restrictions, however compilers often name them by a rule of thumb. The attributes for the sections is marked with bit mask `flag`, indicating the attributes of the items.
A unique segment with no section is named `__LINKEDIT`. This section points to the last part of the binary containing various information, including tables of symbols, tables of symbols name, list of exported symbols, and binary's signature.
Each dynamic library is registered through a load command containing the path to the library. The path can either be absolute or relative. Absolute path resolving is straight-forward. With relative path resolving, the binary can use either of the two forms: relative to current directory, or **rpath**. Relative path with the current directory is easy to understand, `./`, `../` and such paths are valid in this case.
**rpath** is a little different, in short, the path started with either these variables: `@executable_path`, `@loader_path`, `@rpath`. `@executable_path` is replaced with the executable's residing folder, `@loader_path` is replaced with the path of the folder containing the loader. `@rpath` is resolve by `rpath` load commands.
The Mach-O binary can possess many load command to denote the `rpath`, each of the item must be an absolute path, or relative path, or using `@executable_path` or `@loader_path` or `@rpath`. It is unclear whether `rpath` can be stacked, but as a rule of thumb, we should not use `@rpath` on rpath load command. A common rpath often used by Apple is `@executable_path/Frameworks`, which can be seen on iPhone/iPad application binaries compiled using Xcode.
Fairplay encryption is a mechanism designed by Apple to encrypt the app content with the device private key, such that you cannot run the app from another machine. The Mach-O binary always have a load command pointing to the section starts and end, and the encryption status.
Due to Apple design of the fairplay, we can't recover the key to decrypt. However, we can actively dump the binary on memory, as it must be decrypted before running. Another method involves using the Apple mmap for fairplay encrypted region. These should be discussed on [[Fairplay]].
Codesignature is present on signed binaries, created using `codesign` with a `distribution` or `development` key. The section tells us a lot of information regarding the signer and the hashes. The signature is encoded in a PKCS#7/CMS with SignedData encoded in BER of ASN.1 (X.690). It also contains the list of certificates in X.509 format, and the signature digest. Currently Apple is using RSA to sign its binary.
The binary must be signed with a certificate chain root as Apple CA, otherwise Apple devices reject installation. Apps distributed through the Apple Store is also signed by Apple Store and device distribution certificate. For self-signed binary, the Apple CA is still the root certificate, while the children are `developer` certificate.
Symbols are encoded as a series of bytecode, a load command is specified to mark the region of symbols. This command registers the placement of `non lazy`, `lazy`, `exported` symbols. `non lazy` symbols are searched and written into the `got` table when the binary is loaded, `lazy` symbols are searched through `plt`, `export` symbols are indexes/addresses into the function start.
`non lazy` and `lazy` symbols are encoded as **bind** opcode; `export` symbols are encoded as a prefix-`trie`. More detailed about these in [[Linker Info]].
The above paragraph states the current situation of Mach-O symbols encoding. However, a few years ago, this was not the case. A few years back (don't know when), they had a list of symbols and dynamic symbols in separate commands. Thus in the newer version of Mach-O, they have a command id, `LC_DYLD_INFO_ONLY`, which shows that it should not be used with the legacy list anymore. The loader crashes if this command is used with a non-empty list of (dynamic) symbols.
The Mach-O related structures can be found and read on Apple's `cctools` modules at `include/mach-o/loader.h`.

BIN
public/cv.pdf Normal file

Binary file not shown.

Binary file not shown.

View File

@ -18,29 +18,36 @@ type alias BlogPost =
}
blogPostsGlob : BackendTask.BackendTask error (List { filePath : String, slug : String })
blogPostsGlob =
contentPostsGlob : String -> BackendTask.BackendTask error (List BlogPost)
contentPostsGlob folder =
Glob.succeed BlogPost
|> Glob.captureFilePath
|> Glob.match (Glob.literal "content/blog/")
|> Glob.match (Glob.literal ("content/" ++ folder))
|> Glob.capture Glob.wildcard
|> Glob.match (Glob.literal ".md")
|> Glob.toBackendTask
-- Markdown files matched under "content/blog/", each with its file path and captured slug.
blogPostsGlob = contentPostsGlob "blog/"
-- Markdown files matched under "content/osx/", each with its file path and captured slug.
osxPostsGlob = contentPostsGlob "osx/"
allMetadata :
BackendTask.BackendTask
(String -> Route.Route)
-> BackendTask.BackendTask
{ fatal : FatalError, recoverable : File.FileReadError Decode.Error }
(List BlogPost)
-> BackendTask.BackendTask
-- error
{ fatal : FatalError, recoverable : File.FileReadError Decode.Error }
(List (Route.Route, ArticleMetadata))
allMetadata =
blogPostsGlob
allMetadata routeBuilder posts =
posts
|> BackendTask.map
(\paths ->
paths
|> List.map
(\{ filePath, slug } ->
BackendTask.map2 Tuple.pair
(BackendTask.succeed <| Route.Blog__Slug_ { slug = slug })
(BackendTask.succeed <| (routeBuilder slug))
(File.onlyFrontmatter frontmatterDecoder filePath)
)
)
@ -52,7 +59,6 @@ allMetadata =
(\( route, metadata ) ->
if metadata.draft then
Nothing
else
Just ( route, metadata )
)
@ -62,10 +68,12 @@ allMetadata =
(\( route, metadata ) -> -(Date.toRataDie metadata.published))
)
-- Frontmatter metadata for the blog posts, each paired with a Route.Blog__Slug_ route built from its slug.
blogAllMetadata = allMetadata (\s -> Route.Blog__Slug_ { slug = s }) blogPostsGlob
-- Frontmatter metadata for the osx posts, each paired with a Route.Osx__Slug_ route built from its slug.
osxAllMetadata = allMetadata (\s -> Route.Osx__Slug_ { slug = s }) osxPostsGlob
type alias ArticleMetadata =
{ title : String
, description : String
, summary : String
, published : Date
-- , image : Url
, draft : Bool
@ -76,7 +84,7 @@ frontmatterDecoder : Decoder ArticleMetadata
frontmatterDecoder =
Decode.map4 ArticleMetadata
(Decode.field "title" Decode.string)
(Decode.field "description" Decode.string)
(Decode.field "summary" Decode.string)
(Decode.field "published"
(Decode.string
|> Decode.andThen