This commit is contained in:
nganhkhoa 2023-12-10 03:59:01 +07:00
parent b559885ed4
commit 9878ef3e1b
8 changed files with 125 additions and 49 deletions

View File

@ -82,8 +82,11 @@ view :
-> View msg
view app shared =
{ title = "nganhkhoa blogs"
, body = app.data
|> List.map renderBlogItem
, body =
[ Link.link (Link.internal (Route.Index)) []
[ text "Home" ]
, div [] (app.data |> List.map renderBlogItem)
]
}
renderBlogItem : (Route, Article.ArticleMetadata) -> Html msg
@ -92,7 +95,6 @@ renderBlogItem (route_, article) =
[ div []
[ div []
[ text article.title
, text article.summary
]
]
]

View File

@ -7,10 +7,13 @@ import FatalError exposing (FatalError)
import Head
import Head.Seo as Seo
import Html.Styled exposing (..)
import Html.Styled.Attributes exposing (style)
import Link exposing (Link)
import Json.Decode as Decode exposing (Decoder)
import Json.Decode.Extra
import Pages.Url
import PagesMsg exposing (PagesMsg)
import Route
import RouteBuilder exposing (App, StatelessRoute)
import Shared
import View exposing (View)
@ -131,10 +134,18 @@ view :
-> Shared.Model
-> View (PagesMsg Msg)
view app shared =
let rendered = (app.data.body |> Markdown.Renderer.render TailwindMarkdownRenderer.renderer) |> Result.withDefault []
in
{ title = app.data.metadata.title
, body =
(app.data.body
|> Markdown.Renderer.render TailwindMarkdownRenderer.renderer
|> Result.withDefault []
)
[ Link.link (Link.internal (Route.Index))
[ style "margin" "10px" ]
[ text "Home" ]
, Link.link (Link.internal (Route.Blog__Slug_ { slug = "" }))
[ style "margin" "10px" ]
[ text "Blog" ]
, br [] []
, h1 [] [ text app.data.metadata.title ]
, div [] rendered
]
}

View File

@ -7,11 +7,14 @@ import FatalError exposing (FatalError)
import Head
import Head.Seo as Seo
import Html.Styled exposing (..)
import Html.Styled.Attributes exposing (style)
import Link exposing (Link)
import Json.Decode as Decode exposing (Decoder)
import Json.Decode.Extra
import Pages.Url
import PagesMsg exposing (PagesMsg)
import RouteBuilder exposing (App, StatelessRoute)
import Route
import Shared
import View exposing (View)
@ -131,11 +134,18 @@ view :
-> Shared.Model
-> View (PagesMsg Msg)
view app shared =
let rendered = (app.data.body |> Markdown.Renderer.render TailwindMarkdownRenderer.renderer) |> Result.withDefault []
in
{ title = app.data.metadata.title
, body =
(app.data.body
|> Markdown.Renderer.render TailwindMarkdownRenderer.renderer
|> Result.withDefault []
)
[ Link.link (Link.internal (Route.Index))
[ style "margin" "10px" ]
[ text "Home" ]
, Link.link (Link.internal (Route.Osx__Slug_ { slug = "" }))
[ style "margin" "10px" ]
[ text "OSX Index" ]
, br [] []
, h1 [] [ text app.data.metadata.title ]
, div [] rendered
]
}

View File

@ -8,7 +8,7 @@ import SiteConfig exposing (SiteConfig)
config : SiteConfig
config =
{ canonicalUrl = "https://elm-pages.com"
{ canonicalUrl = "https://nganhkhoa.com"
, head = head
}
@ -16,6 +16,19 @@ config =
head : BackendTask FatalError (List Head.Tag)
head =
[ Head.metaName "viewport" (Head.raw "width=device-width,initial-scale=1")
, Head.sitemapLink "/sitemap.xml"
-- , Head.nonLoadingNode "link"
-- [ ( "rel", Head.raw "stylesheet" )
-- , ( "crossorigin", Head.raw "anonymous" )
-- , ( "href", Head.raw "https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css" )
-- ]
-- , Head.nonLoadingNode "script"
-- [ ( "crossorigin", Head.raw "anonymous" )
-- , ( "href", Head.raw "https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.js" )
-- ]
-- , Head.nonLoadingNode "script"
-- [ ( "crossorigin", Head.raw "anonymous" )
-- , ( "href", Head.raw "https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/contrib/auto-render.min.js" )
-- , ( "onload", Head.raw "renderMathInElement(document.body);" )
-- ]
]
|> BackendTask.succeed

View File

@ -2,79 +2,83 @@
title: "Malware Neutralization"
subtitle: ""
summary: ""
tags: ["program analysis", "binary analysis", "program synthesis", "malware"]
tags: ["program analysis", "binary analysis", "program synthesis", "malware", "machine learning", "AI", "deep learning"]
categories: []
published: "2023-11-05"
published: "2023-11-07"
featured: false
draft: false
---
# Malware Neutralization
In this blog post, I will describe a novel idea for software security named "Malware Neutralization". At the moment, everything said in here is theoretical. There will be no implementation, or even a PoC. It could be a new topic for further research in the future, I don't know.
In this blog post, I introduce a novel concept known as **Malware Neutralization**. While the following discussion remains theoretical without practical implementation or a Proof of Concept (PoC), I guarantee it is a sensible and logical approach to a long-unsolved problem. The concept described uses a series of novel technologies that were not available in the past. However, advancements in recent years have made this concept at least theoretically feasible, which is the reason behind this blog post.
## Motivation
Ever downloaded something online and got "infected"? The problem with the overwhelming distribution of software (legit and non-legit) is the validity of the software. Downloaded software could be infected or modified with malware components. And not only software: document files can be infected too.
In the early 21st century, a category of malware known as Injected File gained significant attention as one of the first types of cyberattacks to capture widespread interest. Unlike the sophisticated tactics employed in modern cyberattacks, Injected File attacks involve the modification of a file with malicious code, enabling it to be later executed and propagate itself. Over time, these attacks fell into relative obscurity, overshadowed by various factors.
How are we dealing with this issue? Easy answer, we remove them as soon as we detect them as "malware". Is this the best solution? Are there any other way to solve this issue? I propose **Malware Neutralization**.
One of the primary reasons for their decline is the effectiveness of antivirus and Endpoint Detection and Response (EDR) systems, which promptly remove any file detected with malicious components. Additionally, the prevailing trend among malicious actors shifted towards the creation of weaponized malware for mass distribution. Injected File attacks, which involve altering existing files, became less common. However, recent developments have indicated a resurgence of Injected File malware, as reported by Mandiant in 2021.
While the conventional response to such attacks has been the removal of all infected files, this may not always be the ideal solution. In some cases, these files hold significant importance for the system or the user. Recovering infected files has posed a persistent challenge over the years, with the most effective approach often involving the use of periodic backups. However, situations arise where backups are unavailable, or the original unmodified file is no longer accessible.
In the context of the recent advancements in technology over the past few decades, notably in the fields of Machine Learning, Deep Learning, and Program Synthesis, it has become evident that this long-standing problem may find a more effective solution through the proposed concept of **Malware Neutralization**.
## Concept
So what is **Malware Neutralization**? The concept is easy to explain in a few lines. We remove malware components in the binary (be it software or documents) while keeping other components. This effectively keeps the "good" and does not prevent the file from running.
**Malware Neutralization** represents an innovative paradigm - the removal of malicious components within binary files, encompassing software executables and documents, while preserving the integrity of legitimate components. This novel approach ensures that the "good" elements are retained, thereby allowing the file to maintain its intended functionality.
The steps to make this work can be illustrated below:
The process can be segmented into the following steps:
1. Detect malicious components
2. Remove malicious components
3. Repair the binary
1. Detection of Malicious Components
2. Removal of Malicious Components
3. Binary Restoration
In the following sections, we will go into each of these steps and discuss their technical view.
Subsequent sections will delve into each of these steps, dissecting their technical intricacies.
### Detection of Malicious Components
## Steps
Detection of malicious components within binary files entails a formidable challenge. It demands a profound understanding of various malware infection techniques. Conventional methods like YARA signatures or heuristic-based detection, designed for rapid software classification, fall short in this context. A more rigorous approach is essential, one that ensures the identification of all malicious components. To this end, Machine Learning or Deep Learning models offer a promising path, as they can provide the agility and accuracy required for this complex endeavor.
### Detecting malicious components
### Removal of Malicious Components
This step is easy to understand: we must be able to detect the malicious components for a given binary file. This involves a compilation of all malware infection techniques (I shall call this malware embedding). If we do not know about the techniques used for malware embedding, we cannot deploy a good detection method.
Once all malicious components have been successfully identified, the subsequent step involves their surgical removal from the binary. This may involve overwriting their locations with null bytes, particularly for executable binaries. Document files, such as OLEs or zip streams, might necessitate encoding/decoding for the removal process. One of the most challenging aspects of this step pertains to determining the extent of removal. In the absence of comprehensive evidence, we may need to contend with the possibility that malicious components span a significant portion of the binary, while our detection methods may only unveil a fraction of them. These dilemmas underscore the need for further research, particularly in defining the scope of removal, whether it entails the entire function or only a portion of what has been detected.
Detecting the malicious components cannot be done simply by applying YARA signatures or ordinary heuristic-based detection. These detection techniques are for quick classification of software. In this context, a quick classification is not enough; we demand that all malicious components be found before carrying on. This requirement is strict, and a fully working method may be hard to find.
### Binary Restoration
I suggest using Machine Learning or Deep Learning model to tackle this problem. Although I am not an AI guy, but with my limited knowledge of malware detection, I believe this way is the fastest.
The final step, binary restoration, brings into play the concept of Program Synthesis. Some malware infection techniques exhibit strong ties to legitimate components, necessitating the recovery of removed components without disrupting the overall execution flow. Automatic Program Repair, a facet of Program Synthesis, offers a potential solution. This involves generating assembly-level patches to rectify identified issues, ideally without impeding the functionality of other legitimate components.
### Removing malicious components
This step introduces its own set of challenges, such as establishing constraints for reinstating the removed segments. In the context of assembly, this could involve maintaining stacks and registers, although this remains open to debate. Another challenge concerns the algorithmic approach for determining the correct "fixes". This could involve logical analysis of program semantics, Machine Learning, Deep Learning, or other methodologies, with a focus on effectiveness, robustness, and speed. While Program Synthesis is the proposed technology for this step, alternative approaches might emerge to effectively restore and repair the removed components.
After all malicious components are found, the next step is simply removing them from the binary. This could be done simply by overwriting their place with a series of dummy bytes, e.g., 0x00. In a practical scenario, this involves direct assembly patching for executable binaries or encoding/decoding of document files (OLEs, or zip streams).
## History
The hardest part of this step is probably how much we should remove. Without evidence, I guess that malicious components might span a large part of the binary, but the detection might only be able to discover a part of them. This might strongly affect how we approach the removal step, mostly in the degree of removal. Should a whole function be removed, or only the part that is detected? These questions are subtopics to be researched.
In this section, we'll rewind to the early years of the 21st century and delve into the prevalent trend of Injected File Viruses.
### Repairing the binary
The most common method employed by Injected File Viruses is the modification of the entry point within a file. This alteration redirects the entry point to the malware code, ensuring that when the file is executed, the malicious components take precedence. Variants of this method may involve placing the malware at different locations within the file or utilizing an inline hook to control the flow of the program and direct it to the malware components. To help visualize the underlying logic of these malware types, consider the diagram below:
And the last step, repairing the binary. People with less familiarity with Program Synthesis might not understand what Repairing means. So I give a short description on Automatic Program Repair, a subtopic of Program Synthesis.
![Injected File Virus](https://blogs.quickheal.com/wp-content/uploads/2018/01/Picture2.png)
Although some research has documented these behaviors, it's important to note that these infection techniques, while seemingly straightforward, are not only easy to detect but also make it possible to rebuild or reconstruct the file. This process often requires a profound understanding of malware and manual binary patching. It's worth mentioning that the point at which antivirus software began to accept the removal of infected files remains unclear. However, it was a logical step towards enhancing system protection.
> Automated program repair is an emerging suite of technologies for automatically fixing errors or vulnerabilities—bugs, colloquially—in software systems. Automatic program repair as a research field focuses on a class of techniques that produces source code-level patches for such bugs, of the same variety that programmers produce in addressing a defect they find in their own programs or in response to a bug report. Thus, at a high level, an automatic repair approach takes as input a program and some evidence that the program has a bug (commonly, a failing test) and produces a patch for that programs source to fix that bug, ideally without negatively influencing other correct functionality.
As technology advanced, the prevalence of Injected File Viruses began to wane, gradually losing their prominence to more insidious forms of malware. Modern threats, such as stealthy stealers, cryptojacking, and the destructive power of ransomware, took center stage, leveraging sophisticated techniques to evade detection and wreak havoc on systems.
I found this in the introduction of the book [Automatic Program Repair](https://www.computer.org/csdl/magazine/so/2021/04/09461040/1uCdJpSV13a).
However, even in this evolving landscape, the concept of Infected File Viruses remains relevant, albeit with a different classification. Concrete evidence has emerged, highlighting that documents in various formats (e.g., Word, Excel, PDF) can serve as vessels for malicious components. These files, seemingly innocuous, can harbor hidden threats.
So what does that have to do with this step? Obviously, we only want to neutralize malware components found in the binary. However, some of the techniques for malware embedding might involve strong binding with the underlying valid components. Thus even after you successfully remove all malware components, the binary cannot be executed successfully. To solve this issue, I propose using Automatic Program Repair to recover the removed components without breaking the execution flow.
Of course, this proposed method contains multiple problems that should be looked at independently to complete this step effectively. One of the first problems is how we should build constraints to fill in the removed part. For assembly, it could be maintaining the stacks and registers, but that is open to argument. Another problem that might arise is the algorithm used to define the correct "fixes". This could be done through logic examination of programs (Program Semantics) or through Machine Learning / Deep Learning or even through LLMs. Of course, these "algorithms" must compete in their effectiveness, robustness, and speed.
This step is proposed to use a relatively novel technology called Program Synthesis. There could be other technology for the recovering/repairing of the removed parts to render the program executable without errors emerging from our removal of malicious components.
Additionally, Trojans represent another manifestation of Infected Files. Trojans often pose as legitimate programs, concealing their malevolent components. They exemplify the enduring nature of Infected File malware, adapting to new forms and evading detection while posing significant threats to cybersecurity.
## Conclusion
To recap, we define **Malware Neutralization** as a process to neutralize a binary, be it a software executable, a document file, or another type of file susceptible to malware embedding. We also list the overall steps to perform this process, together with their technical problems.
In summary, **Malware Neutralization** emerges as a progressive process designed to neutralize binary files, encompassing software executables and documents, by selectively removing malicious components while preserving the overall functionality of the file. This innovative approach paves the way for a fresh avenue of research in the realm of malware mitigation. While its potential is intriguing, its efficiency remains to be substantiated. The decision to delve deeper into this topic in the future will be contingent on the urgency of finding a solution, its demonstrated efficacy, and its capacity to address the evolving landscape of malware threats. Undoubtedly, the ability to neutralize (recover) infected files represents a long-awaited solution that could reshape the landscape of cybersecurity. As the field progresses, the quest for robust and effective methods to combat malware continues, with **Malware Neutralization** offering a promising direction for further exploration.
**Malware Neutralization** is relevant to today's software and file distribution. Distribution of files can be unsafe due to many factors. By removing the malicious components embedded inside the file (neutralization), the file can be used normally without fear of malware infection.
## Research and Future Prospects
The proposed idea is an innovative way of ensuring the safety of our everyday file downloads, while still maintaining the overall content of the file to be executed or read, without having to remove it as a whole when it is flagged as malware.
This blog post extends an open invitation to researchers interested in exploring the concept of Malware Neutralization. The guidelines provided herein, covering various domains such as Malware Analysis, Program Analysis, Binary Analysis, Assembly, Machine Learning, Deep Learning, Large Language Modeling, Program Synthesis, and Program Semantics, offer a comprehensive roadmap for tackling this multifaceted challenge. Researchers are encouraged to begin by addressing each aspect individually and subsequently integrating solutions. A pivotal milestone in this journey involves the development of a Proof of Concept (PoC) to validate the practicality of the concept.
> I should emphasize that this is not for malware with only malware functionality.
I may or may not undertake further research in this field, but welcome and anticipate engagement from researchers keen on taking this concept to the next level, thereby contributing to the ongoing evolution of software security.
## Research?
I leave my idea open to the world. Researchers interested in this problem can carry on the research with the guidelines described above (or not, you are free to explore all methods). This research should be a joint effort across several topics, including Malware Analysis, Program Analysis, Binary Analysis, Assembly, Machine Learning, Deep Learning, Large Language Modeling, Program Synthesis, Program Semantics, Formal Assembly, and so on. It will be hard to tackle all of them at once; I suggest tackling what is familiar to you first and solving the parts separately. After all steps are implemented, a PoC should be produced to prove the overall performance.
## Bibliography
I may work on this problem when I have an opportunity and when my research is aligned with my target. In the near future, I might work on something else not relating to this idea. However, I would love to hear from researchers taking my idea to the test.
1. [https://www.mandiant.com/resources/blog/pe-file-infecting-malware-ot](https://www.mandiant.com/resources/blog/pe-file-infecting-malware-ot)
2. [https://www.seqrite.com/blog/virus-infectors-a-perpetual-attack-vector/](https://www.seqrite.com/blog/virus-infectors-a-perpetual-attack-vector/)
3. [https://documents.uow.edu.au/~jennie/WEBPDF/1997_09.pdf](https://documents.uow.edu.au/~jennie/WEBPDF/1997_09.pdf)
4. [https://arxiv.org/abs/1306.4666](https://arxiv.org/abs/1306.4666)

View File

@ -8,6 +8,42 @@ export default {
return `
<link rel="stylesheet" href="/style.css" />
<meta name="generator" content="elm-pages v${context.cliVersion}" />
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css" integrity="sha384-n8MVd4RsNIU0tAv4ct0nTaAbDJwPJzDEaqSD1odI+WdtXRGWt2kTvGFasHpSy3SV" crossorigin="anonymous"/>
<!-- The loading of KaTeX is deferred to speed up page rendering -->
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.js" integrity="sha384-XjKyOOlGwcjNTAIQHIpgOno0Hl1YQqzUOEleOLALmuqehneUG+vnGctmUb0ZY0l8" crossorigin="anonymous"></script>
<!-- To automatically render math in text elements, include the auto-render extension: -->
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/contrib/auto-render.min.js" integrity="sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05" crossorigin="anonymous"
onload="renderMathInElement(document.body);"></script>
<script defer type="text/javascript">
// delay until the whole page is rendered to run Katex
setTimeout(() => {
renderMathInElement(document.body, {
// customised options
// • auto-render specific keys, e.g.:
delimiters: [
{left: '$$', right: '$$', display: true},
{left: '$', right: '$', display: false},
{left: '\\(', right: '\\)', display: false},
{left: '\\[', right: '\\]', display: true}
],
// • rendering keys, e.g.:
throwOnError : true
});
}, 1000);
</script>
<style>
body {
max-width: 1080px;
margin: 0 auto !important;
float: none !important;
}
</style>
`;
},
preloadTagForFile(file) {

Binary file not shown.

Binary file not shown.