diff --git a/Readme.md b/Readme.md index 301e2369..c47e07a5 100644 --- a/Readme.md +++ b/Readme.md @@ -131,7 +131,7 @@ pip install --upgrade khoj-assistant - Indexing is more strongly impacted by the size of the source data - Indexing 100K+ line corpus of notes takes about 10 minutes - Indexing 4000+ images takes about 15 minutes and more than 8Gb of RAM -- Once is implemented, it should only take this long on first run +- Note: *It should only take this long on the first run* as the index is incrementally updated ### Miscellaneous diff --git a/src/configure.py b/src/configure.py index f6476951..ed46af37 100644 --- a/src/configure.py +++ b/src/configure.py @@ -48,11 +48,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, config.content_type.org, search_config=config.search_type.asymmetric, regenerate=regenerate, - filters=[ - DateFilter(), - WordFilter(config.content_type.org.compressed_jsonl.parent, SearchType.Org), - FileFilter(), - ]) + filters=[DateFilter(), WordFilter(), FileFilter()]) # Initialize Org Music Search if (t == SearchType.Music or t == None) and config.content_type.music: @@ -71,11 +67,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, config.content_type.markdown, search_config=config.search_type.asymmetric, regenerate=regenerate, - filters=[ - DateFilter(), - WordFilter(config.content_type.markdown.compressed_jsonl.parent, SearchType.Markdown), - FileFilter(), - ]) + filters=[DateFilter(), WordFilter(), FileFilter()]) # Initialize Ledger Search if (t == SearchType.Ledger or t == None) and config.content_type.ledger: @@ -85,11 +77,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, config.content_type.ledger, search_config=config.search_type.symmetric, regenerate=regenerate, - filters=[ - DateFilter(), - WordFilter(config.content_type.ledger.compressed_jsonl.parent, SearchType.Ledger), - FileFilter(), - ]) + filters=[DateFilter(), WordFilter(), FileFilter()]) # Initialize Image Search if (t == SearchType.Image or t == None) and config.content_type.image: diff --git a/src/processor/ledger/beancount_to_jsonl.py b/src/processor/ledger/beancount_to_jsonl.py index c0136bc6..3af45c7a 100644 --- a/src/processor/ledger/beancount_to_jsonl.py +++ b/src/processor/ledger/beancount_to_jsonl.py @@ -7,9 +7,10 @@ import pathlib import glob import re import logging +import time # Internal Packages -from src.utils.helpers import get_absolute_path, is_none_or_empty +from src.utils.helpers import get_absolute_path, is_none_or_empty, mark_entries_for_update from src.utils.constants import empty_escape_sequences from src.utils.jsonl import dump_jsonl, compress_jsonl_data @@ -18,7 +19,7 @@ logger = logging.getLogger(__name__) # Define Functions -def beancount_to_jsonl(beancount_files, beancount_file_filter, output_file): +def beancount_to_jsonl(beancount_files, beancount_file_filter, output_file, previous_entries=None): # Input Validation if is_none_or_empty(beancount_files) and is_none_or_empty(beancount_file_filter): print("At least one of beancount-files or beancount-file-filter is required to be specified") @@ -28,18 +29,34 @@ def beancount_to_jsonl(beancount_files, beancount_file_filter, output_file): beancount_files = get_beancount_files(beancount_files, beancount_file_filter) # Extract Entries from specified Beancount files - entries, transaction_to_file_map = extract_beancount_entries(beancount_files) + start = time.time() + current_entries = 
convert_transactions_to_maps(*extract_beancount_transactions(beancount_files)) + end = time.time() + logger.debug(f"Parse transactions from Beancount files into dictionaries: {end - start} seconds") + + # Identify, mark and merge any new entries with previous entries + start = time.time() + if not previous_entries: + entries_with_ids = list(enumerate(current_entries)) + else: + entries_with_ids = mark_entries_for_update(current_entries, previous_entries, key='compiled', logger=logger) + end = time.time() + logger.debug(f"Identify new or updated transaction: {end - start} seconds") # Process Each Entry from All Notes Files - jsonl_data = convert_beancount_entries_to_jsonl(entries, transaction_to_file_map) + start = time.time() + entries = list(map(lambda entry: entry[1], entries_with_ids)) + jsonl_data = convert_transaction_maps_to_jsonl(entries) # Compress JSONL formatted Data if output_file.suffix == ".gz": compress_jsonl_data(jsonl_data, output_file) elif output_file.suffix == ".jsonl": dump_jsonl(jsonl_data, output_file) + end = time.time() + logger.debug(f"Write transactions to JSONL file: {end - start} seconds") - return entries + return entries_with_ids def get_beancount_files(beancount_files=None, beancount_file_filter=None): @@ -66,12 +83,12 @@ def get_beancount_files(beancount_files=None, beancount_file_filter=None): return all_beancount_files -def extract_beancount_entries(beancount_files): +def extract_beancount_transactions(beancount_files): "Extract entries from specified Beancount files" # Initialize Regex for extracting Beancount Entries transaction_regex = r'^\n?\d{4}-\d{2}-\d{2} [\*|\!] ' - empty_newline = f'^[{empty_escape_sequences}]*$' + empty_newline = f'^[\n\r\t\ ]*$' entries = [] transaction_to_file_map = [] @@ -82,22 +99,25 @@ def extract_beancount_entries(beancount_files): for entry in re.split(empty_newline, ledger_content, flags=re.MULTILINE) if re.match(transaction_regex, entry)] - transaction_to_file_map += [beancount_file]*len(transactions_per_file) + transaction_to_file_map += zip(transactions_per_file, [beancount_file]*len(transactions_per_file)) entries.extend(transactions_per_file) - return entries, transaction_to_file_map + return entries, dict(transaction_to_file_map) -def convert_beancount_entries_to_jsonl(entries, transaction_to_file_map): - "Convert each Beancount transaction to JSON and collate as JSONL" - jsonl = '' - for entry_id, entry in enumerate(entries): - entry_dict = {'compiled': entry, 'raw': entry, 'file': f'{transaction_to_file_map[entry_id]}'} - # Convert Dictionary to JSON and Append to JSONL string - jsonl += f'{json.dumps(entry_dict, ensure_ascii=False)}\n' +def convert_transactions_to_maps(entries: list[str], transaction_to_file_map) -> list[dict]: + "Convert each Beancount transaction into a dictionary" + entry_maps = [] + for entry in entries: + entry_maps.append({'compiled': entry, 'raw': entry, 'file': f'{transaction_to_file_map[entry]}'}) - logger.info(f"Converted {len(entries)} to jsonl format") + logger.info(f"Converted {len(entries)} transactions to dictionaries") - return jsonl + return entry_maps + + +def convert_transaction_maps_to_jsonl(entries: list[dict]) -> str: + "Convert each Beancount transaction dictionary to JSON and collate as JSONL" + return ''.join([f'{json.dumps(entry_dict, ensure_ascii=False)}\n' for entry_dict in entries]) if __name__ == '__main__': diff --git a/src/processor/markdown/markdown_to_jsonl.py b/src/processor/markdown/markdown_to_jsonl.py index a0903fcb..e7fc2779 100644 --- 
a/src/processor/markdown/markdown_to_jsonl.py
+++ b/src/processor/markdown/markdown_to_jsonl.py
@@ -7,9 +7,10 @@ import pathlib
 import glob
 import re
 import logging
+import time
 # Internal Packages
-from src.utils.helpers import get_absolute_path, is_none_or_empty
+from src.utils.helpers import get_absolute_path, is_none_or_empty, mark_entries_for_update
 from src.utils.constants import empty_escape_sequences
 from src.utils.jsonl import dump_jsonl, compress_jsonl_data
@@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
 # Define Functions
-def markdown_to_jsonl(markdown_files, markdown_file_filter, output_file):
+def markdown_to_jsonl(markdown_files, markdown_file_filter, output_file, previous_entries=None):
     # Input Validation
     if is_none_or_empty(markdown_files) and is_none_or_empty(markdown_file_filter):
         print("At least one of markdown-files or markdown-file-filter is required to be specified")
@@ -28,18 +29,34 @@ def markdown_to_jsonl(markdown_files, markdown_file_filter, output_file):
     markdown_files = get_markdown_files(markdown_files, markdown_file_filter)
     # Extract Entries from specified Markdown files
-    entries, entry_to_file_map = extract_markdown_entries(markdown_files)
+    start = time.time()
+    current_entries = convert_markdown_entries_to_maps(*extract_markdown_entries(markdown_files))
+    end = time.time()
+    logger.debug(f"Parse entries from Markdown files into dictionaries: {end - start} seconds")
+
+    # Identify, mark and merge any new entries with previous entries
+    start = time.time()
+    if not previous_entries:
+        entries_with_ids = list(enumerate(current_entries))
+    else:
+        entries_with_ids = mark_entries_for_update(current_entries, previous_entries, key='compiled', logger=logger)
+    end = time.time()
+    logger.debug(f"Identify new or updated entries: {end - start} seconds")
     # Process Each Entry from All Notes Files
-    jsonl_data = convert_markdown_entries_to_jsonl(entries, entry_to_file_map)
+    start = time.time()
+    entries = list(map(lambda entry: entry[1], entries_with_ids))
+    jsonl_data = convert_markdown_maps_to_jsonl(entries)
     # Compress JSONL formatted Data
     if output_file.suffix == ".gz":
         compress_jsonl_data(jsonl_data, output_file)
     elif output_file.suffix == ".jsonl":
         dump_jsonl(jsonl_data, output_file)
+    end = time.time()
+    logger.debug(f"Write markdown entries to JSONL file: {end - start} seconds")
-    return entries
+    return entries_with_ids
 def get_markdown_files(markdown_files=None, markdown_file_filter=None):
@@ -80,24 +97,28 @@ def extract_markdown_entries(markdown_files):
             markdown_content = f.read()
             markdown_entries_per_file = [f'#{entry.strip(empty_escape_sequences)}'
                                          for entry
-                                         in re.split(markdown_heading_regex, markdown_content, flags=re.MULTILINE)]
-        entry_to_file_map += [markdown_file]*len(markdown_entries_per_file)
+                                         in re.split(markdown_heading_regex, markdown_content, flags=re.MULTILINE)
+                                         if entry.strip(empty_escape_sequences) != '']
+        entry_to_file_map += zip(markdown_entries_per_file, [markdown_file]*len(markdown_entries_per_file))
         entries.extend(markdown_entries_per_file)
-    return entries, entry_to_file_map
+    return entries, dict(entry_to_file_map)
-def convert_markdown_entries_to_jsonl(entries, entry_to_file_map):
+def convert_markdown_entries_to_maps(entries: list[str], entry_to_file_map) -> list[dict]:
+    "Convert each Markdown entry into a dictionary"
+    entry_maps = []
+    for entry in entries:
+        entry_maps.append({'compiled': entry, 'raw': entry, 'file': f'{entry_to_file_map[entry]}'})
+
+    logger.info(f"Converted {len(entries)} markdown entries to 
dictionaries") + + return entry_maps + + +def convert_markdown_maps_to_jsonl(entries): "Convert each Markdown entries to JSON and collate as JSONL" - jsonl = '' - for entry_id, entry in enumerate(entries): - entry_dict = {'compiled': entry, 'raw': entry, 'file': f'{entry_to_file_map[entry_id]}'} - # Convert Dictionary to JSON and Append to JSONL string - jsonl += f'{json.dumps(entry_dict, ensure_ascii=False)}\n' - - logger.info(f"Converted {len(entries)} to jsonl format") - - return jsonl + return ''.join([f'{json.dumps(entry_dict, ensure_ascii=False)}\n' for entry_dict in entries]) if __name__ == '__main__': diff --git a/src/processor/org_mode/org_to_jsonl.py b/src/processor/org_mode/org_to_jsonl.py index f1531797..f166810f 100644 --- a/src/processor/org_mode/org_to_jsonl.py +++ b/src/processor/org_mode/org_to_jsonl.py @@ -7,10 +7,11 @@ import argparse import pathlib import glob import logging +import time # Internal Packages from src.processor.org_mode import orgnode -from src.utils.helpers import get_absolute_path, is_none_or_empty +from src.utils.helpers import get_absolute_path, is_none_or_empty, mark_entries_for_update from src.utils.jsonl import dump_jsonl, compress_jsonl_data from src.utils import state @@ -19,28 +20,47 @@ logger = logging.getLogger(__name__) # Define Functions -def org_to_jsonl(org_files, org_file_filter, output_file): +def org_to_jsonl(org_files, org_file_filter, output_file, previous_entries=None): # Input Validation if is_none_or_empty(org_files) and is_none_or_empty(org_file_filter): print("At least one of org-files or org-file-filter is required to be specified") exit(1) # Get Org Files to Process + start = time.time() org_files = get_org_files(org_files, org_file_filter) # Extract Entries from specified Org files - entries, file_to_entries = extract_org_entries(org_files) + start = time.time() + entry_nodes, file_to_entries = extract_org_entries(org_files) + end = time.time() + logger.debug(f"Parse entries from org files into OrgNode objects: {end - start} seconds") + + start = time.time() + current_entries = convert_org_nodes_to_entries(entry_nodes, file_to_entries) + end = time.time() + logger.debug(f"Convert OrgNodes into entry dictionaries: {end - start} seconds") + + # Identify, mark and merge any new entries with previous entries + if not previous_entries: + entries_with_ids = list(enumerate(current_entries)) + else: + entries_with_ids = mark_entries_for_update(current_entries, previous_entries, key='compiled', logger=logger) # Process Each Entry from All Notes Files - jsonl_data = convert_org_entries_to_jsonl(entries, file_to_entries) + start = time.time() + entries = map(lambda entry: entry[1], entries_with_ids) + jsonl_data = convert_org_entries_to_jsonl(entries) # Compress JSONL formatted Data if output_file.suffix == ".gz": compress_jsonl_data(jsonl_data, output_file) elif output_file.suffix == ".jsonl": dump_jsonl(jsonl_data, output_file) + end = time.time() + logger.debug(f"Write org entries to JSONL file: {end - start} seconds") - return entries + return entries_with_ids def get_org_files(org_files=None, org_file_filter=None): @@ -70,16 +90,16 @@ def extract_org_entries(org_files): entry_to_file_map = [] for org_file in org_files: org_file_entries = orgnode.makelist(str(org_file)) - entry_to_file_map += [org_file]*len(org_file_entries) + entry_to_file_map += zip(org_file_entries, [org_file]*len(org_file_entries)) entries.extend(org_file_entries) - return entries, entry_to_file_map + return entries, dict(entry_to_file_map) -def 
convert_org_entries_to_jsonl(entries, entry_to_file_map) -> str:
-    "Convert each Org-Mode entries to JSON and collate as JSONL"
-    jsonl = ''
-    for entry_id, entry in enumerate(entries):
+def convert_org_nodes_to_entries(entries: list[orgnode.Orgnode], entry_to_file_map) -> list[dict]:
+    "Convert Org-Mode entries into a list of dictionaries"
+    entry_maps = []
+    for entry in entries:
         entry_dict = dict()
         # Ignore title notes i.e notes with just headings and empty body
@@ -113,14 +133,17 @@ def convert_org_entries_to_jsonl(entries, entry_to_file_map) -> str:
         if entry_dict:
             entry_dict["raw"] = f'{entry}'
-            entry_dict["file"] = f'{entry_to_file_map[entry_id]}'
+            entry_dict["file"] = f'{entry_to_file_map[entry]}'
             # Convert Dictionary to JSON and Append to JSONL string
-            jsonl += f'{json.dumps(entry_dict, ensure_ascii=False)}\n'
+            entry_maps.append(entry_dict)
-    logger.info(f"Converted {len(entries)} to jsonl format")
+    return entry_maps
-    return jsonl
+
+def convert_org_entries_to_jsonl(entries: list[dict]) -> str:
+    "Convert each Org-Mode entry to JSON and collate as JSONL"
+    return ''.join([f'{json.dumps(entry_dict, ensure_ascii=False)}\n' for entry_dict in entries])
 if __name__ == '__main__':
diff --git a/src/processor/org_mode/orgnode.py b/src/processor/org_mode/orgnode.py
index f5c55e7f..31cedbb9 100644
--- a/src/processor/org_mode/orgnode.py
+++ b/src/processor/org_mode/orgnode.py
@@ -69,7 +69,7 @@ def makelist(filename):
     level = ""
     heading = ""
     bodytext = ""
-    tags = set() # set of all tags in headline
+    tags = list() # list of all tags in headline
     closed_date = ''
     sched_date = ''
     deadline_date = ''
@@ -104,14 +104,14 @@ def makelist(filename):
             level = hdng.group(1)
             heading = hdng.group(2)
             bodytext = ""
-            tags = set() # set of all tags in headline
+            tags = list() # list of all tags in headline
             tagsrch = re.search(r'(.*?)\s*:([a-zA-Z0-9].*?):$',heading)
             if tagsrch:
                 heading = tagsrch.group(1)
                 parsedtags = tagsrch.group(2)
                 if parsedtags:
                     for parsedtag in parsedtags.split(':'):
-                        if parsedtag != '': tags.add(parsedtag)
+                        if parsedtag != '': tags.append(parsedtag)
         else: # we are processing a non-heading line
             if line[:10] == '#+SEQ_TODO':
                 kwlist = re.findall(r'([A-Z]+)\(', line)
@@ -237,7 +237,7 @@ class Orgnode(object):
         self.level = len(level)
         self.headline = headline
         self.body = body
-        self.tags = set(tags) # All tags in the headline
+        self.tags = tags # All tags in the headline
         self.todo = ""
         self.prty = "" # empty of A, B or C
         self.scheduled = "" # Scheduled date
@@ -290,8 +290,8 @@ class Orgnode(object):
     def Tags(self):
         """
-        Returns the set of all tags
-        For example, :HOME:COMPUTER: would return {'HOME', 'COMPUTER'}
+        Returns the list of all tags
+        For example, :HOME:COMPUTER: would return ['HOME', 'COMPUTER']
         """
         return self.tags
@@ -307,7 +307,7 @@ class Orgnode(object):
         """
         Store all the tags found in the headline.
         """
-        self.tags = set(newtags)
+        self.tags = newtags
     def Todo(self):
         """
diff --git a/src/search_filter/word_filter.py b/src/search_filter/word_filter.py
index a177ba38..dd376cbb 100644
--- a/src/search_filter/word_filter.py
+++ b/src/search_filter/word_filter.py
@@ -19,37 +19,24 @@ class WordFilter(BaseFilter):
     required_regex = r'\+"(\w+)" ?'
     blocked_regex = r'\-"(\w+)" ?'
- def __init__(self, filter_directory, search_type: SearchType, entry_key='raw'): - self.filter_file = resolve_absolute_path(filter_directory / f"word_filter_{search_type.name.lower()}_index.pkl") + def __init__(self, entry_key='raw'): self.entry_key = entry_key - self.search_type = search_type - self.word_to_entry_index = dict() + self.word_to_entry_index = defaultdict(set) self.cache = LRU() def load(self, entries, regenerate=False): - if self.filter_file.exists() and not regenerate: - start = time.time() - with self.filter_file.open('rb') as f: - self.word_to_entry_index = pickle.load(f) - end = time.time() - logger.debug(f"Load word filter index for {self.search_type} from {self.filter_file}: {end - start} seconds") - else: - start = time.time() - self.cache = {} # Clear cache on (re-)generating entries_by_word_set - entry_splitter = r',|\.| |\]|\[\(|\)|\{|\}|\t|\n|\:' - self.word_to_entry_index = defaultdict(set) - # Create map of words to entries they exist in - for entry_index, entry in enumerate(entries): - for word in re.split(entry_splitter, entry[self.entry_key].lower()): - if word == '': - continue - self.word_to_entry_index[word].add(entry_index) - - with self.filter_file.open('wb') as f: - pickle.dump(self.word_to_entry_index, f) - end = time.time() - logger.debug(f"Indexed {len(self.word_to_entry_index)} words of {self.search_type} type for word filter to {self.filter_file}: {end - start} seconds") + start = time.time() + self.cache = {} # Clear cache on filter (re-)load + entry_splitter = r',|\.| |\]|\[\(|\)|\{|\}|\<|\>|\t|\n|\:|\;|\?|\!|\(|\)|\&|\^|\$|\@|\%|\+|\=|\/|\\|\||\~|\`|\"|\'' + # Create map of words to entries they exist in + for entry_index, entry in enumerate(entries): + for word in re.split(entry_splitter, entry[self.entry_key].lower()): + if word == '': + continue + self.word_to_entry_index[word].add(entry_index) + end = time.time() + logger.debug(f"Created word filter index: {end - start} seconds") return self.word_to_entry_index diff --git a/src/search_type/text_search.py b/src/search_type/text_search.py index 7a80c296..238c4736 100644 --- a/src/search_type/text_search.py +++ b/src/search_type/text_search.py @@ -55,15 +55,28 @@ def extract_entries(jsonl_file): return load_jsonl(jsonl_file) -def compute_embeddings(entries, bi_encoder, embeddings_file, regenerate=False): +def compute_embeddings(entries_with_ids, bi_encoder, embeddings_file, regenerate=False): "Compute (and Save) Embeddings or Load Pre-Computed Embeddings" - # Load pre-computed embeddings from file if exists + new_entries = [] + # Load pre-computed embeddings from file if exists and update them if required if embeddings_file.exists() and not regenerate: corpus_embeddings = torch.load(get_absolute_path(embeddings_file), map_location=state.device) logger.info(f"Loaded embeddings from {embeddings_file}") - else: # Else compute the corpus_embeddings from scratch, which can take a while - corpus_embeddings = bi_encoder.encode([entry['compiled'] for entry in entries], convert_to_tensor=True, device=state.device, show_progress_bar=True) + # Encode any new entries in the corpus and update corpus embeddings + new_entries = [entry['compiled'] for id, entry in entries_with_ids if id is None] + if new_entries: + new_embeddings = bi_encoder.encode(new_entries, convert_to_tensor=True, device=state.device, show_progress_bar=True) + existing_entry_ids = [id for id, _ in entries_with_ids if id is not None] + existing_embeddings = torch.index_select(corpus_embeddings, 0, torch.tensor(existing_entry_ids)) if 
existing_entry_ids else torch.Tensor() + corpus_embeddings = torch.cat([existing_embeddings, new_embeddings], dim=0) + # Else compute the corpus embeddings from scratch + else: + new_entries = [entry['compiled'] for _, entry in entries_with_ids] + corpus_embeddings = bi_encoder.encode(new_entries, convert_to_tensor=True, device=state.device, show_progress_bar=True) + + # Save regenerated or updated embeddings to file + if new_entries: corpus_embeddings = util.normalize_embeddings(corpus_embeddings) torch.save(corpus_embeddings, embeddings_file) logger.info(f"Computed embeddings and saved them to {embeddings_file}") @@ -169,10 +182,10 @@ def setup(text_to_jsonl, config: TextContentConfig, search_config: TextSearchCon # Map notes in text files to (compressed) JSONL formatted file config.compressed_jsonl = resolve_absolute_path(config.compressed_jsonl) - if not config.compressed_jsonl.exists() or regenerate: - text_to_jsonl(config.input_files, config.input_filter, config.compressed_jsonl) + previous_entries = extract_entries(config.compressed_jsonl) if config.compressed_jsonl.exists() else None + entries_with_indices = text_to_jsonl(config.input_files, config.input_filter, config.compressed_jsonl, previous_entries) - # Extract Entries + # Extract Updated Entries entries = extract_entries(config.compressed_jsonl) if is_none_or_empty(entries): raise ValueError(f"No valid entries found in specified files: {config.input_files} or {config.input_filter}") @@ -180,7 +193,7 @@ def setup(text_to_jsonl, config: TextContentConfig, search_config: TextSearchCon # Compute or Load Embeddings config.embeddings_file = resolve_absolute_path(config.embeddings_file) - corpus_embeddings = compute_embeddings(entries, bi_encoder, config.embeddings_file, regenerate=regenerate) + corpus_embeddings = compute_embeddings(entries_with_indices, bi_encoder, config.embeddings_file, regenerate=regenerate) for filter in filters: filter.load(entries, regenerate=regenerate) diff --git a/src/utils/constants.py b/src/utils/constants.py index 84c3dfbb..8b443944 100644 --- a/src/utils/constants.py +++ b/src/utils/constants.py @@ -2,7 +2,7 @@ from pathlib import Path app_root_directory = Path(__file__).parent.parent.parent web_directory = app_root_directory / 'src/interface/web/' -empty_escape_sequences = r'\n|\r\t ' +empty_escape_sequences = '\n|\r|\t| ' # default app config to use default_config = { diff --git a/src/utils/helpers.py b/src/utils/helpers.py index 7ea6580c..e8b2b8ca 100644 --- a/src/utils/helpers.py +++ b/src/utils/helpers.py @@ -1,6 +1,8 @@ # Standard Packages import pathlib import sys +import time +import hashlib from os.path import join from collections import OrderedDict @@ -79,3 +81,39 @@ class LRU(OrderedDict): if len(self) > self.capacity: oldest = next(iter(self)) del self[oldest] + + +def mark_entries_for_update(current_entries, previous_entries, key='compiled', logger=None): + # Hash all current and previous entries to identify new entries + start = time.time() + current_entry_hashes = list(map(lambda e: hashlib.md5(bytes(e[key], encoding='utf-8')).hexdigest(), current_entries)) + previous_entry_hashes = list(map(lambda e: hashlib.md5(bytes(e[key], encoding='utf-8')).hexdigest(), previous_entries)) + end = time.time() + logger.debug(f"Hash previous, current entries: {end - start} seconds") + + start = time.time() + hash_to_current_entries = dict(zip(current_entry_hashes, current_entries)) + hash_to_previous_entries = dict(zip(previous_entry_hashes, previous_entries)) + + # All entries that did not exist in 
the previous set are to be added + new_entry_hashes = set(current_entry_hashes) - set(previous_entry_hashes) + # All entries that exist in both current and previous sets are kept + existing_entry_hashes = set(current_entry_hashes) & set(previous_entry_hashes) + + # Mark new entries with no ids for later embeddings generation + new_entries = [ + (None, hash_to_current_entries[entry_hash]) + for entry_hash in new_entry_hashes + ] + # Set id of existing entries to their previous ids to reuse their existing encoded embeddings + existing_entries = [ + (previous_entry_hashes.index(entry_hash), hash_to_previous_entries[entry_hash]) + for entry_hash in existing_entry_hashes + ] + + existing_entries_sorted = sorted(existing_entries, key=lambda e: e[0]) + entries_with_ids = existing_entries_sorted + new_entries + end = time.time() + logger.debug(f"Identify, Mark, Combine new, existing entries: {end - start} seconds") + + return entries_with_ids \ No newline at end of file diff --git a/src/utils/jsonl.py b/src/utils/jsonl.py index 77b5af11..8a034acd 100644 --- a/src/utils/jsonl.py +++ b/src/utils/jsonl.py @@ -54,4 +54,4 @@ def compress_jsonl_data(jsonl_data, output_path): with gzip.open(output_path, 'wt') as gzip_file: gzip_file.write(jsonl_data) - logger.info(f'Wrote {len(jsonl_data)} lines to gzip compressed jsonl at {output_path}') \ No newline at end of file + logger.info(f'Wrote jsonl data to gzip compressed jsonl at {output_path}') \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index fdb26557..2a725919 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -59,7 +59,7 @@ def content_config(tmp_path_factory, search_config: SearchConfig): compressed_jsonl = content_dir.joinpath('notes.jsonl.gz'), embeddings_file = content_dir.joinpath('note_embeddings.pt')) - filters = [DateFilter(), WordFilter(content_dir, search_type=SearchType.Org), FileFilter()] + filters = [DateFilter(), WordFilter(), FileFilter()] text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters) - return content_config \ No newline at end of file + return content_config diff --git a/tests/test_beancount_to_jsonl.py b/tests/test_beancount_to_jsonl.py new file mode 100644 index 00000000..e368e194 --- /dev/null +++ b/tests/test_beancount_to_jsonl.py @@ -0,0 +1,84 @@ +# Standard Packages +import json + +# Internal Packages +from src.processor.ledger.beancount_to_jsonl import extract_beancount_transactions, convert_transactions_to_maps, convert_transaction_maps_to_jsonl + + +def test_no_transactions_in_file(tmp_path): + "Handle file with no transactions." + # Arrange + entry = f''' + - Bullet point 1 + - Bullet point 2 + ''' + beancount_file = create_file(tmp_path, entry) + + # Act + # Extract Entries from specified Beancount files + entry_nodes, file_to_entries = extract_beancount_transactions(beancount_files=[beancount_file]) + + # Process Each Entry from All Beancount Files + jsonl_string = convert_transaction_maps_to_jsonl(convert_transactions_to_maps(entry_nodes, file_to_entries)) + jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()] + + # Assert + assert len(jsonl_data) == 0 + + +def test_single_beancount_transaction_to_jsonl(tmp_path): + "Convert transaction from single file to jsonl." 
+ # Arrange + entry = f''' +1984-04-01 * "Payee" "Narration" +Expenses:Test:Test 1.00 KES +Assets:Test:Test -1.00 KES + ''' + beancount_file = create_file(tmp_path, entry) + + # Act + # Extract Entries from specified Beancount files + entries, entry_to_file_map = extract_beancount_transactions(beancount_files=[beancount_file]) + + # Process Each Entry from All Beancount Files + jsonl_string = convert_transaction_maps_to_jsonl(convert_transactions_to_maps(entries, entry_to_file_map)) + jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()] + + # Assert + assert len(jsonl_data) == 1 + + +def test_multiple_transactions_to_jsonl(tmp_path): + "Convert multiple transactions from single file to jsonl." + # Arrange + entry = f''' +1984-04-01 * "Payee" "Narration" +Expenses:Test:Test 1.00 KES +Assets:Test:Test -1.00 KES +\t\r +1984-04-01 * "Payee" "Narration" +Expenses:Test:Test 1.00 KES +Assets:Test:Test -1.00 KES +''' + + beancount_file = create_file(tmp_path, entry) + + # Act + # Extract Entries from specified Beancount files + entries, entry_to_file_map = extract_beancount_transactions(beancount_files=[beancount_file]) + + # Process Each Entry from All Beancount Files + jsonl_string = convert_transaction_maps_to_jsonl(convert_transactions_to_maps(entries, entry_to_file_map)) + jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()] + + # Assert + assert len(jsonl_data) == 2 + + +# Helper Functions +def create_file(tmp_path, entry, filename="ledger.beancount"): + beancount_file = tmp_path / f"notes/{filename}" + beancount_file.parent.mkdir() + beancount_file.touch() + beancount_file.write_text(entry) + return beancount_file diff --git a/tests/test_client.py b/tests/test_client.py index 578c789c..b167bce0 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -132,7 +132,7 @@ def test_notes_search(content_config: ContentConfig, search_config: SearchConfig # ---------------------------------------------------------------------------------------------------- def test_notes_search_with_include_filter(content_config: ContentConfig, search_config: SearchConfig): # Arrange - filters = [WordFilter(content_config.org.compressed_jsonl.parent, search_type=SearchType.Org)] + filters = [WordFilter()] model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters) user_query = 'How to git install application? +"Emacs"' @@ -149,7 +149,7 @@ def test_notes_search_with_include_filter(content_config: ContentConfig, search_ # ---------------------------------------------------------------------------------------------------- def test_notes_search_with_exclude_filter(content_config: ContentConfig, search_config: SearchConfig): # Arrange - filters = [WordFilter(content_config.org.compressed_jsonl.parent, search_type=SearchType.Org)] + filters = [WordFilter()] model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters) user_query = 'How to git install application? 
-"clone"' diff --git a/tests/test_markdown_to_jsonl.py b/tests/test_markdown_to_jsonl.py new file mode 100644 index 00000000..712053e8 --- /dev/null +++ b/tests/test_markdown_to_jsonl.py @@ -0,0 +1,81 @@ +# Standard Packages +import json + +# Internal Packages +from src.processor.markdown.markdown_to_jsonl import extract_markdown_entries, convert_markdown_maps_to_jsonl, convert_markdown_entries_to_maps + + +def test_markdown_file_with_no_headings_to_jsonl(tmp_path): + "Convert files with no heading to jsonl." + # Arrange + entry = f''' + - Bullet point 1 + - Bullet point 2 + ''' + markdownfile = create_file(tmp_path, entry) + + # Act + # Extract Entries from specified Markdown files + entry_nodes, file_to_entries = extract_markdown_entries(markdown_files=[markdownfile]) + + # Process Each Entry from All Notes Files + jsonl_string = convert_markdown_maps_to_jsonl(convert_markdown_entries_to_maps(entry_nodes, file_to_entries)) + jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()] + + # Assert + assert len(jsonl_data) == 1 + + +def test_single_markdown_entry_to_jsonl(tmp_path): + "Convert markdown entry from single file to jsonl." + # Arrange + entry = f'''### Heading + \t\r + Body Line 1 + ''' + markdownfile = create_file(tmp_path, entry) + + # Act + # Extract Entries from specified Markdown files + entries, entry_to_file_map = extract_markdown_entries(markdown_files=[markdownfile]) + + # Process Each Entry from All Notes Files + jsonl_string = convert_markdown_maps_to_jsonl(convert_markdown_entries_to_maps(entries, entry_to_file_map)) + jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()] + + # Assert + assert len(jsonl_data) == 1 + + +def test_multiple_markdown_entries_to_jsonl(tmp_path): + "Convert multiple markdown entries from single file to jsonl." 
+ # Arrange + entry = f''' +### Heading 1 + \t\r + Heading 1 Body Line 1 +### Heading 2 + \t\r + Heading 2 Body Line 2 + ''' + markdownfile = create_file(tmp_path, entry) + + # Act + # Extract Entries from specified Markdown files + entries, entry_to_file_map = extract_markdown_entries(markdown_files=[markdownfile]) + + # Process Each Entry from All Notes Files + jsonl_string = convert_markdown_maps_to_jsonl(convert_markdown_entries_to_maps(entries, entry_to_file_map)) + jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()] + + # Assert + assert len(jsonl_data) == 2 + + +# Helper Functions +def create_file(tmp_path, entry, filename="test.md"): + markdown_file = tmp_path / f"notes/{filename}" + markdown_file.parent.mkdir() + markdown_file.touch() + markdown_file.write_text(entry) + return markdown_file diff --git a/tests/test_org_to_jsonl.py b/tests/test_org_to_jsonl.py index 39b3e91f..eaac5ef8 100644 --- a/tests/test_org_to_jsonl.py +++ b/tests/test_org_to_jsonl.py @@ -2,7 +2,7 @@ import json # Internal Packages -from src.processor.org_mode.org_to_jsonl import convert_org_entries_to_jsonl, extract_org_entries +from src.processor.org_mode.org_to_jsonl import convert_org_entries_to_jsonl, convert_org_nodes_to_entries, extract_org_entries from src.utils.helpers import is_none_or_empty @@ -20,10 +20,11 @@ def test_entry_with_empty_body_line_to_jsonl(tmp_path): # Act # Extract Entries from specified Org files - entries, entry_to_file_map = extract_org_entries(org_files=[orgfile]) + entry_nodes, file_to_entries = extract_org_entries(org_files=[orgfile]) # Process Each Entry from All Notes Files - jsonl_data = convert_org_entries_to_jsonl(entries, entry_to_file_map) + entries = convert_org_nodes_to_entries(entry_nodes, file_to_entries) + jsonl_data = convert_org_entries_to_jsonl(entries) # Assert assert is_none_or_empty(jsonl_data) @@ -46,7 +47,7 @@ def test_entry_with_body_to_jsonl(tmp_path): entries, entry_to_file_map = extract_org_entries(org_files=[orgfile]) # Process Each Entry from All Notes Files - jsonl_string = convert_org_entries_to_jsonl(entries, entry_to_file_map) + jsonl_string = convert_org_entries_to_jsonl(convert_org_nodes_to_entries(entries, entry_to_file_map)) jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()] # Assert @@ -64,10 +65,11 @@ def test_file_with_no_headings_to_jsonl(tmp_path): # Act # Extract Entries from specified Org files - entries, entry_to_file_map = extract_org_entries(org_files=[orgfile]) + entry_nodes, file_to_entries = extract_org_entries(org_files=[orgfile]) # Process Each Entry from All Notes Files - jsonl_string = convert_org_entries_to_jsonl(entries, entry_to_file_map) + entries = convert_org_nodes_to_entries(entry_nodes, file_to_entries) + jsonl_string = convert_org_entries_to_jsonl(entries) jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()] # Assert diff --git a/tests/test_orgnode.py b/tests/test_orgnode.py index eebf7f8a..c1e0aaa9 100644 --- a/tests/test_orgnode.py +++ b/tests/test_orgnode.py @@ -21,7 +21,7 @@ def test_parse_entry_with_no_headings(tmp_path): # Assert assert len(entries) == 1 assert entries[0].Heading() == f'{orgfile}' - assert entries[0].Tags() == set() + assert entries[0].Tags() == list() assert entries[0].Body() == "Body Line 1" assert entries[0].Priority() == "" assert entries[0].Property("ID") == "" @@ -45,7 +45,7 @@ Body Line 1''' # Assert assert len(entries) == 1 assert entries[0].Heading() == "Heading" - assert entries[0].Tags() == 
set() + assert entries[0].Tags() == list() assert entries[0].Body() == "Body Line 1" assert entries[0].Priority() == "" assert entries[0].Property("ID") == "" @@ -79,7 +79,7 @@ Body Line 2''' assert len(entries) == 1 assert entries[0].Heading() == "Heading" assert entries[0].Todo() == "DONE" - assert entries[0].Tags() == {"Tag1", "TAG2", "tag3"} + assert entries[0].Tags() == ["Tag1", "TAG2", "tag3"] assert entries[0].Body() == "- Clocked Log 1\nBody Line 1\nBody Line 2" assert entries[0].Priority() == "A" assert entries[0].Property("ID") == "id:123-456-789-4234-1231" @@ -178,7 +178,7 @@ Body 2 for index, entry in enumerate(entries): assert entry.Heading() == f"Heading{index+1}" assert entry.Todo() == "FAILED" if index == 0 else "CANCELLED" - assert entry.Tags() == {f"tag{index+1}"} + assert entry.Tags() == [f"tag{index+1}"] assert entry.Body() == f"- Clocked Log {index+1}\nBody {index+1}\n\n" assert entry.Priority() == "A" assert entry.Property("ID") == f"id:123-456-789-4234-000{index+1}" @@ -202,7 +202,7 @@ Body Line 1''' # Assert assert len(entries) == 1 assert entries[0].Heading() == f'{orgfile}' - assert entries[0].Tags() == set() + assert entries[0].Tags() == list() assert entries[0].Body() == "Body Line 1" assert entries[0].Priority() == "" assert entries[0].Property("ID") == "" @@ -225,7 +225,7 @@ Body Line 1''' # Assert assert len(entries) == 1 assert entries[0].Heading() == 'test' - assert entries[0].Tags() == set() + assert entries[0].Tags() == list() assert entries[0].Body() == "Body Line 1" assert entries[0].Priority() == "" assert entries[0].Property("ID") == "" @@ -249,7 +249,7 @@ Body Line 1 # Assert assert len(entries) == 1 assert entries[0].Heading() == 'title1 title2' - assert entries[0].Tags() == set() + assert entries[0].Tags() == list() assert entries[0].Body() == "Body Line 1\n" assert entries[0].Priority() == "" assert entries[0].Property("ID") == "" diff --git a/tests/test_text_search.py b/tests/test_text_search.py index 6a1471c9..dce1070a 100644 --- a/tests/test_text_search.py +++ b/tests/test_text_search.py @@ -100,3 +100,32 @@ def test_asymmetric_reload(content_config: ContentConfig, search_config: SearchC # delete reload test file added content_config.org.input_files = [] file_to_add_on_reload.unlink() + + +# ---------------------------------------------------------------------------------------------------- +def test_incremental_update(content_config: ContentConfig, search_config: SearchConfig): + # Arrange + initial_notes_model = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=True) + + assert len(initial_notes_model.entries) == 10 + assert len(initial_notes_model.corpus_embeddings) == 10 + + file_to_add_on_update = Path(content_config.org.input_filter).parent / "update.org" + content_config.org.input_files = [f'{file_to_add_on_update}'] + + # append Org-Mode Entry to first Org Input File in Config + with open(file_to_add_on_update, "w") as f: + f.write("\n* A Chihuahua doing Tango\n- Saw a super cute video of a chihuahua doing the Tango on Youtube\n") + + # Act + # update embeddings, entries with the newly added note + initial_notes_model = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False) + + # verify new entry added in updated embeddings, entries + assert len(initial_notes_model.entries) == 11 + assert len(initial_notes_model.corpus_embeddings) == 11 + + # Cleanup + # delete file added for update testing + content_config.org.input_files = [] + 
file_to_add_on_update.unlink() diff --git a/tests/test_word_filter.py b/tests/test_word_filter.py index 3efe8ed9..db23c2c6 100644 --- a/tests/test_word_filter.py +++ b/tests/test_word_filter.py @@ -1,15 +1,12 @@ -# External Packages -import torch - # Application Packages from src.search_filter.word_filter import WordFilter from src.utils.config import SearchType -def test_no_word_filter(tmp_path): +def test_no_word_filter(): # Arrange - word_filter = WordFilter(tmp_path, SearchType.Org) - embeddings, entries = arrange_content() + word_filter = WordFilter() + entries = arrange_content() q_with_no_filter = 'head tail' # Act @@ -22,10 +19,10 @@ def test_no_word_filter(tmp_path): assert entry_indices == {0, 1, 2, 3} -def test_word_exclude_filter(tmp_path): +def test_word_exclude_filter(): # Arrange - word_filter = WordFilter(tmp_path, SearchType.Org) - embeddings, entries = arrange_content() + word_filter = WordFilter() + entries = arrange_content() q_with_exclude_filter = 'head -"exclude_word" tail' # Act @@ -38,10 +35,10 @@ def test_word_exclude_filter(tmp_path): assert entry_indices == {0, 2} -def test_word_include_filter(tmp_path): +def test_word_include_filter(): # Arrange - word_filter = WordFilter(tmp_path, SearchType.Org) - embeddings, entries = arrange_content() + word_filter = WordFilter() + entries = arrange_content() query_with_include_filter = 'head +"include_word" tail' # Act @@ -54,10 +51,10 @@ def test_word_include_filter(tmp_path): assert entry_indices == {2, 3} -def test_word_include_and_exclude_filter(tmp_path): +def test_word_include_and_exclude_filter(): # Arrange - word_filter = WordFilter(tmp_path, SearchType.Org) - embeddings, entries = arrange_content() + word_filter = WordFilter() + entries = arrange_content() query_with_include_and_exclude_filter = 'head +"include_word" -"exclude_word" tail' # Act @@ -71,11 +68,10 @@ def test_word_include_and_exclude_filter(tmp_path): def arrange_content(): - embeddings = torch.randn(4, 10) entries = [ {'compiled': '', 'raw': 'Minimal Entry'}, {'compiled': '', 'raw': 'Entry with exclude_word'}, {'compiled': '', 'raw': 'Entry with include_word'}, {'compiled': '', 'raw': 'Entry with include_word and exclude_word'}] - return embeddings, entries + return entries
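
Illustrative usage note (not part of the patch): a minimal sketch of how the new mark_entries_for_update helper introduced in src/utils/helpers.py is expected to behave, assuming that module is importable. Entries are dictionaries keyed by 'compiled'; entries already present in the previous index keep their old id so their embeddings can be reused, while new entries are marked with an id of None so compute_embeddings only encodes those.

# Sketch only; assumes src.utils.helpers from this patch is on the Python path.
import logging

from src.utils.helpers import mark_entries_for_update

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Entries indexed on a previous run (their positions map to rows in the saved embeddings)
previous_entries = [
    {'compiled': 'old note A', 'raw': 'old note A'},
    {'compiled': 'old note B', 'raw': 'old note B'},
]
# Entries parsed on the current run: one unchanged, one new
current_entries = [
    {'compiled': 'old note A', 'raw': 'old note A'},
    {'compiled': 'new note C', 'raw': 'new note C'},
]

entries_with_ids = mark_entries_for_update(current_entries, previous_entries, key='compiled', logger=logger)

# Existing entries keep their previous id; new entries come back with id None,
# which is what compute_embeddings checks to decide what needs encoding.
for entry_id, entry in entries_with_ids:
    print(entry_id, entry['compiled'])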