Wednesday, April 17, 2013

Using I2C FIFO


FIFO will enhance the performance of i2c,
Mutli byte transfer.
If slave device is having something like 32 bit of register address and data.

Below is an example of reading a i2c slave register.
Register address and data is 32 bits wide.



When FIFO is disabled it took 2.47 ms to complete the entire transaction compared to 391 us when FIFO enabled.

Tuesday, April 16, 2013

I2C Clock Stretching




Clock stretching is a phenomenon where the I2C slave pull the SCL line low on the 9th clock of  every I2C data transfer (before the ACK stage). This is done to inform the master that client is not ready for another transfer.

He is a classic example,




The exact verbatim of clk stretching from the i2c spec is as follows,

“After each acknowledge (A) or not-acknowledge bit (A) the active master disables its current-source pull-up circuit. This enables other devices to delay the serial transfer by stretching the LOW period of the SCLH signal. The active master re-enables its current-source pull-up circuit again when all devices are released and the SCLH signal reaches a HIGH level,”

Thursday, March 21, 2013

Debuggin Oops using Crash Utility


Crash log snippet of binder crash,

Before you start make sure the ramdump and vmlinux matches by using strings command,
strings vmlinux | grep "Linux version"

[49081.080352] Unable to handle kernel NULL pointer dereference at virtual address 00000030
[49081.088500] pgd = d748c000
[49081.091186] [00000030] *pgd=00000000
[49081.094787] Internal error: Oops: 5 [#1] PREEMPT SMP ARM
[49081.100067] kona_fb: die notifier invoked
[49081.104064] kona_display_crash_image:359 image_idx=4
[49081.109863] post_async:540 AXIPV has received the same buffer which is used
[49081.116790] post_async:541 It is likely that we see display tearing
[49081.123046] post_async:542 Preventing a potential lock-up by signalling
[49081.129669] Modules linked in: [last unloaded: bcmdhd_4330]
[49081.135253] CPU: 1    Tainted: G        W     (3.4.5+ #1)
[49081.140655] PC is at binder_get_node+0x8/0x34
[49081.144989] LR is at binder_thread_write+0x147c/0x239c
[49081.150115] pc : []    lr : []    psr: 20000053
[49081.150146] sp : f43a3e10  ip : f8f004fc  fp : 00000000
[49081.161560] r10: f8f004c4  r9 : 73622a85  r8 : 00000064
[49081.166778] r7 : f43a2000  r6 : f8f00460  r5 : 411e36f8  r4 : c0a69260
[49081.173278] r3 : 00000010  r2 : 411e3858  r1 : 411e36f8  r0 : f32c7a40
[49081.179779] Flags: nzCv  IRQs on  FIQs off  Mode SVC_32  ISA ARM  Segment user
[49081.186981] Control: 10c53c7d  Table: 9948c04a  DAC: 00000015

Disassemble of binder_get_node,

crash> dis binder_get_node
0xc03dc4a8 :   ldr     r3, [r0, #12]
0xc03dc4ac : b       0xc03dc4cc
0xc03dc4b0 : ldr     r2, [r3, #32]
0xc03dc4b4 :        sub     r0, r3, #16
0xc03dc4b8 :        cmp     r1, r2
0xc03dc4bc :        ldrcc   r3, [r3, #8]
0xc03dc4c0 :        bcc     0xc03dc4cc
0xc03dc4c4 :        bxls    lr
0xc03dc4c8 :        ldr     r3, [r3, #4]
0xc03dc4cc :        cmp     r3, #0
0xc03dc4d0 :        bne     0xc03dc4b0
0xc03dc4d4 :        mov     r0, r3
0xc03dc4d8 :        bx      lr
crash>

ldr  r3,[r0, #12]
From crash log above, r0 = f32c7a40

To confirm read memory 0xf32c7a40 + 0xc using  crash,

crash> rd f32c7a4c
f32c7a4c:  00008000                              ....
crash>

But r3 is 0x10. Which is not matching with 0x8000. So this means r0 has changed (since ldr  r3,[r0, #12]). Proves that crash not happened in first instance. It had looped for some time

C code binder_get_node,

static struct binder_node *binder_get_node(struct binder_proc *proc,
                                           void __user *ptr)
{
        struct rb_node *n = proc->nodes.rb_node;
        struct binder_node *node;

        while (n) {
                node = rb_entry(n, struct binder_node, rb_node);
                if (ptr < node->ptr)
                        n = n->rb_left;
                else if (ptr > node->ptr)
                        n = n->rb_right;
                else
                        return node;
        }
        return NULL;
}

Back trace from crash log,

[49081.971740] [] (binder_get_node+0x8/0x34) from [] (binder_thread_write+0x147c/0x239c)
[49081.981262] [] (binder_thread_write+0x147c/0x239c) from [] (binder_ioctl+0x37c/0x974)
[49081.990814] [] (binder_ioctl+0x37c/0x974) from [] (do_vfs_ioctl+0x4d8/0x54c)
[49081.999572] [] (do_vfs_ioctl+0x4d8/0x54c) from [] (sys_ioctl+0x34/0x54)
[49082.007904] [] (sys_ioctl+0x34/0x54) from [] (ret_fast_syscall+0x0/0x48)

Since the back trace was showing a sys_ioctl we iterated on stack to pick up the struct file * associated with.
Here is the hit,

crash> struct file f37cbef0
struct file {
  f_u = {
    fu_list = {
      next = 0xf37cb550,
      prev = 0xf2a0def0
    },
    fu_rcuhead = {
      next = 0xf37cb550,
      func = 0xf2a0def0
    }
 },
  f_path = {
    mnt = 0xf5015e58,
    dentry = 0xf3cc5d08
  },
  f_op = 0xc064dc0c,
----sniped---
  f_version = 0,
  private_data = 0xf2e9ca00,
  f_ep_links = {
    next = 0xf37cbf74,
    prev = 0xf37cbf74
  },
  f_tfile_llink = {
    next = 0xf37cbf7c,
    prev = 0xf37cbf7c
  },
  f_mapping = 0xf4090ad0
}

f_op symbol says it is binder_fops,

crash> sym 0xc064dc0c
c064dc0c (r) binder_fops
crash>

From code(drivers/staging/android/binder.c +3012) we figured out that private_data is binder_proc

crash> struct binder_proc 0xf2e9ca00
struct binder_proc {
  proc_node = {
    next = 0xf3514200,
    pprev = 0xc0a69c84
  },
  threads = {
    rb_node = 0xf2a10c84
  },
  nodes = {
    rb_node = 0xf2d1a7d0
  },
  refs_by_desc = {
    rb_node = 0xf3126284
  },
  refs_by_node = {

From the analysis of the code, we understand that crash happened while traversing the rb tree.
So tried to parse the rb tree using crash,

crash> tree -t rbtree -N 0xf2d1a7d0 -p
f2d1a7d0
  position: root
f2a00ad0
  position: root/l
f2a90e50
  position: root/r
f32c7a50
  position: root/r/r
10
  position: root/r/r/l
tree: invalid kernel virtual address: 18  type: "rb_node rb_left"
crash>

This shows that rb tress has a corrupted address to next left node.

0x10 which is the same value in r3 during crash.
And crash happened because of address 0x30.
[49081.080352] Unable to handle kernel NULL pointer dereference at virtual address 00000030

This explains why crashed.
0xc03dc4b0 : ldr     r2, [r3, #32]

R3 + #32 = 0x10 + 0x20 = 0x30

Dumping the corrupted struct rb_node from memory,
crash> struct rb_node f32c7a50
struct rb_node {
  rb_parent_color = 44100,
  rb_right = 0x2,
  rb_left = 0x10
}
crash>

This shows the rb tree is corrupted. Because rb_left and rb_right should have a valid kernel address or 0.

Dumping the memory from f32c7a00, 100 elements to inspect the memory,
crash> rd f32c7a00 100
f32c7a00:  f32c79fc 00000000 00000000 00000000   .y,.............
f32c7a10:  00000000 00000000 00000000 00000000   ................
f32c7a20:  00000000 00000000 00000000 00000000   ................
f32c7a30:  00000000 00000000 00000000 00000000   ................
f32c7a40:  01bb5e6b c03eed84 f4727600 00008000   k^....>..vr.....
f32c7a50:  0000ac44 00000002 00000010 f5c10000   D...............
f32c7a60:  00010000 b7c10000 00000000 00000000   ................
f32c7a70:  411e3858 411e3840 00001fe5 00000002   X8.A@8.A........
f32c7a80:  f32c7a7c 00000001 00000000 00000000   |z,.............
f32c7a90:  00000000 00000000 00000000 00000000   ................
f32c7aa0:  00000000 00000000 00000000 00000000   ................
f32c7ab0:  00000000 00000000 00000000 00000000   ................
f32c7ac0:  00000000 00000000 f32c7ac8 f32c7ac8   .........z,..z,.
f32c7ad0:  00000000 00000000 00000010 00000001   ................
f32c7ae0:  00000001 00000000 00000001 00000001   ................
f32c7af0:  00000000 00000000 00000000 f3faf330   ............0...
f32c7b00:  f3a6c000 00000000 00000000 00000000   ................
f32c7b10:  00000000 00000000 00000000 00000000   ................
f32c7b20:  00000000 00000000 00000000 00000000   ................
f32c7b30:  00000000 00000000 00000000 00000000   ................
f32c7b40:  006aefa7 f32c7b44 f32c7b44 00000003   ..j.D{,.D{,.....
f32c7b50:  f2bf1450 f3bef7d0 00000000 00000000   P...............
f32c7b60:  f3854bdc 00000001 00000000 00000000   .K..............
f32c7b70:  54d4c670 54d3f828 00001fe5 f32c7b7c   p..T(..T....|{,.
f32c7b80:  f32c7b7c 00000000 00000000 00000000   |{,.............

Dumping with symbols,
crash> rd -s f32c7a00 100
f32c7a00:  __crc_v4l2_norm_to_name+482177 00000000 00000000 00000000
f32c7a10:  00000000 00000000 00000000 00000000
f32c7a20:  00000000 00000000 00000000 00000000
f32c7a30:  00000000 00000000 00000000 00000000
f32c7a40:  01bb5e6b AUDIO_DRIVER_InterruptPeriodCB __crc_bmp18x_disable+1706299 00008000
f32c7a50:  0000ac44 00000002 00000010 __crc_generic_segment_checks+42732
f32c7a60:  00010000 b7c10000 00000000 00000000
f32c7a70:  411e3858 411e3840 00001fe5 00000002
f32c7a80:  __crc_v4l2_norm_to_name+482305 00000001 00000000 00000000
f32c7a90:  00000000 00000000 00000000 00000000
f32c7aa0:  00000000 00000000 00000000 00000000
f32c7ab0:  00000000 00000000 00000000 00000000
f32c7ac0:  00000000 00000000 __crc_v4l2_norm_to_name+482381 __crc_v4l2_norm_to_name+482381
f32c7ad0:  00000000 00000000 00000010 00000001
f32c7ae0:  00000001 00000000 00000001 00000001
f32c7af0:  00000000 00000000 00000000 __crc_qdisc_class_hash_grow+482810
f32c7b00:  __crc_rt_mutex_trylock+115850 00000000 00000000 00000000
f32c7b10:  00000000 00000000 00000000 00000000
f32c7b20:  00000000 00000000 00000000 00000000
f32c7b30:  00000000 00000000 00000000 00000000
f32c7b40:  006aefa7 __crc_v4l2_norm_to_name+482505 __crc_v4l2_norm_to_name+482505 00000003
f32c7b50:  __crc_usb_get_descriptor+260561 __crc_bdi_set_max_ratio+149137 00000000 00000000
f32c7b60:  __crc_snd_interval_ratnum+776842 00000001 00000000 00000000
f32c7b70:  54d4c670 54d3f828 00001fe5 __crc_v4l2_norm_to_name+482561
f32c7b80:  __crc_v4l2_norm_to_name+482561 00000000 00000000 00000000

Somebody has over written from f32c7a40
This causes rb_tree corruption.
Surprisingly there is an audio function address in the corrupted memory as highlighted above.


From the kernel logs we can see audio and rpc errors just before the crash.
Eg:
[48902.141143] WARNING: at /build/REL_sdb-common-android-jb-4.2.2_428843/android_hawaii_garnet_edn010/kernel/hawaii/3.4.5/sound/caph_hawaii/caph_pcm.c:248 PcmHwFree+0x8c/0xdc()
[48902.157012] ERROR timeout waiting for STOP REQ.t=30000 ret=0
[48902.162719] Modules linked in: [last unloaded: bcmdhd_4330]
[48902.168579] [] (unwind_backtrace+0x0/0x11c) from [] (warn_slowpath_common+0x4c/0x64)
[48902.178192] [] (warn_slowpath_common+0x4c/0x64) from [] (warn_slowpath_fmt+0x2c/0x3c)
[48902.187957] [] (warn_slowpath_fmt+0x2c/0x3c) from [] (PcmHwFree+0x8c/0xdc)
[48902.196716] [] (PcmHwFree+0x8c/0xdc) from [] (snd_pcm_release_substream+0x44/0xa4)
[48902.206512] [] (snd_pcm_release_substream+0x44/0xa4) from [] (snd_pcm_release+0x28/0x6c)
[48902.216735] [] (snd_pcm_release+0x28/0x6c) from [] (fput+0x10c/0x214)
[48902.224914] [] (fput+0x10c/0x214) from [] (filp_close+0x70/0x7c)
[48902.232879] [] (filp_close+0x70/0x7c) from [] (put_files_struct+0x90/0xec)
[48902.241668] [] (put_files_struct+0x90/0xec) from [] (do_exit+0x280/0x7e0)
[48902.250671] [] (do_exit+0x280/0x7e0) from [] (do_group_exit+0x88/0xb4)
[48902.259338] [] (do_group_exit+0x88/0xb4) from [] (get_signal_to_deliver+0x510/0x568)
[48902.269470] [] (get_signal_to_deliver+0x510/0x568) from [] (do_signal+0xc4/0x518)
[48902.278808] [] (do_signal+0xc4/0x518) from [] (do_notify_resume+0x18/0x50)
[48902.287567] [] (do_notify_resume+0x18/0x50) from [] (work_pending+0x24/0x28)

Conclusion is one possible suspect for corruption can be Audio.

Interrupts are disabled in ISR

Linux kernel Stack Memory


Tuesday, February 5, 2013

T32 Simulator

We use simulator to debug ramdumps

To view a memory as structure,
var.view (struct rcu_dynticks*)0xC162A228

To view core 1 register,
register /CORE 1

To view core 1 stack frame,
var.frame /core 1



Friday, February 1, 2013

Debugging an Oops..


<1>[  161.824645] Unable to handle kernel NULL pointer dereference at virtual address 00000132
<1>[  161.832702] pgd = dba48000
<1>[  161.835418] [00000132] *pgd=00000000
<0>[  161.839019] Internal error: Oops: 1 [#1] PREEMPT SMP ARM
<3>[  161.844329] kona_fb: die notifier invoked
<3>[  161.848327] kona_display_crash_image:355 image_idx=2
<3>[  161.854614] post_async:540 AXIPV has received the same buffer which is used
<3>[  161.861572] post_async:541 It is likely that we see display tearing
<3>[  161.867828] post_async:542 Preventing a potential lock-up by signalling
<4>[  161.874450] Modules linked in:
<4>[  161.877532] CPU: 1    Tainted: G        W     (3.4.5+ #1)
<4>[  161.882934] PC is at ttwu_stat+0x70/0x168
<4>[  161.886932] LR is at ttwu_stat+0x2c/0x168
<4>[  161.890930] pc : []    lr : []    psr: 200000d3
<4>[  161.890960] sp : dbb87e48  ip : 00000001  fp : dbb87e74
<4>[  161.902374] r10: 00000002  r9 : c162c000  r8 : 00000001
<4>[  161.907592] r7 : c093c000  r6 : 00000002  r5 : 00000000  r4 : 00000001
<4>[  161.914123] r3 : 00000002  r2 : dbb87e38  r1 : 00000130  r0 : 00000001
<4>[  161.920623] Flags: nzCv  IRQs off  FIQs off  Mode SVC_32  ISA ARM  Segment user
<4>[  161.927917] Control: 10c53c7d  Table: 9da4804a  DAC: 00000015

From System.map find out the address of ttwu_stat
------------------------
c007fe8c t ttwu_stat
------------------------

Now find the end of function (ttwu_stat+0x70/0x168) is
0xc007fe8c + 0x168 = 0xC007FFF4

Disassemble the code as follows,
arm-eabi-objdump -dS --start-address=0xc007fe8c --stop-address=0xC007FFF4 vmlinux



dumps/vmlinux:     file format elf32-littlearm


Disassembly of section .text:

c007fe8c :
BFD: Dwarf Error: mangled line number section.
c007fe8c:       e92d4ff0        push    {r4, r5, r6, r7, r8, r9, sl, fp, lr}
c007fe90:       e28db020        add     fp, sp, #32     ; 0x20
c007fe94:       e24dd00c        sub     sp, sp, #12     ; 0xc
c007fe98:       e1a06001        mov     r6, r1
c007fe9c:       e1a0a000        mov     sl, r0
c007fea0:       e50b2028        str     r2, [fp, #-40]
c007fea4:       eb06c42b        bl      c0230f58
c007fea8:       e59f213c        ldr     r2, [pc, #316]  ; c007ffec
c007feac:       e59f713c        ldr     r7, [pc, #316]  ; c007fff0
c007feb0:       e7929100        ldr     r9, [r2, r0, lsl #2]
c007feb4:       eb06c427        bl      c0230f58
c007feb8:       e0879009        add     r9, r7, r9
c007febc:       e1560000        cmp     r6, r0
c007fec0:       e1a08000        mov     r8, r0
c007fec4:       1a00000a        bne     c007fef4
c007fec8:       e5993548        ldr     r3, [r9, #1352]
c007fecc:       e3a0cf4a        mov     ip, #296        ; 0x128
c007fed0:       e3a00001        mov     r0, #1  ; 0x1
c007fed4:       e3a01000        mov     r1, #0  ; 0x0
c007fed8:       e2833001        add     r3, r3, #1      ; 0x1
c007fedc:       e5893548        str     r3, [r9, #1352]
c007fee0:       e18a20dc        ldrd    r2, [sl, ip]
c007fee4:       e0922000        adds    r2, r2, r0
c007fee8:       e0a33001        adc     r3, r3, r1
c007feec:       e18a20fc        strd    r2, [sl, ip]
c007fef0:       ea00001f        b       c007ff74
c007fef4:       e3a01e13        mov     r1, #304        ; 0x130
c007fef8:       e3a04001        mov     r4, #1  ; 0x1
c007fefc:       e18a20d1        ldrd    r2, [sl, r1]
c007ff00:       e3a05000        mov     r5, #0  ; 0x0
c007ff04:       e0922004        adds    r2, r2, r4
c007ff08:       e0a33005        adc     r3, r3, r5
c007ff0c:       e18a20f1        strd    r2, [sl, r1]
c007ff10:       eb00cfde        bl      c00b3e90 <__rcu_read_lock>
c007ff14:       e59f20d0        ldr     r2, [pc, #208]  ; c007ffec
c007ff18:       e3560000        cmp     r6, #0  ; 0x0
c007ff1c:       e3a01001        mov     r1, #1  ; 0x1
c007ff20:       e7923108        ldr     r3, [r2, r8, lsl #2]
c007ff24:       e286201f        add     r2, r6, #31     ; 0x1f
c007ff28:       a1a02006        movge   r2, r6
c007ff2c:       e206601f        and     r6, r6, #31     ; 0x1f
c007ff30:       e0877003        add     r7, r7, r3
c007ff34:       e1a022c2        asr     r2, r2, #5
c007ff38:       e1a06611        lsl     r6, r1, r6
c007ff3c:       e59734a8        ldr     r3, [r7, #1192]
c007ff40:       e1a02102        lsl     r2, r2, #2
c007ff44:       ea000007        b       c007ff68
c007ff48:       e0831002        add     r1, r3, r2
c007ff4c:       e59110f8        ldr     r1, [r1, #248]
c007ff50:       e1110006        tst     r1, r6
c007ff54:       159320dc        ldrne   r2, [r3, #220]
c007ff58:       12822001        addne   r2, r2, #1      ; 0x1
c007ff5c:       158320dc        strne   r2, [r3, #220]
c007ff60:       1a000002        bne     c007ff70
c007ff64:       e5933000        ldr     r3, [r3]
c007ff68:       e3530000        cmp     r3, #0  ; 0x0
c007ff6c:       1afffff5        bne     c007ff48
c007ff70:       eb00d511        bl      c00b53bc <__rcu_read_unlock>
c007ff74:       e51b3028        ldr     r3, [fp, #-40]
c007ff78:       e3130004        tst     r3, #4  ; 0x4
c007ff7c:       0a000006        beq     c007ff9c
c007ff80:       e3a0ce12        mov     ip, #288        ; 0x120
c007ff84:       e3a00001        mov     r0, #1  ; 0x1
c007ff88:       e18a20dc        ldrd    r2, [sl, ip]
c007ff8c:       e3a01000        mov     r1, #0  ; 0x0
c007ff90:       e0922000        adds    r2, r2, r0
c007ff94:       e0a33001        adc     r3, r3, r1
c007ff98:       e18a20fc        strd    r2, [sl, ip]
c007ff9c:       e5993544        ldr     r3, [r9, #1348]
c007ffa0:       e3a0ce11        mov     ip, #272        ; 0x110
c007ffa4:       e3a00001        mov     r0, #1  ; 0x1
c007ffa8:       e3a01000        mov     r1, #0  ; 0x0
c007ffac:       e2833001        add     r3, r3, #1      ; 0x1
c007ffb0:       e5893544        str     r3, [r9, #1348]
c007ffb4:       e18a20dc        ldrd    r2, [sl, ip]
c007ffb8:       e0922000        adds    r2, r2, r0
c007ffbc:       e0a33001        adc     r3, r3, r1
c007ffc0:       e18a20fc        strd    r2, [sl, ip]
c007ffc4:       e51b2028        ldr     r2, [fp, #-40]
c007ffc8:       e3120001        tst     r2, #1  ; 0x1
c007ffcc:       0a000004        beq     c007ffe4
c007ffd0:       e3a0cf46        mov     ip, #280        ; 0x118
c007ffd4:       e18a20dc        ldrd    r2, [sl, ip]
c007ffd8:       e0922000        adds    r2, r2, r0
c007ffdc:       e0a33001        adc     r3, r3, r1
c007ffe0:       e18a20fc        strd    r2, [sl, ip]
c007ffe4:       e24bd020        sub     sp, fp, #32     ; 0x20
c007ffe8:       e8bd8ff0        pop     {r4, r5, r6, r7, r8, r9, sl, fp, pc}
c007ffec:       c095e988        .word   0xc095e988
c007fff0:       c093c000        .word   0xc093c000


The PC is show in read above. So the instruction resulted in crash is,
c007fefc:       e18a20d1        ldrd    r2, [sl, r1]

i.e, reading a double word (ldrd) from address (s1(ie r10) and r1)
From the opps message r10 = 2 and r1 = 130. Both are highlighted in red.

so trying to load from address 132 which is an invalid address.

Possible mistakes,
Match the time stamp of ramdump and vmlinux first.
strings dumps/AP__0x82000000.hex | grep "Linux version"

strings dumps/vmlinux | grep "Linux version"

Both timestamps should match.