/* * Copyright (C) 2017 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.android.server; import android.content.ContentResolver; import android.content.Context; import android.os.Build; import android.os.Environment; import android.os.FileUtils; import android.os.RecoverySystem; import android.os.SystemClock; import android.os.SystemProperties; import android.os.UserHandle; import android.provider.Settings; import android.text.format.DateUtils; import android.util.ExceptionUtils; import android.util.Log; import android.util.MathUtils; import android.util.Slog; import android.util.SparseArray; import com.android.internal.util.ArrayUtils; import com.android.server.pm.PackageManagerService; import java.io.File; /** * Utilities to help rescue the system from crash loops. Callers are expected to * report boot events and persistent app crashes, and if they happen frequently * enough this class will slowly escalate through several rescue operations * before finally rebooting and prompting the user if they want to wipe data as * a last resort. * * @hide */ public class RescueParty { private static final String TAG = "RescueParty"; private static final String PROP_ENABLE_RESCUE = "persist.sys.enable_rescue"; private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue"; private static final String PROP_RESCUE_LEVEL = "sys.rescue_level"; private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count"; private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start"; private static final String PROP_VIRTUAL_DEVICE = "ro.hardware.virtual_device"; private static final int LEVEL_NONE = 0; private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1; private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2; private static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3; private static final int LEVEL_FACTORY_RESET = 4; /** Threshold for boot loops */ private static final Threshold sBoot = new BootThreshold(); /** Threshold for app crash loops */ private static SparseArray sApps = new SparseArray<>(); private static boolean isDisabled() { // Check if we're explicitly enabled for testing if (SystemProperties.getBoolean(PROP_ENABLE_RESCUE, false)) { return false; } // We're disabled on all engineering devices if (Build.IS_ENG) { Slog.v(TAG, "Disabled because of eng build"); return true; } // We're disabled on userdebug devices connected over USB, since that's // a decent signal that someone is actively trying to debug the device, // or that it's in a lab environment. if (Build.IS_USERDEBUG && isUsbActive()) { Slog.v(TAG, "Disabled because of active USB connection"); return true; } // One last-ditch check if (SystemProperties.getBoolean(PROP_DISABLE_RESCUE, false)) { Slog.v(TAG, "Disabled because of manual property"); return true; } return false; } /** * Take note of a boot event. If we notice too many of these events * happening in rapid succession, we'll send out a rescue party. */ public static void noteBoot(Context context) { if (isDisabled()) return; if (sBoot.incrementAndTest()) { sBoot.reset(); incrementRescueLevel(sBoot.uid); executeRescueLevel(context); } } /** * Take note of a persistent app crash. If we notice too many of these * events happening in rapid succession, we'll send out a rescue party. */ public static void notePersistentAppCrash(Context context, int uid) { if (isDisabled()) return; Threshold t = sApps.get(uid); if (t == null) { t = new AppThreshold(uid); sApps.put(uid, t); } if (t.incrementAndTest()) { t.reset(); incrementRescueLevel(t.uid); executeRescueLevel(context); } } /** * Check if we're currently attempting to reboot for a factory reset. */ public static boolean isAttemptingFactoryReset() { return SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) == LEVEL_FACTORY_RESET; } /** * Escalate to the next rescue level. After incrementing the level you'll * probably want to call {@link #executeRescueLevel(Context)}. */ private static void incrementRescueLevel(int triggerUid) { final int level = MathUtils.constrain( SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1, LEVEL_NONE, LEVEL_FACTORY_RESET); SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level)); EventLogTags.writeRescueLevel(level, triggerUid); PackageManagerService.logCriticalInfo(Log.WARN, "Incremented rescue level to " + levelToString(level) + " triggered by UID " + triggerUid); } /** * Called when {@code SettingsProvider} has been published, which is a good * opportunity to reset any settings depending on our rescue level. */ public static void onSettingsProviderPublished(Context context) { executeRescueLevel(context); } private static void executeRescueLevel(Context context) { final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE); if (level == LEVEL_NONE) return; Slog.w(TAG, "Attempting rescue level " + levelToString(level)); try { executeRescueLevelInternal(context, level); EventLogTags.writeRescueSuccess(level); PackageManagerService.logCriticalInfo(Log.DEBUG, "Finished rescue level " + levelToString(level)); } catch (Throwable t) { final String msg = ExceptionUtils.getCompleteMessage(t); EventLogTags.writeRescueFailure(level, msg); PackageManagerService.logCriticalInfo(Log.ERROR, "Failed rescue level " + levelToString(level) + ": " + msg); } } private static void executeRescueLevelInternal(Context context, int level) throws Exception { switch (level) { case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS); break; case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_CHANGES); break; case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: resetAllSettings(context, Settings.RESET_MODE_TRUSTED_DEFAULTS); break; case LEVEL_FACTORY_RESET: RecoverySystem.rebootPromptAndWipeUserData(context, TAG); break; } } private static void resetAllSettings(Context context, int mode) throws Exception { // Try our best to reset all settings possible, and once finished // rethrow any exception that we encountered Exception res = null; final ContentResolver resolver = context.getContentResolver(); try { Settings.Global.resetToDefaultsAsUser(resolver, null, mode, UserHandle.USER_SYSTEM); } catch (Throwable t) { res = new RuntimeException("Failed to reset global settings", t); } for (int userId : getAllUserIds()) { try { Settings.Secure.resetToDefaultsAsUser(resolver, null, mode, userId); } catch (Throwable t) { res = new RuntimeException("Failed to reset secure settings for " + userId, t); } } if (res != null) { throw res; } } /** * Threshold that can be triggered if a number of events occur within a * window of time. */ private abstract static class Threshold { public abstract int getCount(); public abstract void setCount(int count); public abstract long getStart(); public abstract void setStart(long start); private final int uid; private final int triggerCount; private final long triggerWindow; public Threshold(int uid, int triggerCount, long triggerWindow) { this.uid = uid; this.triggerCount = triggerCount; this.triggerWindow = triggerWindow; } public void reset() { setCount(0); setStart(0); } /** * @return if this threshold has been triggered */ public boolean incrementAndTest() { final long now = SystemClock.elapsedRealtime(); final long window = now - getStart(); if (window > triggerWindow) { setCount(1); setStart(now); return false; } else { int count = getCount() + 1; setCount(count); EventLogTags.writeRescueNote(uid, count, window); Slog.w(TAG, "Noticed " + count + " events for UID " + uid + " in last " + (window / 1000) + " sec"); return (count >= triggerCount); } } } /** * Specialization of {@link Threshold} for monitoring boot events. It stores * counters in system properties for robustness. */ private static class BootThreshold extends Threshold { public BootThreshold() { // We're interested in 5 events in any 300 second period; this // window is super relaxed because booting can take a long time if // forced to dexopt things. super(android.os.Process.ROOT_UID, 5, 300 * DateUtils.SECOND_IN_MILLIS); } @Override public int getCount() { return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0); } @Override public void setCount(int count) { SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count)); } @Override public long getStart() { return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0); } @Override public void setStart(long start) { SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(start)); } } /** * Specialization of {@link Threshold} for monitoring app crashes. It stores * counters in memory. */ private static class AppThreshold extends Threshold { private int count; private long start; public AppThreshold(int uid) { // We're interested in 5 events in any 30 second period; apps crash // pretty quickly so we can keep a tight leash on them. super(uid, 5, 30 * DateUtils.SECOND_IN_MILLIS); } @Override public int getCount() { return count; } @Override public void setCount(int count) { this.count = count; } @Override public long getStart() { return start; } @Override public void setStart(long start) { this.start = start; } } private static int[] getAllUserIds() { int[] userIds = { UserHandle.USER_SYSTEM }; try { for (File file : FileUtils.listFilesOrEmpty(Environment.getDataSystemDeDirectory())) { try { final int userId = Integer.parseInt(file.getName()); if (userId != UserHandle.USER_SYSTEM) { userIds = ArrayUtils.appendInt(userIds, userId); } } catch (NumberFormatException ignored) { } } } catch (Throwable t) { Slog.w(TAG, "Trouble discovering users", t); } return userIds; } /** * Hacky test to check if the device has an active USB connection, which is * a good proxy for someone doing local development work. */ private static boolean isUsbActive() { if (SystemProperties.getBoolean(PROP_VIRTUAL_DEVICE, false)) { Slog.v(TAG, "Assuming virtual device is connected over USB"); return true; } try { final String state = FileUtils .readTextFile(new File("/sys/class/android_usb/android0/state"), 128, ""); return "CONFIGURED".equals(state.trim()); } catch (Throwable t) { Slog.w(TAG, "Failed to determine if device was on USB", t); return false; } } private static String levelToString(int level) { switch (level) { case LEVEL_NONE: return "NONE"; case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: return "RESET_SETTINGS_UNTRUSTED_DEFAULTS"; case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: return "RESET_SETTINGS_UNTRUSTED_CHANGES"; case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: return "RESET_SETTINGS_TRUSTED_DEFAULTS"; case LEVEL_FACTORY_RESET: return "FACTORY_RESET"; default: return Integer.toString(level); } } }