Clean up old data in DB

Hybris has possibility to clean up old unnecessary data from a database. OOTB is provided CronJob to remove old CronJobs, but there is nothing to clean up custom item types or even Hybris item types, like business processes and task logs.

OOTB Hybris provides GenericMaintenanceJobPerformable, which allows to execute any DB maintenance tasks, which would have low impact on DB and CPU/RAM usage of hybris execution node. To utilize that it is enough to implement MaintenanceCleanupStrategy. Also there is common sense to make such job configurable with list of item types for removing and modified time threshold.

For that purpose would be introducedCleanUpItemConfig item type, DbCleanUpCronjob item type for custom CronJob and relation between them:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
        <relation code="DbCleanUpCronjob2CleanUpItemConfig" localized="false" generate="true" autocreate="true">
            <sourceElement type="DbCleanUpCronjob" cardinality="one" qualifier="job">
                <modifiers read="true" write="true" search="true" initial="true" optional="true" unique="true"/>
            </sourceElement>
            <targetElement type="CleanUpItemConfig" cardinality="many" qualifier="itemsConfig" collectiontype="set">
                <modifiers read="true" write="true" search="true" optional="true" partof="true"/>
            </targetElement>
        </relation>

        <typegroup name="cronjob">
            <itemtype code="DbCleanUpCronjob" extends="CronJob" generate="true" autocreate="true">
                <description>Cronjob required to clean up database tables</description>
            </itemtype>
            <itemtype code="CleanUpItemConfig" generate="true" autocreate="true">
                <description>DbCleanUpCronjob item config</description>
                <deployment table="cleanupitemconfig" typecode="15576"/>
                <attributes>
                    <attribute qualifier="item" type="ComposedType">
                        <modifiers unique="true" optional="false" write="true" initial="true"
                                   read="true" removable="true" search="true"/>
                        <persistence type="property"/>
                    </attribute>
                    <attribute qualifier="days" type="java.lang.Integer">
                        <modifiers optional="false" write="true" read="true" removable="true" initial="true"/>
                        <persistence type="property"/>
                    </attribute>
                </attributes>
                <indexes>
                    <index name="itemIndex">
                        <key attribute="item"/>
                    </index>
                </indexes>
            </itemtype>
        </typegroup>

After creation of item types could be implemented MaintenanceCleanupStrategy. The main ideas is to retrieve from DB list of item types for removal and get all instances of provided item types with modified time threshold:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
package com.blog.core.cronjob.maintenance;

import com.blog.core.model.CleanUpItemConfigModel;
import com.blog.core.model.DbCleanUpCronjobModel;
import de.hybris.platform.core.model.ItemModel;
import de.hybris.platform.core.model.media.MediaModel;
import de.hybris.platform.jobs.maintenance.MaintenanceCleanupStrategy;
import de.hybris.platform.servicelayer.model.ModelService;
import de.hybris.platform.servicelayer.search.FlexibleSearchQuery;
import org.apache.log4j.Logger;
import org.joda.time.Days;
import org.joda.time.LocalDateTime;
import org.springframework.beans.factory.annotation.Required;

import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static java.lang.String.format;

public class CleanUpDbStrategy implements MaintenanceCleanupStrategy<ItemModel, DbCleanUpCronjobModel> {

    private static final Logger LOG = Logger.getLogger(CleanUpDbStrategy.class);

    private ModelService modelService;

    @Override
    public FlexibleSearchQuery createFetchQuery(DbCleanUpCronjobModel cjm) {
        return createQuery(cjm.getItemsConfig());
    }

    private FlexibleSearchQuery createQuery(Set<CleanUpItemConfigModel> itemsConfig) {
        final StringBuilder builder = new StringBuilder();

        Map<String, Object> params = new HashMap<>();

        if (itemsConfig.size() > 1) {

            builder.append("SELECT item.pk FROM (");

            Iterator<CleanUpItemConfigModel> itemConfigModels = itemsConfig.iterator();

            for (int i = 0; i < itemsConfig.size(); i++) {
                CleanUpItemConfigModel cleanUpItemConfig = itemConfigModels.next();
                String dateAlias = "date" + i;
                builder.append("{{ ");
                builder.append("SELECT {" + ItemModel.PK + "} FROM {" + cleanUpItemConfig.getItem().getCode() + "}");
                builder.append(" WHERE {" + ItemModel.MODIFIEDTIME + "} < ?" + dateAlias);

                if ("ImpExMedia".equalsIgnoreCase(cleanUpItemConfig.getItem().getCode())) {
                    builder.append(" AND {" + MediaModel.REMOVABLE + "} = 1");
                }

                builder.append(" }}");
                params.put(dateAlias, getThresholdDate(cleanUpItemConfig.getDays()));

                if (itemConfigModels.hasNext()) {
                    builder.append(" UNION ");
                }
            }

            builder.append(") item");

        } else {
            CleanUpItemConfigModel cleanUpItemConfig = itemsConfig.iterator().next();

            builder.append("SELECT {" + ItemModel.PK + "} FROM {" + cleanUpItemConfig.getItem().getCode() + "}");
            builder.append(" WHERE {" + ItemModel.MODIFIEDTIME + "} < ?date");

            params.put("date", getThresholdDate(cleanUpItemConfig.getDays()));

        }
        final FlexibleSearchQuery query = new FlexibleSearchQuery(builder);
        query.addQueryParameters(params);
        if (LOG.isDebugEnabled()) {
             LOG.debug(format("Query is [%s] with params [%s]", query.getQuery(), query.getQueryParameters().toString()));
        }
        return query;
    }

    private Date getThresholdDate(Integer days) {
        return new LocalDateTime().minus(Days.days(days)).toDate();
    }

    @Override
    public void process(List<ItemModel> elements) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(format("Found %d items to remove", elements.size()));
        }
        modelService.removeAll(elements);
    }

    @Required
    public void setModelService(final ModelService modelService) {
        this.modelService = modelService;
    }

}

Now we can create a bean for maintenance strategy:

1
2
3
4
5
6
7
8
    <bean id="cleanUpDbStrategy" parent="abstractGenericMaintenanceJobPerformable">
        <property name="maintenanceCleanupStrategy">
            <bean class="com.blog.core.cronjob.maintenance.CleanUpDbStrategy">
                <property name="modelService" ref="modelService"/>
            </bean>
        </property>
        <property name="pageSize" value="100000"/>
    </bean>

And configure newly created CronJob with ImpEx:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
INSERT_UPDATE MaintenanceCleanupJob; code[unique = true]   ; springId                         ; threshold
                                   ; CleanUpDBJob          ; cleanUpDbStrategy                ;

INSERT_UPDATE DbCleanUpCronjob; code[unique = true]  ; job(code)    ; logToDatabase[default = false]; logToFile[default = false]; retry[default = false]; active[default = true]; sessionLanguage(isoCode)[default = en]; sessionUser(uid)[default = admin]; errorMode(code)[default = IGNORE]
                              ; CleanUpDBJob-cronJob ; CleanUpDBJob ;

INSERT_UPDATE CleanUpItemConfig; item(code)[unique = true]; days; job(code)
                               ; ProcessTaskLog           ; 30  ; CleanUpDBJob-cronJob
                               ; BusinessProcess          ; 90  ; CleanUpDBJob-cronJob
                               ; ImpExMedia               ; 10  ; CleanUpDBJob-cronJob
                               ; StoredHttpSession        ; 1   ; CleanUpDBJob-cronJob

INSERT_UPDATE Trigger; cronJob(code)[unique = true]  ; second[default = 0]; minute; hour; day[default = -1]; month[default = -1]; year[default = -1]; relative; active[default = true]; maxAcceptableDelay[default = -1];
                     ; CleanUpDBJob-cronJob          ;                    ; 00    ; 02  ;                  ;                    ;                   ; false   ;                       ;                                 ;

In ImpEx is defined Hybris OOTB item types, which are usually should be clean up after some period.

P.S. Be aware that removing of old ImpExMedia entries from DB also removes a real file from the media folder (even if it is Amazon S3 or other blob storage).

P.P.S. Don’t forget to check “Create essential data” during HAC update for proper CronJob creation.

comments powered by Disqus