https://static.getml.com/logo/logo-main.png

What is getML?

getML is a software for automated machine learning (AutoML) with a special focus on feature engineering for relational data and time series.

Many people have implemented their own feature engineering tools on top of frameworks like data.tables, pandas or Apache Spark. In essence, they all use a brute force approach: Generate a large number of features, then use some sort of feature selection routine to pick a small subselection of these features.

Unfortunately, this is very inefficient. Click here to find out why that is.

getML wants to be better than this. getML offers unique and highly efficient algorithms to engineer features for relational data and time series. These algorithms can produce features that are far more advanced than what any data scientist could write by hand or what you could accomplish using simple brute force approaches.

To get an impression what getML can do, check out this set of features that are based on the Consumer Expenditure Example Dataset included in this tutorial:

CREATE TABLE FEATURE_1 AS
SELECT COUNT( DISTINCT t2.UCC4 ) AS feature_1,
       t1.BASKETID,
       t1.TIME_STAMP
FROM (
     SELECT *,
            ROW_NUMBER() OVER ( ORDER BY BASKETID, TIME_STAMP ASC ) AS rownum
     FROM EXPD
) t1
LEFT JOIN EXPD t2
ON t1.BASKETID = t2.BASKETID
WHERE (
   ( t1.UCC NOT IN ( '330410', '280120', '630110', '320905', '660000', '380315', '370211', '390310', '400220', '360330', '360420', '390223', '410120', '410901', '670902', '380901', '610110', '690230', '004190' ) AND t2.UCC5 NOT IN ( '56031', '28014', '34090', '67031', '67090', '20021', '59023', '38034', '32033', '32013', '47022', '60031', '49030', '56021', '53041', '41090', '31014', '38041', '67011', '29031', '62041', '38011', '68022', '23090', '64012', '55011', '99990', '62061', '68011', '60090', '00410', '00419' ) AND t2.UCC NOT IN ( '190212', '160310', '180612', '200512', '200532', '610110', '470112' ) AND t2.UCC IN ( '200410', '690114', '680903', '600420' ) )
OR ( t1.UCC NOT IN ( '330410', '280120', '630110', '320905', '660000', '380315', '370211', '390310', '400220', '360330', '360420', '390223', '410120', '410901', '670902', '380901', '610110', '690230', '004190' ) AND t2.UCC5 NOT IN ( '56031', '28014', '34090', '67031', '67090', '20021', '59023', '38034', '32033', '32013', '47022', '60031', '49030', '56021', '53041', '41090', '31014', '38041', '67011', '29031', '62041', '38011', '68022', '23090', '64012', '55011', '99990', '62061', '68011', '60090', '00410', '00419' ) AND t2.UCC IN ( '190212', '160310', '180612', '200512', '200532', '610110', '470112' ) AND t1.UCC IN ( '170210', '330510', '100410', '140420', '150110', '180710', '120210', '320903', '590230', '110310', '330610' ) )
OR ( t1.UCC NOT IN ( '330410', '280120', '630110', '320905', '660000', '380315', '370211', '390310', '400220', '360330', '360420', '390223', '410120', '410901', '670902', '380901', '610110', '690230', '004190' ) AND t2.UCC5 IN ( '56031', '28014', '34090', '67031', '67090', '20021', '59023', '38034', '32033', '32013', '47022', '60031', '49030', '56021', '53041', '41090', '31014', '38041', '67011', '29031', '62041', '38011', '68022', '23090', '64012', '55011', '99990', '62061', '68011', '60090', '00410', '00419' ) AND t1.UCC5 IN ( '11041', '17021', '33051', '10041', '14042', '15011', '18022', '18071', '26011', '12021', '36035', '32090', '50011', '59023', '38034', '64022', '11031', '53021', '39021', '38021', '34053', '44012', '38043', '29031', '23090', '39023' ) )
OR ( t1.UCC IN ( '330410', '280120', '630110', '320905', '660000', '380315', '370211', '390310', '400220', '360330', '360420', '390223', '410120', '410901', '670902', '380901', '610110', '690230', '004190' ) AND t2.UCC5 NOT IN ( '13031', '18032', '19021', '33041', '36021', '17053', '18022', '18051', '18071', '19031', '12021', '37021', '14021', '62021', '47021', '64031', '17041', '39012', '60021', '52053', '55031', '34011', '49031', '20051', '61032', '24021', '32033', '20052', '52054', '36031', '36042', '36051', '60031', '62012', '13012', '37031', '53031', '62011', '69011', '38021', '24011', '62092', '34021', '38041', '67011', '32022', '37090', '62081', '38032', '28022', '29043', '61014', '34091', '69023', '36012', '01012', '02011', '01032', '03081', '00400', '00210', '00419' ) AND t2.UCC IN ( '280210', '320370', '190322', '180520', '360350', '190324', '270210', '280140', '320903', '320420', '320902', '200532', '390110', '390310', '640220', '200410', '320130', '470220', '430110', '580000', '560210', '310316', '530903', '230110', '340520', '410901', '660210', '670902', '320232', '520110', '320110', '620410', '380110', '550330', '680220', '680903', '560330', '230900', '290320', '290120', '320901', '320522', '220210', '999900', '290410', '290440', '620320', '310232', '620710', '310210', '680110', '320511', '310340', '630900', '520902', '320120', '140410', '610230', '410140', '620930', '310241', '310331', '310314', '030410', '004100' ) )
OR ( t1.UCC IN ( '330410', '280120', '630110', '320905', '660000', '380315', '370211', '390310', '400220', '360330', '360420', '390223', '410120', '410901', '670902', '380901', '610110', '690230', '004190' ) AND t2.UCC5 IN ( '13031', '18032', '19021', '33041', '36021', '17053', '18022', '18051', '18071', '19031', '12021', '37021', '14021', '62021', '47021', '64031', '17041', '39012', '60021', '52053', '55031', '34011', '49031', '20051', '61032', '24021', '32033', '20052', '52054', '36031', '36042', '36051', '60031', '62012', '13012', '37031', '53031', '62011', '69011', '38021', '24011', '62092', '34021', '38041', '67011', '32022', '37090', '62081', '38032', '28022', '29043', '61014', '34091', '69023', '36012', '01012', '02011', '01032', '03081', '00400', '00210', '00419' ) AND t2.UCC NOT IN ( '180320', '190211', '330410', '180220', '180710', '190212', '140210', '640310', '170410', '170533', '360420', '370212' ) AND t1.UCC NOT IN ( '630110', '380315', '400220', '390223' ) )
OR ( t1.UCC IN ( '330410', '280120', '630110', '320905', '660000', '380315', '370211', '390310', '400220', '360330', '360420', '390223', '410120', '410901', '670902', '380901', '610110', '690230', '004190' ) AND t2.UCC5 IN ( '13031', '18032', '19021', '33041', '36021', '17053', '18022', '18051', '18071', '19031', '12021', '37021', '14021', '62021', '47021', '64031', '17041', '39012', '60021', '52053', '55031', '34011', '49031', '20051', '61032', '24021', '32033', '20052', '52054', '36031', '36042', '36051', '60031', '62012', '13012', '37031', '53031', '62011', '69011', '38021', '24011', '62092', '34021', '38041', '67011', '32022', '37090', '62081', '38032', '28022', '29043', '61014', '34091', '69023', '36012', '01012', '02011', '01032', '03081', '00400', '00210', '00419' ) AND t2.UCC IN ( '180320', '190211', '330410', '180220', '180710', '190212', '140210', '640310', '170410', '170533', '360420', '370212' ) AND t1.UCC NOT IN ( '330410', '320905', '360420', '380901', '610110' ) )
) AND t2.TIME_STAMP <= t1.TIME_STAMP
GROUP BY t1.rownum,
         t1.BASKETID,
         t1.TIME_STAMP;


CREATE TABLE FEATURE_2 AS
SELECT COUNT( * ) - COUNT( DISTINCT t1.TIME_STAMP - t2.TIME_STAMP ) AS feature_2,
       t1.BASKETID,
       t1.TIME_STAMP
FROM (
     SELECT *,
            ROW_NUMBER() OVER ( ORDER BY BASKETID, TIME_STAMP ASC ) AS rownum
     FROM EXPD
) t1
LEFT JOIN EXPD t2
ON t1.BASKETID = t2.BASKETID
WHERE (
   ( t1.UCC NOT IN ( '180320', '330410', '270000', '390120', '600210', '170533', '430120', '340410', '130121', '370314', '390210', '410120', '410901', '330610', '610110', '690119', '600420', '620112', '030610', '004100', '004190' ) AND t2.UCC NOT IN ( '190324', '270210', '210110', '380333', '360312', '360420', '010120', '020410', '002100' ) AND t2.UCC IN ( '360350', '350110', '130121', '320430', '520110', '620420', '380311', '560400', '620510', '600900', '020620', '001000' ) AND t1.UCC NOT IN ( '110410', '110510', '120310', '120410', '130212', '140110', '170210', '190111', '190211', '320140', '330510', '100210', '100510', '110210', '150110', '180220', '180710', '330210', '190112', '190113', '470111', '630110', '190114', '190212', '190321', '560110', '190311', '100110', '330310', '170310', '640310', '380315', '520531', '180611', '540000', '200532', '640410', '320232', '520110', '560400', '320521', '010210', '090110', '090210', '020110', '020210', '020310', '010320', '020510', '040410', '070110', '004000' ) )
OR ( t1.UCC NOT IN ( '180320', '330410', '270000', '390120', '600210', '170533', '430120', '340410', '130121', '370314', '390210', '410120', '410901', '330610', '610110', '690119', '600420', '620112', '030610', '004100', '004190' ) AND t2.UCC IN ( '190324', '270210', '210110', '380333', '360312', '360420', '010120', '020410', '002100' ) AND t1.UCC IN ( '180310', '320410', '330510', '620912', '650210', '170520', '180510', '280120', '320233', '190313', '270310', '190322', '240320', '550410', '320904', '340110', '280140', '320345', '340901', '640410', '590230', '380340', '610320', '130211', '140340', '320130', '470220', '360311', '620330', '530311', '590110', '660110', '380901', '340530', '520110', '180620', '670210', '310232', '620710', '630220', '620930', '090210', '020710', '070230', '050310', '030410', '030510' ) )
OR ( t1.UCC IN ( '180320', '330410', '270000', '390120', '600210', '170533', '430120', '340410', '130121', '370314', '390210', '410120', '410901', '330610', '610110', '690119', '600420', '620112', '030610', '004100', '004190' ) AND t2.UCC NOT IN ( '110410', '120410', '180320', '180410', '190211', '320140', '320410', '330410', '330510', '100210', '100510', '110210', '180420', '320233', '320370', '190112', '330110', '470111', '190313', '190114', '560110', '190311', '180520', '320905', '640310', '660000', '370125', '390120', '340110', '190312', '200532', '390310', '640410', '200210', '380340', '200310', '200410', '360311', '370220', '370314', '340520', '440210', '410120', '410901', '380901', '620926', '300218', '340210', '320232', '610110', '380410', '180620', '240120', '320380', '640120', '620420', '010120', '040110', '010320', '040510', '050310', '004000', '004190' ) AND t1.UCC NOT IN ( '600210', '340410', '410120', '690119', '600420', '620112', '004190' ) AND t2.UCC IN ( '280210', '370213', '620214', '160110', '360350', '470211', '160320', '520531', '430120', '690120', '130110', '590230', '620310', '140340', '380333', '360312', '620121', '620111', '230110', '660110', '660210', '660900', '380210', '550320', '320430', '680903', '380320', '370110', '620213', '320521', '290440', '620710', '380510', '690230', '020610', '020620', '020510', '030810', '040410', '070110', '030210' ) )
OR ( t1.UCC IN ( '180320', '330410', '270000', '390120', '600210', '170533', '430120', '340410', '130121', '370314', '390210', '410120', '410901', '330610', '610110', '690119', '600420', '620112', '030610', '004100', '004190' ) AND t2.UCC NOT IN ( '110410', '120410', '180320', '180410', '190211', '320140', '320410', '330410', '330510', '100210', '100510', '110210', '180420', '320233', '320370', '190112', '330110', '470111', '190313', '190114', '560110', '190311', '180520', '320905', '640310', '660000', '370125', '390120', '340110', '190312', '200532', '390310', '640410', '200210', '380340', '200310', '200410', '360311', '370220', '370314', '340520', '440210', '410120', '410901', '380901', '620926', '300218', '340210', '320232', '610110', '380410', '180620', '240120', '320380', '640120', '620420', '010120', '040110', '010320', '040510', '050310', '004000', '004190' ) AND t1.UCC IN ( '600210', '340410', '410120', '690119', '600420', '620112', '004190' ) )
OR ( t1.UCC IN ( '180320', '330410', '270000', '390120', '600210', '170533', '430120', '340410', '130121', '370314', '390210', '410120', '410901', '330610', '610110', '690119', '600420', '620112', '030610', '004100', '004190' ) AND t2.UCC IN ( '110410', '120410', '180320', '180410', '190211', '320140', '320410', '330410', '330510', '100210', '100510', '110210', '180420', '320233', '320370', '190112', '330110', '470111', '190313', '190114', '560110', '190311', '180520', '320905', '640310', '660000', '370125', '390120', '340110', '190312', '200532', '390310', '640410', '200210', '380340', '200310', '200410', '360311', '370220', '370314', '340520', '440210', '410120', '410901', '380901', '620926', '300218', '340210', '320232', '610110', '380410', '180620', '240120', '320380', '640120', '620420', '010120', '040110', '010320', '040510', '050310', '004000', '004190' ) AND t2.UCC4 NOT IN ( '1104', '1204', '1803', '1901', '1902', '3305', '1002', '1005', '1102', '3301', '4701', '6403', '1806', '2005', '3903', '6404', '4101', '3804', '3809', '3402', '6101', '0101', '0103', '0405', '0503', '0040' ) )
OR ( t1.UCC IN ( '180320', '330410', '270000', '390120', '600210', '170533', '430120', '340410', '130121', '370314', '390210', '410120', '410901', '330610', '610110', '690119', '600420', '620112', '030610', '004100', '004190' ) AND t2.UCC IN ( '110410', '120410', '180320', '180410', '190211', '320140', '320410', '330410', '330510', '100210', '100510', '110210', '180420', '320233', '320370', '190112', '330110', '470111', '190313', '190114', '560110', '190311', '180520', '320905', '640310', '660000', '370125', '390120', '340110', '190312', '200532', '390310', '640410', '200210', '380340', '200310', '200410', '360311', '370220', '370314', '340520', '440210', '410120', '410901', '380901', '620926', '300218', '340210', '320232', '610110', '380410', '180620', '240120', '320380', '640120', '620420', '010120', '040110', '010320', '040510', '050310', '004000', '004190' ) AND t2.UCC4 IN ( '1104', '1204', '1803', '1901', '1902', '3305', '1002', '1005', '1102', '3301', '4701', '6403', '1806', '2005', '3903', '6404', '4101', '3804', '3809', '3402', '6101', '0101', '0103', '0405', '0503', '0040' ) AND t2.UCC IN ( '180320', '100210', '640310', '200532', '390310', '010120', '004000' ) )
) AND t2.TIME_STAMP <= t1.TIME_STAMP
GROUP BY t1.rownum,
         t1.BASKETID,
         t1.TIME_STAMP;

...

CREATE TABLE FEATURE_500 AS
SELECT COUNT( * ) - COUNT( DISTINCT t1.TIME_STAMP - t2.TIME_STAMP ) AS feature_19,
       t1.BASKETID,
       t1.TIME_STAMP
FROM (
     SELECT *,
            ROW_NUMBER() OVER ( ORDER BY BASKETID, TIME_STAMP ASC ) AS rownum
     FROM EXPD
) t1
LEFT JOIN EXPD t2
ON t1.BASKETID = t2.BASKETID
WHERE (
   ( t1.UCC NOT IN ( '330410', '470211', '240320', '380315', '280140', '430120', '400220', '590230', '360420', '370220', '340520', '410120', '410130', '610902', '004190' ) AND t2.UCC NOT IN ( '280210', '220110', '370213', '160110', '400210', '190324', '370211', '390321', '270410', '620310', '200410', '360311', '250900', '600310', '370314', '240310', '530412', '200511', '140330', '390223', '660110', '660210', '660900', '550320', '340210', '520110', '620114', '620410', '380902', '370901', '240120', '620420', '390230', '620213', '520410', '380311', '560400', '320521', '310232', '630900', '630220', '340913', '610130', '340909', '070240' ) AND t2.UCC IN ( '110510', '160320', '150211', '004000', '002100' ) AND t1.UCC IN ( '110510', '180310', '320410', '140220', '640310', '660000', '180612', '190314', '210210', '370314', '240310', '530210', '320430', '380430', '610110', '180620', '190213', '060210', '020210', '020610', '020620', '020510', '040510', '040610' ) )
OR ( t1.UCC NOT IN ( '330410', '470211', '240320', '380315', '280140', '430120', '400220', '590230', '360420', '370220', '340520', '410120', '410130', '610902', '004190' ) AND t2.UCC IN ( '280210', '220110', '370213', '160110', '400210', '190324', '370211', '390321', '270410', '620310', '200410', '360311', '250900', '600310', '370314', '240310', '530412', '200511', '140330', '390223', '660110', '660210', '660900', '550320', '340210', '520110', '620114', '620410', '380902', '370901', '240120', '620420', '390230', '620213', '520410', '380311', '560400', '320521', '310232', '630900', '630220', '340913', '610130', '340909', '070240' ) AND t1.UCC IN ( '180310', '620912', '320370', '620214', '360350', '640310', '660000', '170410', '600210', '550310', '490312', '320420', '420115', '110310', '200310', '270410', '140340', '320130', '200522', '360312', '370314', '490300', '240310', '530210', '620111', '390210', '660110', '440120', '550320', '320232', '380430', '610110', '690119', '320221', '690114', '680903', '290440', '140410', '410140', '060210', '020210', '020610', '020820', '020510', '040510', '050310', '060310', '010110', '030210' ) )
OR ( t1.UCC IN ( '330410', '470211', '240320', '380315', '280140', '430120', '400220', '590230', '360420', '370220', '340520', '410120', '410130', '610902', '004190' ) AND t2.UCC NOT IN ( '120110', '120410', '180310', '320140', '330510', '550210', '620912', '100210', '110210', '150110', '190113', '190114', '180611', '170510', '190312', '390321', '110310', '620310', '360420', '620121', '390210', '390223', '180620', '550330', '190323', '620213', '320630', '020610', '080110', '040110', '050210', '050310', '010310' ) AND t2.UCC5 IN ( '16021', '47011', '27000', '36035', '47021', '16032', '27021', '31035', '52053', '55031', '32061', '40011', '32034', '67090', '14034', '22000', '37022', '62011', '23011', '38043', '41013', '38011', '37090', '68090', '32038', '53011', '56040', '63022', '43013', '61014', '45011', '01012', '02062', '03011', '03071', '07024', '03061', '03041', '00400', '00210', '00110' ) )
OR ( t1.UCC IN ( '330410', '470211', '240320', '380315', '280140', '430120', '400220', '590230', '360420', '370220', '340520', '410120', '410130', '610902', '004190' ) AND t2.UCC IN ( '120110', '120410', '180310', '320140', '330510', '550210', '620912', '100210', '110210', '150110', '190113', '190114', '180611', '170510', '190312', '390321', '110310', '620310', '360420', '620121', '390210', '390223', '180620', '550330', '190323', '620213', '320630', '020610', '080110', '040110', '050210', '050310', '010310' ) AND t2.UCC NOT IN ( '120410', '330510', '100210', '110210', '020610', '080110' ) )
) AND t2.TIME_STAMP <= t1.TIME_STAMP
GROUP BY t1.rownum,
         t1.BASKETID,
         t1.TIME_STAMP;

There are various ways to regularize the complexity of features - features can be simpler than this or more complex. You can try this out for yourself towards the end of this tutorial.

But getML offers more than that. getML is designed to automate as many tasks of a typical data science pipeline as possible.

Here are some of its other features:

  • Hyperparameter optimization routines

  • A web-based frontend to keep track of your projects

  • Easy deployment of models under an HTTP endpoint

  • A Python API that includes close integration with pandas and numpy

  • Database connectors (sqlite3 and postgres, more coming soon)

  • ETL tools, including automated column type detection

  • High-performance data frames

  • Written in highly optimized C++ code

getML offers a free trial version. Check it out now!

An Example: Customer Churn

Imagine that you are a data scientist working for a bank and you want to predict which of your customers are going to churn (switch to another bank).

You begin by defining the population of your statistical problem: You set up a reference date once a month. At this reference date, you look at all of your customers who were active on that reference date. You then define a variable that assumes a value of 1 if the customer closed his/her account within one month after the reference date and 0 otherwise. This variable is your target variable, which is what you want to predict. Your timestamp is the reference date.

Let’s say you have another table, TRANSACTIONS, which contains information on all transactions that your customers have made. You hypothesize that the volume and frequency with which customers conduct transactions can predict whether a customer is going to churn. This table is a peripheral table.

You then have another peripheral table, COMPLAINTS, which contains customer complaints. Whenever a customer has filed a complaint, this table stores the customer ID, the content of the complaint and the date it was made.

_images/snowflake_data_model_bank_example.png

In the traditional way of doing data science, you would now start hand-crafting your features. For instance: How many transactions has a customer made in the 90 days before the reference date? What was the total volume of the transactions made in the 90 days before the reference date?

You might handcraft features that look like this (using SQL or tools such as data.tables or pandas):

CREATE TABLE NUMBER_OF_TRANSACTIONS_LAST_90_DAYS AS
SELECT COUNT( * ) AS number_of_transactions_last_90_days,
       t1.join_key,
       t1.time_stamp
FROM (
     SELECT *,
        ROW_NUMBER() OVER ( ORDER BY customer_id, time_stamp ASC ) AS rownum
     FROM POPULATION
) t1
LEFT JOIN TRANSACTIONS t2
ON t1.customer_id = t2.customer_id
WHERE (
   ( t1.time_stamp - t2.time_stamp <= 90 )
) AND t2.time_stamp <= t1.time_stamp
GROUP BY t1.rownum,
     t1.customer_id,
     t1.time_stamp;

CREATE TABLE TOTAL_TRANSACTION_VOLUME_LAST_90_DAYS AS
SELECT SUM( t2.transaction_volume ) AS total_transaction_volume_last_90_days,
       t1.join_key,
       t1.time_stamp
FROM (
     SELECT *,
        ROW_NUMBER() OVER ( ORDER BY customer_id, time_stamp ASC ) AS rownum
     FROM POPULATION
) t1
LEFT JOIN TRANSACTIONS t2
ON t1.customer_id = t2.customer_id
WHERE (
   ( t1.time_stamp - t2.time_stamp <= 90 )
) AND t2.time_stamp <= t1.time_stamp
GROUP BY t1.rownum,
     t1.customer_id,
     t1.time_stamp;

...many more...

If you want to do this well, you will have to write hundreds of features like this.

But this is a very cumbersome procedure and is largely based on trial-and-error. For instance, the time frame of 90 days seems completely arbitrary and there is no reason to assume that 30 days, 60 days or 180 days might not work fine as well.

So highly paid professional data scientists spend most of their time writing dull SQL statements - that does not seem to make a lot of sense economically and we have not met too many data scientists who enjoy doing this sort of work. But it is how data science is currently being practiced in many companies all over the world.

getML offers an alternative. If you can load the tables POPULATION, TRANSACTIONS and COMPLAINTS into Python as pandas.DataFrame() objects, you can let getML determine the features for you. All you need to do is to identify the join keys, the relevant time stamps and your target variable:

import getml.aggregations as aggregations
import getml.engine as engine
import getml.loss_functions as loss_functions
import getml.models as models

population_placeholder = models.Placeholder(
    name="POPULATION",
    join_keys=["customer_id"],
    time_stamps=["time_stamp"],
    targets=["churn"]
)

transactions_placeholder = models.Placeholder(
    name="TRANSACTIONS",
    categorical=["transaction_type"]
    numerical=["transaction_volume", "currency"],
    join_keys=["customer_id"],
    time_stamps=["time_stamp"]
)

complaints_placeholder = models.Placeholder(
    name="COMPLAINTS",
    join_keys=["customer_id"],
    time_stamps=["time_stamp"]
)

population_placeholder.join(transactions_placeholder, "customer_id", "time_stamp")

population_placeholder.join(complaints_placeholder, "customer_id", "time_stamp")

model = models.MultirelModel(
    population=population_placeholder,
    peripheral=[transactions_placeholder, complaints_placeholder],
    loss_function=loss_functions.CrossEntropyLoss(),
    aggregation=[
        aggregations.Var,
        aggregations.Avg,
        aggregations.Sum,
        aggregations.Max,
        aggregations.Min,
        aggregations.Count],
    num_features=500
).send()

model.fit(
    population_table=population_table,
    peripheral_tables=[transactions, complaints]
)

getML will then generate features for you, such as the one in the opening paragraph:

CREATE TABLE FEATURE_1 AS
SELECT COUNT( * ) AS feature_1,
       t1.BASKETID,
       t1.TIME_STAMP
FROM (
     SELECT *,
        ROW_NUMBER() OVER ( ORDER BY BASKETID, TIME_STAMP ASC ) AS rownum
     FROM POPULATION_TABLE
) t1
LEFT JOIN TRANSACTIONS t2
ON t1.CUSTOMER_ID = t2.CUSTOMER_ID
WHERE (
   ( t2.transaction_volume > 124234.38 )
OR ( t2.transaction_type = 'transfer' AND ( currency = 'USD' OR currency = 'EUR' ) )
) AND t2.TIME_STAMP <= t1.TIME_STAMP
GROUP BY t1.rownum,
     t1.JOIN_KEY_1,
     t1.TIME_STAMP;

It is possible to directly retrieve the features as a numpy.array or generate predictions using the built-in machine learning algorithms. It is also possible to extract the SQL code underlying your features.