In [1]:
import os

# Pin the visible GPU devices BEFORE importing anything that loads torch:
# CUDA_VISIBLE_DEVICES is only guaranteed to be honoured when it is set before
# the CUDA runtime is initialised in this process.
# NOTE(review): these MIG UUIDs are specific to one machine — confirm before
# running elsewhere. CUDA reportedly enumerates only a single MIG instance per
# process, so the 2nd/3rd entries may have no effect — TODO confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "MIG-56c53afb-6f08-5e5b-83fa-32fc6f09eeb0,MIG-ee0daf5f-9543-5e3f-8157-308a15c318b4,MIG-fbb89bfe-6460-508c-ab51-9b961def7e01"

import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim, semantic_search

In [2]:
# Load the four prepared splits; the order of the paths matches the names on the left.
unlabeled, train, valid, test = (
    pd.read_csv(csv_path, index_col=False)
    for csv_path in (
        "../../data/prepared/unlabeled.csv",
        "../../data/prepared/train.csv",
        "../../data/prepared/valid.csv",
        "../../data/prepared/test.csv",
    )
)

# Build the search corpus from every split except `test`, keeping only the
# ticket id and its abstract, with exact duplicate rows removed.
df = (
    pd.concat([unlabeled, train, valid], ignore_index=True)[['ticket', 'problem_abstract']]
    .drop_duplicates()
)

In [3]:
class SentenceEmbeddingRecommender:
    """Recommend similar rows of a frame by sentence-embedding similarity.

    The ``problem_abstract`` column of the supplied frame is embedded once at
    construction time. ``recommend`` then embeds a query string and returns the
    rows whose embeddings score highest against it.
    """

    def __init__(self, data: pd.DataFrame, model_name: str = 'thenlper/gte-base'):
        """Load the embedding model and embed every row of ``data``.

        Args:
            data: Frame containing a ``problem_abstract`` column. Rows are
                addressed by position (``iloc``), so the index labels do not
                need to be contiguous.
            model_name: Model identifier handed to ``SentenceTransformer``.
        """
        self._data = data
        self.model = SentenceTransformer(model_name)
        self.document_vectors = self._transform_to_embeddings(data)

    def _transform_to_embeddings(self, df: pd.DataFrame):
        """Encode ``df['problem_abstract']`` into normalized embeddings.

        Missing abstracts (``None``/NaN) are replaced by the empty string so
        that exactly one embedding is produced per row, in row order.
        """
        sentences = df['problem_abstract'].fillna("").tolist()
        return self.model.encode(sentences, normalize_embeddings=True, convert_to_tensor=True)

    def recommend(self, abstract: str, n: int = 10) -> pd.DataFrame:
        """Return up to ``n`` rows most similar to ``abstract``, best match first.

        Args:
            abstract: Query text to embed and search with.
            n: Maximum number of rows to return. Values ``<= 0`` yield an
                empty frame.

        Returns:
            A positional selection of the frame passed to ``__init__``.
        """
        if n <= 0:
            return self._data.iloc[[]]

        abstract_embedding = self.model.encode(abstract, convert_to_tensor=True)
        # ``semantic_search`` returns only its default ``top_k`` (10) hits unless
        # told otherwise, so ``n`` must be forwarded — slicing the result
        # afterwards cannot produce more than that default.
        hits = semantic_search(abstract_embedding, self.document_vectors, top_k=n)[0]
        # ``corpus_id`` is the position of the hit in ``document_vectors``, which
        # was built in row order, hence the positional ``iloc`` lookup.
        return self._data.iloc[[h["corpus_id"] for h in hits]]


# Example usage with the default model: embed the corpus once.
recommender_gte = SentenceEmbeddingRecommender(df, model_name='thenlper/gte-base')

# Pair each of the first ten test abstracts with its recommendations.
# Iterating the column directly avoids looking every row up twice, and
# ``head(10)`` simply yields fewer items when ``test`` has fewer than ten rows
# instead of raising IndexError.
recommended_gte = [
    (abstract, recommender_gte.recommend(abstract))
    for abstract in test["problem_abstract"].head(10)
]
recommended_gte

[('GSSI EMEA Terminal Server Container NOT Resolving Model Type',
            ticket                                   problem_abstract
  10531  315133933  I have domain "EMEA IBM NMS and GTM Grey" acti...
  9863   303916730  TWB Portal - Corrupt GPS Sync Report - GSSI EM...
  11010  307709799  Access to MSS_GSC_OOEEE domain for my GTAC dom...
  8545   308257599     Require GTAC report for IBM GSSI / GSSI domain
  9740   310344317  Some assets on GSNI UK Vitalnet are not update...
  10357  308248367            Enable GPS SYNC for GSSI SSA Containers
  11489  307788890  Unable to create Log viewer requests for "IBM ...
  8789   308677256  Please verify GESS managed environment involve...
  9788   307763537  GTAC is not working  for all GMIS customers po...
  6934   312052061  GTAC server mapping not working correctly for ...),
 ('Credentials for new customer Siemens Energy',
            ticket                                   problem_abstract
  8375   311658856                         

In [4]:
# Show the (query abstract, recommended rows) pair built for test row 0.
recommended_gte[0]

('GSSI EMEA Terminal Server Container NOT Resolving Model Type',
           ticket                                   problem_abstract
 10531  315133933  I have domain "EMEA IBM NMS and GTM Grey" acti...
 9863   303916730  TWB Portal - Corrupt GPS Sync Report - GSSI EM...
 11010  307709799  Access to MSS_GSC_OOEEE domain for my GTAC dom...
 8545   308257599     Require GTAC report for IBM GSSI / GSSI domain
 9740   310344317  Some assets on GSNI UK Vitalnet are not update...
 10357  308248367            Enable GPS SYNC for GSSI SSA Containers
 11489  307788890  Unable to create Log viewer requests for "IBM ...
 8789   308677256  Please verify GESS managed environment involve...
 9788   307763537  GTAC is not working  for all GMIS customers po...
 6934   312052061  GTAC server mapping not working correctly for ...)

In [5]:
# Show the (query abstract, recommended rows) pair built for test row 1.
recommended_gte[1]

('Credentials for new customer Siemens Energy',
           ticket                                   problem_abstract
 8375   311658856                                     Siemens Energy
 11440  314373286  Additional Credentials for Sysco Corp Customer...
 10931  309825371                           New Security credentials
 11427  308165272  Customer faced challenges to login (with  corr...
 8521   314231715                                Customer Onboarding
 11173  305312660          New Customer Account Needed - Sysco Foods
 4149   310549957            New DTAC Engineer access request to PAL
 4315   310680069            NEW DTAC ENGINEER ACCESS REQUEST TO PAL
 5766   312423300            New DTAC Engineer access request to PAL
 969    307034869  BD Center receives company level error for Sie...)

In [6]:
# Show the (query abstract, recommended rows) pair built for test row 2.
recommended_gte[2]

('Request to provide Telco Inventory (BMP & WFA Inventory)',
           ticket                                   problem_abstract
 3935   310260894       BFEC 519418 WMS 252307287 BMP TT # 252301842
 5912   312575829  MechID M98467 needs DB access to "AT&T Solutio...
 10505  300760130                         CA & SM Customer Inventory
 10352  311509796                          Request for Customer Data
 1893   308306882  SR# 7936578, 7801567, 7750972\n T&M needs to b...
 8431   307809855  Requesting Sales Hierarchy info from Webphone ...
 5040   311611563      BFEC 508820 BMP TT # 271101221 WMS 2706032309
 8484   315253103                            Customer Access Request
 8936   309342160  Attn: Mohan Periyasamy - Requesting enhancemen...
 7121   312640814                  BMP ticket 280401583 (PIN 463317))

In [7]:
# Show the (query abstract, recommended rows) pair built for test row 3.
recommended_gte[3]

('ACTION Web will not show me router information',
           ticket                                   problem_abstract
 7043   312433048  For some routers no option to view config. oth...
 9614   311724127  I have access to Action V.3 and have gained al...
 9979   309286106                            Unable to access router
 7033   312415345       Unable to open Action URL action.web.att.com
 8321   305268467             Action V3 can not pull out device info
 8292   313213817                           Unable to access routers
 9932   311297088  I am no longer able to pull router configs in ...
 8852   309089161  Action Configuration File Search webpage not w...
 9106   306577832  I am not able to search and see some devices i...
 10476  313476486  Action Outage | C2W | unable to find device de...)

In [8]:
# Show the (query abstract, recommended rows) pair built for test row 4.
recommended_gte[4]

('Trying to change password get popup with fatal error',
           ticket                                   problem_abstract
 8495   309637027  When trying to change password getting pop up ...
 9140   309341142  Encountering Fatal Error when trying to change...
 10984  304145595                        password change fatal error
 11080  306532398  Unable to change password and it gives me "Fat...
 9085   305089627    Getting an error when trying to change password
 9286   306707973              I cant change password with php error
 11635  306626018                  Trying to change password - Error
 9693   308424545                            Error changing password
 11475  306511298    Receiving error when attempting password change)

In [9]:
# Show the (query abstract, recommended rows) pair built for test row 5.
recommended_gte[5]

('Add the new Wireless Controller to the Cisco ISE servers and share the Key',
           ticket                                   problem_abstract
 9759   312447919  Add new Cisco Wireless controller to the Cisco...
 10649  313206266       Need to add Wireless controller to cisco ISE
 9016   313020079             Add Wireless controller to ISE servers
 9457   313136986            Need to add new controller to Cisco ISE
 9485   313507473        Add the eWC controller to Cisco ISE servers
 9081   307189047  ISE IBM IGA | Add the controller to the Cisco ...
 11671  307770609           Need to add new controller in ISE server
 8388   307706782               Add the WLC to the Cisco ISE Servers
 8238   311140003                Controller Addition on ISE servers.
 9504   313499876  Add new controller hj9-wc-c2 xxxxxxx to the II...)

In [10]:
# Show the (query abstract, recommended rows) pair built for test row 6.
recommended_gte[6]

('Kyndryl config backup is missing from the directories',
           ticket                                   problem_abstract
 10626  308097953  Configuration backup is not working for specif...
 7183   312861812             Configuration is not getting backed up
 7902   308829455             don't see backup config in action-tool
 11021  315300651        Missing configuration backup file on poller
 7542   309686335         Missing backup config for multiple devices
 8491   307412874     Running configuration not backed up in action.
 8009   308524680  Backup configuration file of device not in ACTION
 10593  314298897  unable to pull older backup configs from the p...
 7516   309799931  URGENT!!! - Action config backup not working p...
 7517   309799776  URGENT!!! - Action config backup not working p...)

In [11]:
# Show the (query abstract, recommended rows) pair built for test row 7.
recommended_gte[7]

('Customer is unable to login to firewall device . ID is configured in device with GTAC radius auth.',
           ticket                                   problem_abstract
 8702   307880882  Customer could not access device using GTAC lo...
 10690  312935250  Unable to login using Gtac id. I can be reache...
 8770   307111787  MAIN GTAC TICKET - Unable to login to devices ...
 9766   307111625          Unable to login to any devices using GTAC
 8959   311507415  device is not able to authenticate via gtac. d...
 10876  306303912               Unable to login any device with GTAC
 10531  315133933  I have domain "EMEA IBM NMS and GTM Grey" acti...
 11301  315256600  Unable to login to GMIS Vyatta routers using G...
 10551  312377455  Can't login via GTAC from Aug  7 23:22:35 (NW ...
 9829   305696621              User is unable to access GTAC Domains)

In [12]:
# Show the (query abstract, recommended rows) pair built for test row 8.
recommended_gte[8]

('.cfg files not available on the poller',
           ticket                                   problem_abstract
 11021  315300651        Missing configuration backup file on poller
 6984   312257393   /tools/gt3/bin scripts not working on any poller
 1351   307856873    Configuration not getting backed up from poller
 8131   309325803                Script does not work on new pollers
 8445   313159814  I'm not able to download the Action file from ...
 11219  307796836        USSTAFGWSCO0301UJZZ01 not getting on poller
 8541   310960834                Unable to access Pollers using GTAC
 7991   308585734  Kindly assist to fix the primary poller's poll...
 11382  312258280   CBACK, HOIP  script is not working from pollers.
 5626   312256072  I'm choosing this App because can't find anyth...)

In [13]:
# Show the (query abstract, recommended rows) pair built for test row 9.
recommended_gte[9]

('down',
          ticket                                   problem_abstract
 738   307269855         **ATLN1/21 - CHCG1-/14<BMP# 220101705>DOWN
 531   306629938        **HSTN01S/4 - KSCY01S/3<BMP# 211000892>DOWN
 5377  312044622                                    L6YX999130 DOWN
 4439  310868132                                    L6YX969485 DOWN
 5027  311594651                                    BFEC588962 DOWN
 739   307269226           **CNCO/5 - NSVL01S/3<BMP# 220101631>DOWN
 2961  309460574  ATLNGAMDX03P_32C P138/S10 -- KSCYMOMDX03P_32C ...
 818   307363151                                    L6YX129861 DOWN
 5450  312141087                                    L6YX129861 down
 9205  307287205                 Down Devices Not Working Correctly)